-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfromDriveMedium.py
More file actions
152 lines (131 loc) · 6.41 KB
/
fromDriveMedium.py
File metadata and controls
152 lines (131 loc) · 6.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/python3.10
# -*- coding: utf-8 -*-
import os
import io
import re
from google.auth.transport.requests import Request # type: ignore
from google.oauth2.credentials import Credentials # type: ignore
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore
from googleapiclient.discovery import build # type: ignore
from googleapiclient.http import MediaIoBaseDownload # type: ignore
# OAuth scopes requested for the Drive API (read-only access).
# If modifying these SCOPES, delete the file token.json so that the cached
# credentials written by authenticate() are re-created with the new scopes.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
# Hard-coded inputs
# DRIVE_URL: URL of the Drive folder to download; main() extracts the folder ID from it.
DRIVE_URL = 'https://drive.google.com/drive/folders/1eHdmkbaVUtHaTV-mMWn3N5h4KR_FGnDc'
# OUTPUT_FOLDER: local root directory that main() passes to download_files_recursive().
OUTPUT_FOLDER = 'Z:/GDRIVE/'
def authenticate():
    """Return an authorized Drive v3 service, reusing cached credentials if valid.

    Credentials are read from ``token.json`` when that file exists. If they
    are missing or not valid they are either refreshed (expired credentials
    that carry a refresh token) or obtained through the installed-app OAuth
    flow configured by ``credentials.json``; in both cases the resulting
    credentials are written back to ``token.json``.

    Returns:
        The object produced by ``build('drive', 'v3', credentials=...)``.
    """
    token_path = 'token.json'
    client_secrets_path = 'credentials.json'

    cached = (
        Credentials.from_authorized_user_file(token_path, SCOPES)
        if os.path.exists(token_path)
        else None
    )
    # Fast path: the cached token is still usable, nothing to refresh or save.
    if cached and cached.valid:
        return build('drive', 'v3', credentials=cached)

    creds = cached
    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        # No usable token: run the browser-based consent flow on a free local port.
        oauth_flow = InstalledAppFlow.from_client_secrets_file(client_secrets_path, SCOPES)
        creds = oauth_flow.run_local_server(port=0)

    # Persist the new/refreshed credentials for the next run.
    with open(token_path, 'w') as token_file:
        token_file.write(creds.to_json())

    return build('drive', 'v3', credentials=creds)
def extract_file_id(drive_url):
    """Return the folder ID embedded in a Google Drive folder URL.

    The ID is the run of letters, digits, ``_`` and ``-`` that follows the
    first ``/folders/`` segment of ``drive_url``.

    Raises:
        ValueError: If ``drive_url`` contains no ``/folders/<id>`` segment.
    """
    folder_match = re.search(r'/folders/([a-zA-Z0-9_-]+)', drive_url)
    if folder_match is None:
        raise ValueError('Invalid Google Drive folder URL')
    return folder_match[1]
def download_files_recursive(service, folder_id, output_folder, log_file):
    """Mirror a Drive folder and all of its subfolders into ``output_folder``.

    Args:
        service: Drive service object used for every API call.
        folder_id: ID of the Drive folder whose children are listed.
        output_folder: Local directory that receives this folder's files;
            each Drive subfolder gets a same-named subdirectory beneath it.
        log_file: Path of the log file handed on to ``download_file``.
    """
    entries, total_pages = list_files_in_folder(service, folder_id)
    for entry in entries:
        # Anything that is not a Drive folder is downloaded straight away.
        if entry['mimeType'] != 'application/vnd.google-apps.folder':
            download_file(service, entry['id'], output_folder, log_file, total_pages)
            continue

        # Drive folder: create the matching local directory, then descend into it.
        child_id, child_name = entry['id'], entry['name']
        child_dir = os.path.join(output_folder, child_name)
        os.makedirs(child_dir, exist_ok=True)
        download_files_recursive(service, child_id, child_dir, log_file)
# Google Workspace MIME type -> (MIME type to export as, extension for the saved file).
_EXPORT_FORMATS = {
    'application/vnd.google-apps.document': ('application/pdf', '.pdf'),
    'application/vnd.google-apps.spreadsheet': (
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.xlsx',
    ),
    'application/vnd.google-apps.presentation': (
        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
        '.pptx',
    ),
}


def download_file(service, file_id, output_folder, log_file, total_pages):
    """Download one Drive file into ``output_folder`` and log the outcome.

    Google Docs, Sheets and Slides are exported as PDF, XLSX and PPTX
    respectively, and the matching extension is appended to the local file
    name when it is not already there. Every other MIME type is requested
    as-is with ``get_media``.

    Any exception (including a failed metadata lookup) is caught, printed and
    appended to ``log_file``; it is never propagated, so one bad file does not
    stop a batch. A file that was opened but not completely written is removed
    so that a truncated download is not mistaken for a good one.

    Args:
        service: Drive service object.
        file_id: ID of the Drive file to download.
        output_folder: Local directory to write into (created if missing).
        log_file: Path of the text log that receives one line per file.
        total_pages: Page count reported by the listing; only written to the log.
    """
    file_name = file_id  # label for the error log until the real name is known
    file_path = None
    partial = False  # True while a local file is open but not fully written
    try:
        file_metadata = service.files().get(fileId=file_id).execute()
        file_name = file_metadata['name']
        print("file_metadatafile_metadata", file_metadata)

        export = _EXPORT_FORMATS.get(file_metadata['mimeType'])
        local_name = file_name
        if export is None:
            # For other file types, download as is.
            request = service.files().get_media(fileId=file_id)
        else:
            export_mime, extension = export
            request = service.files().export_media(fileId=file_id, mimeType=export_mime)
            # Without the extension the exported file cannot be opened by type.
            if not local_name.lower().endswith(extension):
                local_name += extension

        os.makedirs(output_folder, exist_ok=True)  # Ensure the output directory exists
        file_path = os.path.join(output_folder, local_name)
        with io.FileIO(file_path, 'wb') as fh:
            partial = True
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                print(f"Download {int(status.progress() * 100)}% complete.")
                print("done", done)
        partial = False

        # Log file path and download status to the log file.
        # UTF-8 so that non-ASCII file names cannot break logging on Windows.
        with open(log_file, 'a', encoding='utf-8') as log:
            log.write(f"'Page No: '{total_pages} FilePath: {file_path},{'Downloaded' if done else 'Not Downloaded'}\n")
        print(f"File downloaded to {file_path}")
    except Exception as e:
        print(f"Error downloading file: {e}")
        if partial:
            # Best-effort cleanup of the truncated file; the original error is
            # what gets logged, so a failure to delete is deliberately ignored.
            try:
                os.remove(file_path)
            except OSError:
                pass
        # Log error message to the log file
        with open(log_file, 'a', encoding='utf-8') as log:
            log.write(f"Error downloading file: PageNo:::: {total_pages} {file_name}, {str(e)}\n")
def list_files_in_folder(service, folder_id, include_trashed=False):
    """List the direct children of a Drive folder, following pagination.

    Args:
        service: Drive service object.
        folder_id: ID of the folder whose children are listed.
        include_trashed: When False (default) the query adds
            ``trashed = false`` so items sitting in the Drive trash are not
            returned; pass True to list them as well.

    Returns:
        A ``(files, page_count)`` tuple: ``files`` is a list of dicts with the
        requested ``id``, ``name`` and ``mimeType`` fields, and ``page_count``
        is the number of result pages that were fetched (at least 1).
    """
    query = f"'{folder_id}' in parents"
    if not include_trashed:
        query += " and trashed = false"

    files = []
    page_token = None  # None requests the first page
    page_number = 0
    print("List all files in a folder")
    while True:
        response = service.files().list(
            q=query,
            fields='nextPageToken, files(id, name,mimeType)',
            pageToken=page_token,
            pageSize=1000,  # You can adjust the page size as needed
        ).execute()
        page_number += 1

        # Extract files from the response
        files.extend(response.get('files', []))

        # A missing nextPageToken means this was the last page.
        page_token = response.get('nextPageToken')
        if not page_token:
            return files, page_number
def download_files_in_folder(service, folder_id, output_folder, log_file=None):
    """Download the entries listed directly under one Drive folder (no recursion).

    Args:
        service: Drive service object.
        folder_id: ID of the Drive folder to list.
        output_folder: Local directory that receives the downloads.
        log_file: Path of the log file passed to ``download_file``. Defaults
            to ``DownloadFilesDetails.txt`` inside ``output_folder``.

    NOTE(review): folder entries are not filtered out here and are handed to
    ``download_file`` like any other entry; use ``download_files_recursive``
    to descend into subfolders.
    """
    if log_file is None:
        log_file = os.path.join(output_folder, 'DownloadFilesDetails.txt')

    files, total_pages = list_files_in_folder(service, folder_id)
    print(f"Downloading files from folder with ID: {folder_id} to {output_folder}...")
    for file in files:
        # download_file requires the log path and page count; omitting them
        # raised TypeError on the first file.
        download_file(service, file['id'], output_folder, log_file, total_pages)
def main():
    """Authenticate, resolve the folder ID from DRIVE_URL and download it recursively.

    Files are written under OUTPUT_FOLDER and per-file results are appended
    to ``Z:/GDRIVE/DownloadFilesDetails.txt``.
    """
    drive_service = authenticate()
    root_folder_id = extract_file_id(DRIVE_URL)
    log_path = 'Z:/GDRIVE/DownloadFilesDetails.txt'
    print(f"Downloading file with ID: {root_folder_id} to {OUTPUT_FOLDER}...")
    download_files_recursive(drive_service, root_folder_id, OUTPUT_FOLDER, log_path)


# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
"""
source: https://medium.com/@aalam-info-solutions-llp/downloading-files-from-google-drive-link-to-a-target-folder-using-python-93b8c67f1304
author: https://aalamsoft.com/
"""