Skip to content

Instantly share code, notes, and snippets.

@lelogrott
Last active August 6, 2020 21:37
Show Gist options
  • Select an option

  • Save lelogrott/84102f9e4d655bd92a45ab024a13bc90 to your computer and use it in GitHub Desktop.

Select an option

Save lelogrott/84102f9e4d655bd92a45ab024a13bc90 to your computer and use it in GitHub Desktop.
Downloads all files in a Google Drive folder, keeping the folder structure.
from __future__ import print_function
import pickle
import io
import argparse
import sys
import os.path
from pathlib import Path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from apiclient.http import MediaIoBaseDownload
class GoogleDriveService:
    """Download the contents of a Google Drive folder, keeping its structure.

    Credentials are obtained through the installed-app OAuth flow and cached
    in a pickle file so later runs can reuse (or refresh) them.
    """

    # If modifying these scopes, delete the file token.pickle.
    SCOPES = ['https://www.googleapis.com/auth/drive']

    # OAuth client secrets downloaded from the Google API console.
    CREDENTIALS_PATH = './../credentials.json'
    # Where the token is saved. It is also the first place it is looked up,
    # so that a saved token is actually found again on the next run.
    TOKEN_PATH = './../token.pickle'
    # Location earlier versions read the token from; still honoured.
    LEGACY_TOKEN_PATH = 'token.pickle'
    # Failed downloads are appended to this file.
    ERROR_LOG_PATH = './../download_errors.txt'
    FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'

    def __init__(self, remote=None):
        """Authenticate and build the Drive v3 client.

        Args:
            remote: True to use the console OAuth flow (e.g. over SSH), False
                to use the local-server flow. When None, the module-level
                ``REMOTE_RUN`` flag is used if it exists, otherwise False.
        """
        self.remote = remote
        self.load_creds()
        self.client = build('drive', 'v3', credentials=self.creds)

    def _is_remote_run(self):
        """Return True when the console OAuth flow should be used."""
        remote = getattr(self, 'remote', None)
        if remote is None:
            # REMOTE_RUN is only defined when the file runs as a script;
            # look it up defensively so the class also works when imported.
            remote = globals().get('REMOTE_RUN', False)
        return bool(remote)

    def load_creds(self):
        """Load cached credentials, refreshing or re-authorizing if needed.

        Sets ``self.creds``. Whenever new or refreshed credentials are
        obtained they are written to ``TOKEN_PATH``.
        """
        self.creds = None
        # The token file stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the
        # first time.
        # NOTE: pickle must only be used on files this script wrote itself;
        # never point these paths at untrusted data.
        for token_path in (self.TOKEN_PATH, self.LEGACY_TOKEN_PATH):
            if os.path.exists(token_path):
                with open(token_path, 'rb') as token:
                    self.creds = pickle.load(token)
                break

        if self.creds and self.creds.valid:
            return

        # If there are no (valid) credentials available, let the user log in.
        if self.creds and self.creds.expired and self.creds.refresh_token:
            self.creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                self.CREDENTIALS_PATH, self.SCOPES)
            if self._is_remote_run():
                # NOTE(review): run_console appears to have been removed from
                # recent google-auth-oauthlib releases - confirm the
                # installed version still provides it.
                self.creds = flow.run_console()
            else:
                self.creds = flow.run_local_server(port=0)

        # Save the credentials for the next run
        with open(self.TOKEN_PATH, 'wb') as token:
            pickle.dump(self.creds, token)

    @staticmethod
    def _safe_name(name):
        """Return a Drive item name usable as a single local path component."""
        return name.replace('/', '-')

    def download_file(self, file_id, file_name, file_path='./', verbose=False):
        """Download one Drive file to ``file_path``/``file_name``.

        Args:
            file_id: Drive id of the file to fetch.
            file_name: local file name to write.
            file_path: destination directory (must already exist).
            verbose: print progress after every chunk when True.

        Raises:
            Whatever the Drive client raises; a partially written file is
            removed before the exception propagates.
        """
        target = os.path.join(file_path, file_name)
        request = self.client.files().get_media(fileId=file_id)
        try:
            with io.FileIO(target, 'wb') as fh:
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    if verbose:
                        print("%s - Download %d%%." % (target, int(status.progress() * 100)))
        except BaseException:
            # A truncated file would be reported as "already exists" on the
            # next run and never re-downloaded, so remove it.
            try:
                os.remove(target)
            except OSError:
                pass
            raise

    def _download_item(self, item, file_path):
        """Download a single non-folder item unless it already exists locally."""
        download_name = self._safe_name(item['name'])
        # handle avatar files without extension
        if (item['mimeType'] == 'image/jpeg'
                and not download_name.lower().endswith(('.jpeg', '.jpg'))):
            download_name += '.jpeg'
        full_path_file_name = os.path.join(file_path, download_name)
        if os.path.exists(full_path_file_name):
            print("%s - already exists." % (full_path_file_name))
            return
        try:
            self.download_file(item['id'], download_name, file_path, verbose=True)
        except Exception as e:
            # Best effort: record the failure (with its message) and carry on
            # with the remaining files. KeyboardInterrupt is not swallowed.
            with open(self.ERROR_LOG_PATH, 'a+') as error_file:
                error_file.write(
                    "Couldn't download %s%s (%s) - Error: %s\n"
                    % (file_path, download_name, item['id'], repr(e)))

    def download_all_files_in_folder(self, folder_id, file_path='./'):
        """Recursively download every file below a Drive folder.

        Args:
            folder_id: Drive id of the folder to mirror. None means the
                ``root`` alias (the top of the user's My Drive).
            file_path: local directory to mirror into; created if missing.
        """
        if folder_id is None:
            # "'None' in parents" is not a meaningful query; start at root.
            folder_id = 'root'
        # Guarantee a trailing separator so path concatenation in messages
        # and joins behaves the same whether or not the caller supplied one.
        file_path = os.path.join(file_path, '')
        # creates path if does not exist
        if file_path:
            os.makedirs(file_path, exist_ok=True)

        page_token = None
        while True:
            # Call the Drive v3 API
            results = self.client.files().list(
                q=("'%s' in parents" % folder_id),
                fields="nextPageToken, files(id, name, mimeType, parents)",
                pageToken=page_token,
            ).execute()
            items = results.get('files', [])
            if not items:
                print('No files found in %s' % file_path)
            for item in items:
                if item['mimeType'] == self.FOLDER_MIME_TYPE:
                    new_path = os.path.join(
                        file_path, self._safe_name(item['name']), '')
                    self.download_all_files_in_folder(item['id'], new_path)
                else:
                    self._download_item(item, file_path)
            page_token = results.get('nextPageToken', None)
            if page_token is None:
                break
def main():
    """Create the Drive service and mirror PARENT_FOLDER_ID into the current directory."""
    service = GoogleDriveService()
    # uncomment line below for debugging
    # import code; code.interact(local=dict(globals(), **locals()))
    service.download_all_files_in_folder(PARENT_FOLDER_ID)
if __name__ == '__main__':
    # Command-line entry point: parse the options, publish them as the
    # module-level settings the rest of the script reads, then run.
    parser = argparse.ArgumentParser(description='Download files stored in Google Drive.')
    parser.add_argument('--folder_id', dest='folder_id', help='parent folder id. If none given, all files are downloaded')
    # nargs='?' lets a bare "--remote" mean true, while "--remote true" and
    # "--remote false" keep working as explicit values.
    parser.add_argument('--remote', dest='remote', nargs='?', const='true', default='false',
                        help='set to true if running remotely. Eg via SSH. default: false')
    args = parser.parse_args()
    PARENT_FOLDER_ID = args.folder_id
    # bool('false') is True, so the text has to be interpreted explicitly.
    remote_text = str(args.remote).strip().lower()
    if remote_text in ('1', 'true', 't', 'yes', 'y', 'on'):
        REMOTE_RUN = True
    elif remote_text in ('0', 'false', 'f', 'no', 'n', 'off', ''):
        REMOTE_RUN = False
    else:
        parser.error('--remote expects true or false, got %r' % args.remote)
    main()
@lelogrott
Copy link
Copy Markdown
Author

Before running it, you should enable the Google Drive API, generate and save the credentials.json file, and install the dependencies.
You can find more information in steps 1 and 2 of this Quickstart document.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment