Skip to content

Instantly share code, notes, and snippets.

@lelogrott
Last active August 6, 2020 21:37
Show Gist options
  • Save lelogrott/84102f9e4d655bd92a45ab024a13bc90 to your computer and use it in GitHub Desktop.
Save lelogrott/84102f9e4d655bd92a45ab024a13bc90 to your computer and use it in GitHub Desktop.
Downloads all files in a Google Drive folder, preserving the folder structure.
from __future__ import print_function
import pickle
import io
import argparse
import sys
import os.path
from pathlib import Path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from apiclient.http import MediaIoBaseDownload
class GoogleDriveService:
    """Wrapper around a Google Drive v3 client for recursive folder downloads.

    Credentials are obtained through the installed-app OAuth flow and cached
    in ``TOKEN_PATH``. Downloads that fail are recorded in ``ERROR_LOG_PATH``
    instead of aborting the whole run.
    """

    # If modifying these scopes, delete the cached token file (TOKEN_PATH).
    SCOPES = ['https://www.googleapis.com/auth/drive']
    # Client secrets file handed to InstalledAppFlow.
    CREDENTIALS_PATH = './../credentials.json'
    # Cached user credentials. Read and written at the SAME location so that
    # the token saved by one run is actually found by the next one.
    TOKEN_PATH = './../token.pickle'
    # Append-only log of files that could not be downloaded.
    ERROR_LOG_PATH = './../download_errors.txt'
    # mimeType Drive reports for folders.
    FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'

    def __init__(self, remote=None):
        """Load credentials and build the Drive v3 client.

        Args:
            remote: True to authorize through the console flow (no local
                browser), False to use a local server. When None, the
                module-level ``REMOTE_RUN`` flag is used if it exists,
                otherwise False.
        """
        self.remote = remote
        self.load_creds()
        self.client = build('drive', 'v3', credentials=self.creds)

    def load_creds(self):
        """Populate ``self.creds``, refreshing or re-authorizing if needed.

        The token file stores the user's access and refresh tokens and is
        created automatically when the authorization flow completes for the
        first time.
        """
        self.creds = None
        if os.path.exists(self.TOKEN_PATH):
            # NOTE(security): pickle.load can execute arbitrary code. This is
            # only acceptable while the token file is written exclusively by
            # this script; never point TOKEN_PATH at an untrusted file.
            with open(self.TOKEN_PATH, 'rb') as token:
                self.creds = pickle.load(token)

        if self.creds and self.creds.valid:
            return

        # No (valid) credentials available: refresh, or let the user log in.
        if self.creds and self.creds.expired and self.creds.refresh_token:
            self.creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                self.CREDENTIALS_PATH, self.SCOPES)
            remote = getattr(self, 'remote', None)
            if remote is None:
                # REMOTE_RUN only exists when the file is run as a script;
                # fall back to False so importing the class does not raise.
                remote = globals().get('REMOTE_RUN', False)
            if remote:
                # NOTE(review): run_console appears to have been removed in
                # newer google-auth-oauthlib releases - confirm against the
                # installed version.
                self.creds = flow.run_console()
            else:
                self.creds = flow.run_local_server(port=0)

        # Save the credentials for the next run.
        with open(self.TOKEN_PATH, 'wb') as token:
            pickle.dump(self.creds, token)

    @staticmethod
    def _safe_name(name):
        """Return *name* made usable as a single local path component."""
        safe = name.replace('/', '-')
        # '.', '..' and '' would resolve to an existing directory instead of
        # naming a new entry, so substitute a harmless placeholder.
        if safe in ('', '.', '..'):
            safe = '_'
        return safe

    @staticmethod
    def _download_name(name, mime_type):
        """Return the local file name used for a Drive file."""
        download_name = GoogleDriveService._safe_name(name)
        # Handle avatar files without extension. Compare case-insensitively
        # and accept '.jpg' so 'photo.jpg' does not become 'photo.jpg.jpeg'.
        if (mime_type == 'image/jpeg'
                and not download_name.lower().endswith(('.jpeg', '.jpg'))):
            download_name += '.jpeg'
        return download_name

    def download_file(self, file_id, file_name, file_path='./', verbose=False):
        """Download one Drive file to ``file_path``/``file_name``.

        Args:
            file_id: Drive id of the file to fetch.
            file_name: Local file name to write.
            file_path: Directory to write into (must already exist).
            verbose: When True, print the progress after every chunk.

        Raises:
            Any exception raised while opening the target or fetching chunks
            is re-raised after the partially written file has been removed.
        """
        target = os.path.join(file_path, file_name)
        request = self.client.files().get_media(fileId=file_id)
        try:
            # The context manager guarantees the handle is closed even when
            # a chunk request fails.
            with io.FileIO(target, 'wb') as fh:
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    if verbose:
                        print("%s - Download %d%%." % (
                            target, int(status.progress() * 100)))
        except BaseException:
            # A leftover partial file would be reported as "already exists"
            # on the next run and never be retried.
            if os.path.exists(target):
                os.remove(target)
            raise

    def _download_item(self, item, file_path):
        """Download a single non-folder listing entry unless it already exists."""
        download_name = self._download_name(item['name'], item['mimeType'])
        full_path_file_name = os.path.join(file_path, download_name)
        if os.path.exists(full_path_file_name):
            print("%s - already exists." % (full_path_file_name))
            return
        try:
            self.download_file(item['id'], download_name, file_path, verbose=True)
        except Exception as err:
            # Best effort: record the failure and carry on with the rest.
            # Exception (not a bare except) lets Ctrl-C stop the run.
            with open(self.ERROR_LOG_PATH, 'a+') as error_file:
                error_file.write(
                    "Couldn't download %s (%s) - Error: %s: %s\n"
                    % (full_path_file_name, item['id'],
                       type(err).__name__, err))

    def download_all_files_in_folder(self, folder_id, file_path='./'):
        """Recursively download a Drive folder, mirroring its structure.

        Args:
            folder_id: Drive id of the folder to download. None selects the
                'root' alias instead of producing an invalid query.
            file_path: Local directory to download into; created if missing.
        """
        if folder_id is None:
            folder_id = 'root'
        # Creates the path if it does not exist.
        os.makedirs(file_path, exist_ok=True)

        query = "'%s' in parents" % folder_id
        page_token = None
        while True:
            # Call the Drive v3 API
            results = self.client.files().list(
                q=query,
                fields="nextPageToken, files(id, name, mimeType, parents)",
                pageToken=page_token,
            ).execute()
            items = results.get('files', [])
            if not items:
                print('No files found in %s' % file_path)
            for item in items:
                if item['mimeType'] == self.FOLDER_MIME_TYPE:
                    # The trailing '' keeps the trailing separator on the
                    # sub-directory path, as in the printed messages.
                    new_path = os.path.join(
                        file_path, self._safe_name(item['name']), '')
                    self.download_all_files_in_folder(item['id'], new_path)
                else:
                    self._download_item(item, file_path)
            page_token = results.get('nextPageToken', None)
            if page_token is None:
                break
def main():
    """Create the Drive service and download everything under PARENT_FOLDER_ID."""
    service = GoogleDriveService()
    # To inspect state interactively, uncomment the line below:
    # import code; code.interact(local=dict(globals(), **locals()))
    service.download_all_files_in_folder(PARENT_FOLDER_ID)
if __name__ == '__main__':
    # Command-line entry point: parse the options into the module-level
    # settings read by main() and GoogleDriveService, then start the download.
    parser = argparse.ArgumentParser(description='Download files stored in Google Drive.')
    parser.add_argument(
        '--folder_id', dest='folder_id',
        help='parent folder id. If none given, all files under the Drive root are downloaded')
    # nargs='?' accepts both a bare "--remote" and "--remote true/false".
    parser.add_argument(
        '--remote', dest='remote', nargs='?', const='true', default='false',
        help='set to true if running remotely. Eg via SSH. default: false')
    args = parser.parse_args()
    # Fall back to the 'root' alias so an omitted --folder_id does not end up
    # as the literal string 'None' in the Drive query.
    PARENT_FOLDER_ID = args.folder_id or 'root'
    # bool('false') is True, so interpret the text explicitly.
    REMOTE_RUN = str(args.remote).strip().lower() in ('1', 'true', 't', 'yes', 'y', 'on')
    main()
@lelogrott
Copy link
Author

Before running it, you should enable the Google Drive API, generate and save the credentials.json file, and install the dependencies.
You can find more information in steps 1 and 2 of this Quickstart document.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment