-
-
Save xflr6/d106aa5b561fbac4ce1a9969eba728bb to your computer and use it in GitHub Desktop.
| """os.walk() variation with Google Drive API v3.""" | |
| from collections.abc import Iterator, Sequence | |
| import os | |
| import pathlib | |
| from typing import TypedDict | |
| # $ pip install google-api-python-client google-auth-oauthlib | |
| from apiclient import discovery | |
| from google.oauth2 import credentials | |
| from google_auth_oauthlib import flow as flow_lib | |
# OAuth scopes requested when authorizing; passed to get_credentials() below.
SCOPES = ['https://www.googleapis.com/auth/drive.metadata.readonly']

# Value of a file's ``mimeType`` that this module treats as "is a folder".
FOLDER = 'application/vnd.google-apps.folder'
def get_credentials(scopes: Sequence[str], *,
                    secrets: os.PathLike[str] | str = '~/client_secrets.json',
                    storage: os.PathLike[str] | str | None = '~/authorized_user.json'
                    ) -> credentials.Credentials:
    """Return user credentials for ``scopes``, authorizing interactively if needed.

    Args:
        scopes: OAuth scopes to request.
        secrets: Path to the client secrets JSON file (``~`` is expanded);
            only read when the authorization flow has to run.
        storage: Path of the authorized-user JSON file used to persist
            credentials between runs (``~`` is expanded), or ``None`` to
            neither load nor save them.

    Returns:
        The stored credentials if they exist and their token state is not
        ``INVALID``; otherwise the credentials produced by a fresh
        local-server authorization flow.
    """
    storage_path = None if storage is None else pathlib.Path(storage).expanduser()

    cached = None
    if storage_path is not None and storage_path.exists():
        cached = credentials.Credentials.from_authorized_user_file(storage_path,
                                                                   scopes=scopes)

    # Reuse stored credentials unless their token is reported as unusable.
    if cached is not None and cached.token_state.name != 'INVALID':
        return cached

    secrets_path = pathlib.Path(secrets).expanduser()
    flow = flow_lib.InstalledAppFlow.from_client_secrets_file(secrets_path,
                                                              scopes=scopes)
    flow.run_local_server()
    fresh = flow.credentials

    # Persist the new credentials so the next run can skip the flow.
    if storage_path is not None:
        storage_path.write_text(fresh.to_json())
    return fresh
# Obtain credentials when the module is loaded, using the default
# secrets/storage paths of get_credentials().
creds = get_credentials(SCOPES)
# Drive API v3 client; iterfiles() and walk() use this module-level object.
service = discovery.build('drive', version='v3', credentials=creds)
def _query_literal(value: str) -> str:
    """Return ``value`` as a single-quoted string literal for a Drive query."""
    # Backslashes are escaped first: otherwise a value ending in a backslash
    # would escape the closing quote and produce a malformed query.
    escaped = value.replace('\\', '\\\\').replace("'", "\\'")
    return f"'{escaped}'"


def iterfiles(name: str | None = None, *,
              is_folder: bool | None = None,
              parent: str | None = None,
              order_by: str = 'folder,name,createdTime') -> Iterator['File']:
    """Yield the files returned by ``files().list()`` for the given filters.

    All given filters are combined with ``and``; with no filter, no ``q``
    parameter is sent at all.

    Args:
        name: If given, only match files with exactly this name.
        is_folder: If ``True`` only match folders, if ``False`` only match
            non-folders, if ``None`` do not filter on the MIME type.
        parent: If given, only match files that have this id among their
            parents.
        order_by: Value passed as the ``orderBy`` request parameter.

    Yields:
        Each entry of ``response['files']``, page after page, until a
        response carries no ``nextPageToken``.
    """
    clauses = []
    if name is not None:
        clauses.append(f'name = {_query_literal(name)}')
    if is_folder is not None:
        operator = '=' if is_folder else '!='
        clauses.append(f"mimeType {operator} '{FOLDER}'")
    if parent is not None:
        clauses.append(f'{_query_literal(parent)} in parents')

    params = {'pageToken': None, 'orderBy': order_by}
    if clauses:
        params['q'] = ' and '.join(clauses)

    while True:
        response = service.files().list(**params).execute()
        yield from response['files']
        # The absence of a continuation token marks the last page.
        if 'nextPageToken' not in response:
            return
        params['pageToken'] = response['nextPageToken']
class File(TypedDict):
    """Typed view of the file dicts yielded by iterfiles() and walk().

    Only the keys declared here are typed; this module itself reads
    ``id``, ``name`` and ``mimeType``.
    """

    id: str  # used by walk() as the ``parent`` filter when listing children
    kind: str
    name: str  # used by walk() as a path component
    mimeType: str  # compared against FOLDER to tell folders from files
    resourceKey: str  # NOTE(review): may not be present on every file -- confirm
def walk(top: str = 'root', *,
         by_name: bool = False
         ) -> Iterator[tuple[tuple[str, ...], File, list[File], list[File]]]:
    """Walk a Drive folder tree top-down, similar to ``os.walk()``.

    Args:
        top: Id of the folder to start from (``'root'`` by default), or the
            folder's name when ``by_name`` is true.
        by_name: Interpret ``top`` as a folder name instead of a file id.

    Yields:
        ``(path, folder, dirs, files)`` for every visited folder, where
        ``path`` is the tuple of folder names from the start folder down to
        ``folder``, ``dirs`` are its sub-folders and ``files`` its other
        children. ``dirs`` is read again after the ``yield``, so a caller
        may modify it in place to change which sub-folders are visited.

    Raises:
        ValueError: If ``by_name`` is true and not exactly one folder has
            the given name, or if ``top`` is the id of a non-folder.
    """
    if by_name:
        matches = iterfiles(name=top, is_folder=True)
        root = next(matches, None)
        if root is None:
            raise ValueError(f'no folder named {top!r}')
        # A name is only a usable starting point if it is unambiguous.
        if next(matches, None) is not None:
            raise ValueError(f'more than one folder named {top!r}')
    else:
        root = service.files().get(fileId=top).execute()
        if root['mimeType'] != FOLDER:
            raise ValueError(f'not a folder: {root!r}')

    stack = [((root['name'],), root)]
    while stack:
        path, folder = stack.pop()
        dirs, files = [], []
        for f in iterfiles(parent=folder['id']):
            (dirs if f['mimeType'] == FOLDER else files).append(f)
        yield path, folder, dirs, files
        # Push in reverse so that the first sub-folder is popped (visited) next.
        stack.extend((path + (d['name'],), d) for d in reversed(dirs))
# Demo: walk a folder looked up by name, then the whole drive from its root,
# printing one tab-separated line per folder: path, #sub-folders, #files.
for kwargs in ({'top': 'spam', 'by_name': True}, {}):
    print()
    print(f'walk(**{kwargs!r})')
    for path, root, dirs, files in walk(**kwargs):
        columns = ['/'.join(path), str(len(dirs)), str(len(files))]
        print('\t'.join(columns))
Thanks @Schizo (looks like gist does not send notifications on comments), should be fixed in revision4.
Hello, good code!
If I have the following folders:
english/videos
spanish/videos
How can I list only the files from english/videos? Thanks!
One thing to note: if you have more than one folder with the same name, this won't work. To accommodate that, I made a quick update.
def walk(top):
    """Walk the folder tree below the folder with id ``top``.

    A falsy ``top`` starts at the drive root. Yields
    ``(path, folder, dirs, files)`` for every visited folder.
    """
    top = service.files().get(fileId=top or "root").execute()
    pending = [((top['name'],), top)]
    while pending:
        path, top = pending.pop()
        dirs, files = [], []
        for f in iterfiles(parent=top['id']):
            if f['mimeType'] != FOLDER:
                files.append(f)
            else:
                dirs.append(f)
        yield path, top, dirs, files
        # Sub-folders are pushed in listing order, so the last one is
        # popped (visited) first.
        for d in dirs:
            pending.append((path + (d['name'],), d))
To get the folder Id you can open google drive, then copy the value from the url.
Thanks a lot @dbfanmanga and @john-delivuk for pointing this out (sorry for the late response).
Updated the signature of walk() to accept a folder id instead of a name per default (use by_name for the previous behaviour).
Using 'root' as default for walking complete drive contents.
nice one. thanks!
As I needed to download files from a shared drive, I added some arguments:
top = service.files().get(fileId=top, supportsAllDrives=True).execute()
params = {
"pageToken": None,
"orderBy": order_by,
"supportsAllDrives": True,
"includeItemsFromAllDrives": True,
}
Thank You
Hello there!
I am a new user of the GDrive Python tools. Could you please give us an example of what ~/client_secrets.json should look like?
A use case would be nice for us dummies to be able to set up this code. :)
I tried downloading a JSON file from Google's service account screen, but it gave me the following error:
"InvalidClientSecretsError: Invalid file format. See https://developers.google.com/api-client-library/python/guide/aaa_client_secrets Expected a JSON object with a single property for a "web" or "installed" application"
Hey, :) maybe this helps: https://medium.com/@ashokyogi5/a-beginners-guide-to-google-oauth-and-google-apis-450f36389184 (there is an example for a clients_secrets.json).
https://github.com/googleapis/google-auth-library-python/blob/main/docs/index.rst
oauth2client was recently deprecated in favor of this library. For more details on the deprecation, see :doc:oauth2client-deprecation.
Thanks, I have updated get_credentials() in the latest version of the gist to use google_auth_oauthlib instead of oauth2client now using ~/authorized_user.json instead of ~/storage.json as default path for storage since the new library uses a slightly different format (notably 'token' instead of 'access_token' as key for the token in the JSON).
There seems to be a little bug in your code. If I have a folder structured as follows,
it will return /folders/ with 2 files rather than
/folders/folderA 1
/folders/folderB 1