-
-
Save yaci/581062fd19713faffffe1668f37707c0 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3 | |
""" | |
!! IMPORTANT !!
!! READ THIS !!
In order to run this script you need python3 and pip3 installed.
You also need some additional python modules. Please run
sudo pip3 install httplib2 oauth2client
sudo pip3 install --upgrade google-api-python-client
To authenticate with Google follow the instructions at
https://developers.google.com/drive/v3/web/quickstart/python
A client_secret.json file needs to be placed in the same directory
as this script. The link above contains the instructions on
how to obtain this file. Once you complete these steps run
python3 this_script.py --noauth_local_webserver
and follow the instructions.
On subsequent runs you can execute
python3 this_script.py | column -t -x -s '|'
for nicer formatting.
Most of the code is copy-pasted from Google's
official docs, I only made minor modifications.
""" | |
import httplib2 | |
import os | |
from apiclient import discovery | |
from oauth2client import client | |
from oauth2client import tools | |
from oauth2client.file import Storage | |
# Parse oauth2client's standard command-line flags (for example
# --noauth_local_webserver) when argparse can be imported; otherwise keep
# ``flags`` as None so get_credentials() falls back to tools.run().
flags = None
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    pass

# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Drive API Python Quickstart'
def get_credentials():
    """Gets valid user credentials from storage.

    Stored credentials are read from
    ~/.credentials/drive-python-quickstart.json. If nothing has been stored,
    or if the stored credentials are invalid, the OAuth2 flow is completed
    to obtain new credentials, which the flow saves to the same store.

    Returns:
        Credentials, the obtained credential.
    """
    credential_dir = os.path.join(os.path.expanduser('~'), '.credentials')
    # exist_ok=True replaces the "check, then create" sequence, which could
    # fail if the directory appeared between the two steps.
    os.makedirs(credential_dir, exist_ok=True)
    credential_path = os.path.join(credential_dir,
                                   'drive-python-quickstart.json')

    store = Storage(credential_path)
    credentials = store.get()
    if credentials and not credentials.invalid:
        return credentials

    # Nothing usable on disk: run the interactive OAuth2 flow.
    flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
    flow.user_agent = APPLICATION_NAME
    if flags:
        credentials = tools.run_flow(flow, store, flags)
    else:  # Needed only for compatibility with Python 2.6
        credentials = tools.run(flow, store)
    print('Storing credentials to ' + credential_path)
    return credentials
def main():
    """
    Creates a Google Drive API service object and prints one
    ``checksum | size | name`` line per file.

    Results are fetched in pages of up to 1000 files and every page is
    followed via ``nextPageToken``, so the listing is not cut off after the
    first page.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)

    items = []
    page_token = None
    while True:
        results = service.files().list(
            pageSize=1000,
            fields="nextPageToken, files(name, md5Checksum, size)",
            pageToken=page_token).execute()
        items.extend(results.get('files', []))
        # A missing token means the last page has been read.
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    if not items:
        print('No files found.')
        return

    print('Files:')
    for item in items:
        # Uncomment the two lines below to list only the files which have
        # a checksum assigned.
        # if 'md5Checksum' not in item:
        #     continue
        name = item['name']
        checksum = item.get('md5Checksum', 'no checksum')
        size = item.get('size', '-')
        print('{1} | {2} | {0}'.format(name, checksum, size))


if __name__ == '__main__':
    main()
Thanks haidahaha! I fixed the typo that you mentioned.
To have files stored in Team Drives (shared drives) listed as well, edit lines 84 and 85 to this:
results = service.files().list(
pageSize=1000,
fields="nextPageToken, files(name, md5Checksum, size)",
includeItemsFromAllDrives=True,
supportsAllDrives=True).execute()
from [https://developers.google.com/drive/api/v3/enable-shareddrives#including_shared_drive_content_fileslist]
Could you please advise if/how I can call this script only for a specific dir? e.g. /Reports or /Reports/2018/
@simkin in line 82, you can add this param:
q="'FOLDERID' in parents"
To find the FOLDERID, just click on the folder in your Google Drive; the URL will look like this: https://drive.google.com/drive/u/0/folders/FOLDERID
I need to find the md5Checksum of a folder. I have added q="'FOLDERID' in parents", but then what should I do? @haidahaha
As far as I can tell, folders do not have an md5Checksum; only files do.
This script no longer works since Google has deprecated out of band (OOB) authentication. The solution is to use the newer Google API library. I've gone ahead and rewritten this script (as well as added several new features) to support this.
#!/usr/bin/env python3
"""
In order to run this script you need python3 and pip3 installed.
You also need some additional python modules. Please run:
pip3 install httplib2 google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client tabulate
To authenticate with Google, follow the instructions at:
https://developers.google.com/drive/api/v3/quickstart/python
You need a client_secret.json file in the same directory as this script.
Example usage:
python3 drive_list.py [options]
Options:
-f, --folder Starting folder ID (default: 'root')
-d, --delimiter Output delimiter (default: ',')
-e, --exclude-folders Exclude folders from the output
-t, --file-types Filter by file extensions (comma-separated, e.g., .jpg,.pdf)
-r, --raw-size Show file sizes in raw bytes instead of human-readable
-x, --exclude-no-checksum Exclude files without an MD5 checksum
--no-mime Omit MIME type column
--no-size Omit file size column
-h, --help Show this help message and exit
Example:
python3 drive_list.py
python3 drive_list.py -rxe
python3 drive_list.py -f 1AbCDeFGHiJkLmNoP -d '|' -e -t .pdf,.docx -r -x --no-mime
"""
import os
import argparse
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
SCOPES = ['https://www.googleapis.com/auth/drive.metadata.readonly']
def authenticate():
    """Return Google API credentials for the scopes in ``SCOPES``.

    Cached credentials are read from ``token.json`` in the current working
    directory. If they are still valid they are returned as-is. If they have
    expired but carry a refresh token, they are refreshed without user
    interaction. Otherwise (no cache, no refresh token, or the refresh is
    rejected) the interactive browser flow is run using
    ``client_secret.json``. New or refreshed credentials are written back to
    ``token.json``.

    Returns:
        The credentials object to pass to ``googleapiclient.discovery.build``.
    """
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        # Imported here so the module's import block stays untouched; both
        # modules ship with google-auth, which the file already depends on.
        from google.auth.exceptions import RefreshError
        from google.auth.transport.requests import Request
        try:
            creds.refresh(Request())
        except RefreshError:
            # Refresh token revoked or expired: fall back to a fresh login.
            creds = None
    else:
        creds = None

    if creds is None:
        flow = InstalledAppFlow.from_client_secrets_file('client_secret.json', SCOPES)
        creds = flow.run_local_server(port=0)

    with open('token.json', 'w') as token:
        token.write(creds.to_json())
    return creds
def human_readable_size(size_bytes):
    """Format a byte count as a string with one decimal and a binary unit.

    Args:
        size_bytes: The size as an int or a decimal string, or ``None`` / ``""``
            when no size is available.

    Returns:
        ``"-"`` when no size is available, otherwise a string such as
        ``"512.0 B"`` or ``"1.5 KB"``. Units step by 1024 from B up to PB;
        anything larger is still reported in PB.

    Raises:
        ValueError: If ``size_bytes`` is a string that is not an integer.
    """
    # Only a missing value means "no size"; a real size of 0 (empty file)
    # must be formatted like any other size instead of being shown as "-".
    if size_bytes is None or size_bytes == "":
        return "-"
    value = int(size_bytes)
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if value < 1024:
            return f"{value:.1f} {unit}"
        value /= 1024
    return f"{value:.1f} PB"
def list_files_recursive(service, folder_id='root', path='', delimiter=',',
                         exclude_folders=False, file_types=None,
                         human_readable=True, exclude_no_checksum=False,
                         show_mime=True, show_size=True):
    """Print one delimited row per item under a Drive folder, recursively.

    Each row is ``checksum[, size][, mime type], path``. Folder rows use the
    literal ``FOLDER`` as checksum and ``-`` for size and MIME type; files
    without an MD5 checksum show ``no checksum``. Sub-folders are always
    descended into, even when their own rows are suppressed.

    Args:
        service: Drive v3 service object (as returned by ``build``).
        folder_id: ID of the folder whose children are listed.
        path: Display path of ``folder_id``; prefixed to every child name.
        delimiter: String placed between the columns of a row.
        exclude_folders: If true, do not print rows for folders.
        file_types: Optional collection of lowercase extensions including the
            dot (e.g. ``'.pdf'``); files with other extensions are skipped.
        human_readable: If true, sizes are formatted with
            ``human_readable_size``; otherwise the raw value is printed.
        exclude_no_checksum: If true, skip files without an MD5 checksum.
        show_mime: If true, include the MIME type column.
        show_size: If true, include the size column.
    """
    folder_mime = 'application/vnd.google-apps.folder'
    # Backslashes and single quotes must be escaped inside a quoted value of
    # a Drive search query, otherwise an unusual ID breaks the query.
    safe_id = folder_id.replace('\\', '\\\\').replace("'", "\\'")
    query = f"'{safe_id}' in parents and trashed = false"

    page_token = None
    while True:
        response = service.files().list(
            q=query,
            spaces='drive',
            fields="nextPageToken, files(id, name, mimeType, size, md5Checksum)",
            pageSize=1000,
            pageToken=page_token,
        ).execute()

        for entry in response.get('files', []):
            mime_type = entry['mimeType']
            name = entry['name']
            # Drive paths are display strings, not local paths: always join
            # with "/" (os.path.join would drop the parent when a name starts
            # with "/" and would use "\\" on Windows).
            full_path = f"{path}/{name}" if path else name

            if mime_type == folder_mime:
                if not exclude_folders:
                    row = ["FOLDER"]
                    if show_size:
                        row.append("-")
                    if show_mime:
                        row.append("-")
                    row.append(full_path)
                    print(delimiter.join(row))
                list_files_recursive(
                    service,
                    folder_id=entry['id'],
                    path=full_path,
                    delimiter=delimiter,
                    exclude_folders=exclude_folders,
                    file_types=file_types,
                    human_readable=human_readable,
                    exclude_no_checksum=exclude_no_checksum,
                    show_mime=show_mime,
                    show_size=show_size,
                )
                continue

            extension = os.path.splitext(name)[-1].lower()
            if file_types and extension not in file_types:
                continue
            checksum = entry.get('md5Checksum')
            if exclude_no_checksum and not checksum:
                continue

            raw_size = entry.get('size')
            if human_readable:
                size = human_readable_size(raw_size)
            else:
                size = raw_size or "-"

            row = [checksum or 'no checksum']
            if show_size:
                row.append(str(size))
            if show_mime:
                row.append(mime_type)
            row.append(full_path)
            print(delimiter.join(row))

        # A missing token means the last page of this folder has been read.
        page_token = response.get('nextPageToken')
        if not page_token:
            break
def _normalize_file_types(raw_types):
    """Turn raw ``-t`` values into a set of lowercase, dot-prefixed extensions.

    Accepts both space-separated arguments (``-t .pdf .jpg``) and
    comma-separated lists (``-t .pdf,.jpg``). Returns ``None`` when no
    usable extension was given, which disables the filter.
    """
    if not raw_types:
        return None
    extensions = set()
    for token in raw_types:
        for part in token.split(','):
            part = part.strip().lower()
            if not part:
                continue
            # Extensions are compared against os.path.splitext() output,
            # which always starts with a dot.
            extensions.add(part if part.startswith('.') else '.' + part)
    return extensions or None


def main():
    """Parse the command line, authenticate and print the Drive listing.

    Prints a header row followed by one row per file/folder found under the
    starting folder, using the chosen delimiter and column options.
    """
    parser = argparse.ArgumentParser(description='Recursively list Google Drive files.')
    parser.add_argument('-f', '--folder', default='root', help='Starting folder ID (default: root)')
    parser.add_argument('-d', '--delimiter', default=',', help='Output delimiter (default: comma)')
    parser.add_argument('-e', '--exclude-folders', action='store_true', help='Do not list folders')
    parser.add_argument('-t', '--file-types', nargs='*',
                        help='Only include files with these extensions, space- or '
                             'comma-separated (e.g. .pdf .jpg or .pdf,.jpg)')
    parser.add_argument('-r', '--raw-size', action='store_true', help='Show raw file size in bytes')
    parser.add_argument('-x', '--exclude-no-checksum', action='store_true', help='Exclude files without MD5 checksum')
    parser.add_argument('--no-mime', action='store_true', help='Do not include MIME type column')
    parser.add_argument('--no-size', action='store_true', help='Do not include file size column')
    args = parser.parse_args()

    creds = authenticate()
    service = build('drive', 'v3', credentials=creds)

    # The header mirrors the columns list_files_recursive() emits.
    header = ['Checksum']
    if not args.no_size:
        header.append('Size (bytes)' if args.raw_size else 'Size')
    if not args.no_mime:
        header.append('MIME Type')
    header.append('Path')
    print(args.delimiter.join(header))

    list_files_recursive(
        service,
        folder_id=args.folder,
        delimiter=args.delimiter,
        exclude_folders=args.exclude_folders,
        file_types=_normalize_file_types(args.file_types),
        human_readable=not args.raw_size,
        exclude_no_checksum=args.exclude_no_checksum,
        show_mime=not args.no_mime,
        show_size=not args.no_size,
    )


if __name__ == '__main__':
    main()
Thanks @haidahaha :)