Created
July 7, 2018 21:12
-
-
Save wdecoster/9b18cc597b91eb609323a4779cd091f0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from apiclient.discovery import build | |
| from httplib2 import Http | |
| from oauth2client import file, client, tools | |
| from time import time | |
| from datetime import datetime | |
def main():
    """Print a timestamp and the total character count for one Drive folder.

    The folder's children are listed through the default (v2) client, the
    documents are exported and counted through a v3 client, and the result is
    written to stdout as ``<timestamp>\\t<count>``.
    """
    listing_service = setup_api()
    document_ids = get_files_in_folder(listing_service, folder_id="folder_id")

    export_service = setup_api(version='v3')
    total = get_wordcount(export_service, document_ids)

    # The timestamp is taken after counting, so it marks when the run finished.
    timestamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d %H:%M:%S')
    print(timestamp, total, sep='\t')
def setup_api(version='v2'):
    """
    Return an authorized Drive API client for the requested API version.

    Stored credentials are read from credentials.json. When none are stored,
    or the stored ones are marked invalid, the OAuth flow described by
    client_secret.json is run (opening a browser for authorization) and its
    result is used instead.

    If you adapt the scopes you have to delete the credentials.json file
    to enable reauthorization.
    """
    scopes = (
        'https://www.googleapis.com/auth/drive.metadata.readonly',
        'https://www.googleapis.com/auth/drive.file',
        'https://www.googleapis.com/auth/drive',
        'https://www.googleapis.com/auth/spreadsheets.readonly',
    )
    secret_path = 'client_secret.json'
    credential_store = file.Storage('credentials.json')

    credentials = credential_store.get()
    if not (credentials and not credentials.invalid):
        # No usable stored credentials: run the authorization flow.
        credentials = tools.run_flow(
            client.flow_from_clientsecrets(secret_path, scopes),
            credential_store,
        )

    authorized_http = credentials.authorize(Http())
    return build('drive', version, http=authorized_http)
def get_files_in_folder(service, folder_id):
    """Return the ids of all children of a folder, following pagination.

    ``service`` must expose ``children().list(folderId=..., pageToken=...)``
    whose ``execute()`` result is a mapping with an ``items`` list and an
    optional ``nextPageToken``.
    """
    child_ids = []
    request_kwargs = {}  # the first request carries no page token
    while True:
        response = service.children().list(folderId=folder_id, **request_kwargs).execute()
        child_ids += [child['id'] for child in response['items']]

        next_token = response.get('nextPageToken')
        if not next_token:
            # A missing or empty token means this was the last page.
            return child_ids
        request_kwargs = {'pageToken': next_token}
def get_wordcount(service, file_ids):
    """Return the total number of counted characters in the given files.

    Despite the function's name, the value is a character count, not a word
    count. Each file is exported as ``text/plain`` and decoded as UTF-8.
    Line breaks (``\\n``, ``\\r``) and underscores (which the original note
    associates with titles) are not counted, and neither is a leading
    byte-order mark.

    ``service`` must expose ``files().export(fileId=..., mimeType=...)``
    whose ``execute()`` result is the exported content as bytes. Files whose
    export is empty contribute nothing.
    """
    ignored = {'\n', '\r', '_'}
    character_count = 0
    for file_id in file_ids:
        data = service.files().export(fileId=file_id, mimeType='text/plain').execute()
        if not data:
            continue
        # 'utf-8-sig' drops a leading BOM if one is present, so it is not
        # counted as a character; otherwise it decodes exactly like 'utf-8'.
        decoded = data.decode('utf-8-sig')
        character_count += sum(1 for char in decoded if char not in ignored)
    return character_count
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment