# Source: GitHub Gist by @wdecoster, created July 7, 2018 21:12
# (gist id: wdecoster/9b18cc597b91eb609323a4779cd091f0).
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
from time import time
from datetime import datetime
def main():
    """Print a timestamped total for the files found in the target folder.

    The folder listing is done with the default (v2) client and the
    plain-text export with a v3 client; the result is written to stdout
    as ``<timestamp>\\t<count>``.
    """
    # Default API version: used for the children() listing.
    listing_service = setup_api()
    ids = get_files_in_folder(listing_service, folder_id="folder_id")

    # A second client at v3 is used for exporting the file contents.
    export_service = setup_api(version='v3')
    total = get_wordcount(export_service, ids)

    stamp = datetime.fromtimestamp(time()).strftime('%Y-%m-%d %H:%M:%S')
    print(f"{stamp}\t{total}")
def setup_api(version='v2'):
    """
    Build and return a Drive API client for the requested API version.

    Stored credentials are read from credentials.json. When none are
    stored, or the stored ones are invalid, the OAuth flow is run using
    client_secret.json and its result is used instead.

    If you adapt the scopes you have to delete the credentials.json file
    to enable reauthorization.
    """
    scopes = (
        'https://www.googleapis.com/auth/drive.metadata.readonly',
        'https://www.googleapis.com/auth/drive.file',
        'https://www.googleapis.com/auth/drive',
        'https://www.googleapis.com/auth/spreadsheets.readonly',
    )
    storage = file.Storage('credentials.json')
    credentials = storage.get()

    usable = bool(credentials) and not credentials.invalid
    if not usable:
        # No usable stored credentials: run the authorization flow.
        auth_flow = client.flow_from_clientsecrets('client_secret.json', scopes)
        credentials = tools.run_flow(auth_flow, storage)

    authorized_http = credentials.authorize(Http())
    return build('drive', version, http=authorized_http)
def get_files_in_folder(service, folder_id):
    """Return the ids of all children of a folder.

    Pages through ``service.children().list(...)`` for ``folder_id``,
    following ``nextPageToken`` until no further page is reported, and
    returns the collected ``id`` values in the order received.
    """
    collected = []
    token = None
    more_pages = True
    while more_pages:
        # Only send pageToken once a previous page has supplied one.
        extra = {'pageToken': token} if token else {}
        listing = service.children().list(folderId=folder_id, **extra).execute()
        for child in listing['items']:
            collected.append(child['id'])
        token = listing.get('nextPageToken')
        more_pages = bool(token)
    return collected
def get_wordcount(service, file_ids):
    """Return the total character count of the list of files.

    Despite the name, this counts characters, not words. Each file is
    exported as plain text, and every character is counted except line
    breaks (line feed, carriage return) and underscores (titles).

    Args:
        service: API client exposing ``files().export(...).execute()``,
            which is expected to return the exported content as bytes.
        file_ids: Iterable of file ids to export and count.

    Returns:
        The number of counted characters summed over all files; 0 when
        ``file_ids`` is empty. Files whose export is empty are skipped.
    """
    ignored = {'\n', '\r', '_'}
    character_count = 0
    for file_id in file_ids:
        data = service.files().export(fileId=file_id, mimeType='text/plain').execute()
        if not data:
            continue
        # 'utf-8-sig' decodes exactly like 'utf-8' but drops a leading
        # byte-order mark, so a BOM at the start of an export is not
        # counted as a character of the document.
        decoded = data.decode('utf-8-sig')
        character_count += sum(1 for char in decoded if char not in ignored)
    return character_count
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# (GitHub page footer: sign in on GitHub to comment on this gist.)