Skip to content

Instantly share code, notes, and snippets.

@xflr6
Last active October 11, 2025 21:25
Show Gist options
  • Save xflr6/57508d28adec1cd3cd047032e8d81266 to your computer and use it in GitHub Desktop.
Save xflr6/57508d28adec1cd3cd047032e8d81266 to your computer and use it in GitHub Desktop.
Download all sheets of a Google Docs spreadsheet and export to individual CSV files
"""Download all sheets of a Google Docs spreadsheet as CSV."""
from collections.abc import Sequence
import contextlib
import csv
import itertools
import os
import pathlib
# $ pip install google-api-python-client google-auth-oauthlib
from apiclient import discovery
from google.oauth2 import credentials
from google_auth_oauthlib import flow as flow_lib
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
SHEET = '1dR13B3Wi_KJGUJQ0BZa2frLAVxhZnbz0hpwCcWSvb20'
def get_credentials(scopes: Sequence[str], *,
secrets: os.PathLike[str] | str = '~/client_secrets.json',
storage: os.PathLike[str] | str | None = '~/authorized_user.json'
) -> credentials.Credentials:
creds = None
if storage is not None:
storage = pathlib.Path(storage).expanduser()
if storage.exists():
creds = credentials.Credentials.from_authorized_user_file(storage, scopes=scopes)
if creds is None or creds.token_state.name == 'INVALID':
secrets = pathlib.Path(secrets).expanduser()
flow = flow_lib.InstalledAppFlow.from_client_secrets_file(secrets, scopes=scopes)
flow.run_local_server()
creds = flow.credentials
if storage is not None:
authorized_user_info = creds.to_json()
storage.write_text(authorized_user_info)
return creds
def export_csv(doc_id: str, /, *,
filename_template: str = '{title} - {sheet}.csv') -> None:
for (doc, sheet), rows in itersheets(doc_id):
filename = filename_template.format(title=doc, sheet=sheet)
with open(filename, mode='w') as f:
write_csv(f, rows)
def itersheets(id: str, /):
doc = service.spreadsheets().get(spreadsheetId=id).execute()
title = doc['properties']['title']
sheets = [s['properties']['title'] for s in doc['sheets']]
ranges = [f"'{title}'" for title in sheets]
params = {'spreadsheetId': id, 'ranges': ranges, 'majorDimension': 'ROWS'}
result = service.spreadsheets().values().batchGet(**params).execute()
for name, vr in zip(sheets, result['valueRanges'], strict=True):
yield (title, name), vr['values']
def write_csv(file, /, rows, *,
dialect: csv.Dialect | type[csv.Dialect] | str = 'excel') -> None:
writer = csv.writer(file, dialect=dialect)
writer.writerows(rows)
creds = get_credentials(SCOPES)
service = discovery.build('sheets', version='v4', credentials=creds)
export_csv(SHEET)
def to_pandas(docid, **kwargs):
import io
import pandas as pd
for (doc, sheet), rows in itersheets(docid):
with io.StringIO() as f:
write_csv(f, rows)
f.seek(0)
df = pd.read_csv(f, **kwargs)
df.name = sheet
yield df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment