Last active
August 10, 2021 20:53
-
-
Save Lokno/4ca593b07d8d4bd8bf1a270b810bb707 to your computer and use it in GitHub Desktop.
Downloads a public Google Sheets spreadsheet and prints it to stdout in CSV format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Downloads a public Google Sheets spreadsheet and prints it to stdout in CSV format | |
| # | |
| # NOTE: Sometimes if the format of cells are set to "Automatic" and there are mixed types in a single | |
| # column, some values might be misinterpreted as empty. If this happens, set the format of all | |
| # cells in that column, or even the entire table, to "Plain Text" | |
| # | |
| # To make a spreasheet public: | |
| # In Google Sheets, click the Share button and then select "change to anyone with the link" | |
| from urllib.parse import quote | |
| from urllib import request | |
| import sys | |
| import re | |
| id_re = re.compile('[0-9A-Za-z-_]+') | |
| url_re = re.compile('https://docs.google.com/spreadsheets/d/([0-9A-Za-z-_]+)/edit(\?usp=sharing)?(#gid=[0-9]+)?') | |
| if len(sys.argv) < 2: | |
| print(f' usage: {sys.argv[0]:s} <url|sheet id> [<sheet name>]') | |
| sys.exit(-1) | |
| sheet_id = sys.argv[1] | |
| m = url_re.match(sheet_id) | |
| if not id_re.match(sheet_id) or not m: | |
| print('Error: Sheet URL or ID is invalid.') | |
| sys.exit(-1) | |
| sheet_name = '' | |
| if m: | |
| sheet_id = m.group(1) | |
| if m.group(3) is not None: | |
| sheet_name = '&' + m.group(3)[1:] | |
| if len(sys.argv) > 2: | |
| sheet_name = '&sheet=' + quote(sys.argv[2]) | |
| url = f'https://docs.google.com/spreadsheets/d/{sheet_id:s}/gviz/tq?tqx=out:csv&headers=0{sheet_name:s}' | |
| response = request.urlopen(url) | |
| response_str = response.read().decode('utf-8') | |
| if '<!DOCTYPE html>' in response_str: | |
| print('Error: HTML response. Is your Google Sheet set to public?') | |
| else: | |
| for l in response_str.split('\n'): | |
| print(','.join([item.strip('"\n') for item in l.split(',')])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment