Skip to content

Instantly share code, notes, and snippets.

@kelvinabrokwa
Created September 29, 2025 05:38
Show Gist options
  • Select an option

  • Save kelvinabrokwa/94f3f57dadb8705d0e25abb0f30b2c31 to your computer and use it in GitHub Desktop.

Select an option

Save kelvinabrokwa/94f3f57dadb8705d0e25abb0f30b2c31 to your computer and use it in GitHub Desktop.
Fetch Google Sheets
import logging
import re
from typing import Dict, Optional
import gspread
import polars as pl
from google.oauth2.service_account import Credentials
logger = logging.getLogger(__name__)
def get_spreadsheet_id(url: str) -> str:
patterns = [
r"/spreadsheets/d/([a-zA-Z0-9-_]+)",
r"[?&]id=([a-zA-Z0-9-_]+)",
]
for pattern in patterns:
match = re.search(pattern, url)
if match:
return match.group(1)
raise ValueError(f"Could not extract spreadsheet ID from URL: {url}")
def fetch_all_sheets(
url: str,
credentials_path: Optional[str],
) -> Dict[str, pl.DataFrame]:
spreadsheet_id = get_spreadsheet_id(url)
scopes = [
"https://www.googleapis.com/auth/spreadsheets.readonly",
"https://www.googleapis.com/auth/drive.readonly",
]
creds = Credentials.from_service_account_file(credentials_path, scopes=scopes)
client = gspread.authorize(creds)
spreadsheet = client.open_by_key(spreadsheet_id)
worksheets = spreadsheet.worksheets()
dataframes = {}
for worksheet in worksheets:
values = worksheet.get_all_values()
headers = values[0]
data = values[1:]
df = pl.DataFrame(data, schema=headers)
dataframes[worksheet.title] = df
return dataframes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment