Skip to content

Instantly share code, notes, and snippets.

@tejastank
Created October 5, 2023 10:51
Show Gist options
  • Save tejastank/69408df77134695add700306023cc9d6 to your computer and use it in GitHub Desktop.
Save tejastank/69408df77134695add700306023cc9d6 to your computer and use it in GitHub Desktop.
Google Services
from dataclasses import dataclass
@dataclass
class ServiceAccountInfo:
private_key: str
client_email: str
token_uri: str
from __future__ import annotations
from ctypes import Union
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import List
import numpy as np
import pandas as pd
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import Resource
from pygsheets.client import Client
from pygsheets.spreadsheet import Spreadsheet
from src.settings import settings
@dataclass
class ServiceAccountInfo:
private_key: str
client_email: str
token_uri: str
class GoogleDrive:
def __init__(self, service: Resource) -> None:
self.service = service
def create_or_update_csv(self, fp: Path, folder_name: str = None):
parent_id = self.get_folder_id(folder_name)
existing = [
f for f in self.list_files(parent_id=parent_id) if f["name"] == fp.name
]
if len(existing) > 1:
raise Exception("More than one file matches")
elif len(existing) == 1:
print(f"Updating: {folder_name}/{fp.name}")
self.service.files().update(
fileId=existing[0]["id"],
media_body=fp.as_posix(),
supportsTeamDrives=True,
).execute()
else:
print(f"Creating: {fp.name}")
self.service.files().create(
body={
"name": fp.name,
"mimeType": "text/csv",
"parents": [parent_id],
},
media_body=fp.as_posix(),
supportsTeamDrives=True,
).execute()
def get_folder_id(self, name, parent_id: str = None) -> str:
folders = self.list_folders(parent_id=parent_id)
folder_ids = [f["id"] for f in folders if f["name"] == name]
if not len(folder_ids):
raise Exception("Folder not found")
elif len(folder_ids) > 1:
raise Exception("Multiple folders found")
return folder_ids[0]
def list_folders(self, parent_id: str = None) -> List:
query = "mimeType = 'application/vnd.google-apps.folder'"
if parent_id is not None:
query = f"'{parent_id}' in parents and mimeType = 'application/vnd.google-apps.folder'"
response = self._list_files(q=query)
return response
def list_files(self, parent_id: str = None) -> List:
query = f"'{parent_id}' in parents"
return self._list_files(q=query)
def _list_files(self, **kwargs) -> List:
response = (
self.service.files()
.list(
**kwargs,
supportsTeamDrives=True,
includeTeamDriveItems=True,
)
.execute()
)
return response["files"]
import dataclasses
import os
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Union
import pandas as pd
from apiclient.discovery import Resource, build
from google.oauth2.service_account import Credentials
class TimeDimension(str, Enum):
YEAR_MONTH = "ga:yearMonth"
class UserDimension(str, Enum):
USER_TYPE = "ga:userType"
class UserMetric(str, Enum):
USERS = "ga:users"
class HitDimension(str, Enum):
PAGE_PATH = "ga:pagePath"
class HitMetric(str, Enum):
PAGEVIEWS = "ga:pageviews"
class SessionMetric(str, Enum):
SESSIONS = "ga:sessions"
AVG_SESSION_DURATION = "ga:avgSessionDuration"
BOUNCES = "ga:bounces"
BOUNCE_RATE = "ga:bounceRate"
class SessionDimension(str, Enum):
LANDING_PAGE_PATH = "ga:landingPagePath"
MEDIUM = "ga:medium"
SOURCE = "ga:source"
SOURCE_MEDIUM = "ga:sourceMedium"
@dataclass
class ServiceAccountInfo:
private_key: str
client_email: str
token_uri: str
class GoogleAnalyticsReporting:
service: Resource = None
def __init__(self, service_account_info: ServiceAccountInfo):
credentials = Credentials.from_service_account_info(
dataclasses.asdict(service_account_info),
scopes=[
"https://www.googleapis.com/auth/analytics.readonly",
],
)
# Build the service object.
self.service = build(
"analyticsreporting",
"v4",
credentials=credentials,
cache_discovery=False,
)
@classmethod
def from_env(cls):
service_acocunt_info = ServiceAccountInfo(
private_key=os.environ["GA_PRIVATE_KEY"],
client_email=os.environ["GA_CLIENT_EMAIL"],
token_uri=os.environ["GA_TOKEN_URI"],
)
return cls(service_acocunt_info)
def fetch_report(
self, query: Dict, as_df: bool = False
) -> Union[pd.DataFrame, Dict]:
report = self.fetch_reports([query])[0]
return self.report_as_df(report) if as_df else report
def fetch_reports(self, queries: List[Dict]) -> List[dict]:
results = (
self.service.reports().batchGet(body={"reportRequests": queries}).execute()
)
return results.get("reports")
@staticmethod
def report_as_df(response) -> pd.DataFrame:
dimensions = response["columnHeader"]["dimensions"]
metrics = response["columnHeader"]["metricHeader"]["metricHeaderEntries"]
metrics = {m["name"]: map_dtype(m["type"]) for m in metrics}
cols = dimensions + list(metrics.keys())
rows = []
for row in response["data"]["rows"]:
data = row["dimensions"]
for metric in row["metrics"]:
data.extend(metric["values"])
rows.append(data)
dtypes = {k: v for k, v in metrics.items() if v is not None}
df = pd.DataFrame(rows, columns=cols).astype(dtypes)
df.columns = [format_column(c) for c in df.columns]
return df
def map_dtype(ga_dtype: str):
if ga_dtype == "INTEGER":
return int
if ga_dtype in ("PERCENT", "TIME"):
return float
else:
return None
def format_column(name: str) -> str:
formatted = name.replace("ga:", "")
formatted_chars = [c if c.islower() else f"_{c.lower()}" for c in formatted]
return "".join(formatted_chars)
[tool.poetry]
name = "web"
version = "0.1.0"
description = ""
authors = ["Ian <[email protected]>"]
readme = "README.md"
[tool.poetry.dependencies]
pandas = "^1.5.3"
pydantic = "^1.10.6"
pygsheets = "^2.0.6"
python = ">=3.10,<3.12"
python-dotenv = "^1.0.0"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
from pathlib import Path
from pydantic import BaseSettings
class Settings(BaseSettings):
GCP_PRIVATE_KEY: str
GCP_CLIENT_EMAIL: str
GCP_TOKEN_URI: str
class Config:
env_file = Path(__file__).resolve().parents[1] / ".env"
settings = Settings()
from __future__ import annotations
from ctypes import Union
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import List
import numpy as np
import pandas as pd
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import Resource
from pygsheets.client import Client
from pygsheets.spreadsheet import Spreadsheet
from src.settings import settings
class GoogleSheets(Client):
@classmethod
def from_account_info(
cls, service_account_info: ServiceAccountInfo
) -> GoogleSheets:
credentials = cls._get_credentials(service_account_info)
return cls(credentials)
def _get_credentials(service_account_info: ServiceAccountInfo):
return Credentials.from_service_account_info(
asdict(service_account_info),
scopes=[
"https://www.googleapis.com/auth/spreadsheets",
"https://www.googleapis.com/auth/drive",
],
)
def get_spreadsheet(self, name) -> Spreadsheet:
responses_ssh = self.open(name)
return responses_ssh
def get_worksheet_as_df(
self,
ssh: Union[Spreadsheet, str],
wsh: str,
has_header=True,
include_tailing_empty=False,
empty_value=np.NaN,
) -> pd.DataFrame:
if isinstance(ssh, str):
ssh = self.get_spreadsheet(ssh)
wsh = ssh.worksheet_by_title(wsh)
return wsh.get_as_df(
has_header=has_header,
include_tailing_empty=include_tailing_empty,
empty_value=empty_value,
)
def write_worsheet(self, df, ssh, wsh_name):
if isinstance(ssh, str):
ssh = self.get_spreadsheet(ssh)
wsh_names = [wsh.title for wsh in ssh.worksheets()]
if wsh_name not in wsh_names:
wsh = ssh.add_worksheet(wsh_name)
else:
wsh = ssh.worksheet_by_title(wsh_name)
wsh.clear()
wsh.set_dataframe(df, (1, 1), nan="")
google_sheets_client = GoogleSheets.from_account_info(
service_account_info=ServiceAccountInfo(
private_key=settings.GCP_PRIVATE_KEY,
client_email=settings.GCP_CLIENT_EMAIL,
token_uri=settings.GCP_TOKEN_URI,
)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment