Skip to content

Instantly share code, notes, and snippets.

@bll-bobbygill
Last active August 20, 2024 14:15
Show Gist options
  • Save bll-bobbygill/45b3beb33dfe19d9c32efa9902abbc9c to your computer and use it in GitHub Desktop.
Save bll-bobbygill/45b3beb33dfe19d9c32efa9902abbc9c to your computer and use it in GitHub Desktop.
Python Code to Automatically Download Google Play Install Metrics and Publish to a Google Sheet
SPREADSHEET_ID=<Google Spreadsheet ID>
INSTALLS_WORKSHEET_NAME=Installs
PLAY_STORE_BUCKET_NAME= pubsite_prod_<google play account id>
START_DATE=2024-06-12
DAYS_TO_FETCH_INSTALLS_FOR=5
PACKAGE_NAME=<Android package name>
GOOGLE_CLOUD_PROJECT_ID=<Google Cloud Project ID>
# cron
# Run the command shell script to download the Install data every 15 minutes
*/15 * * * * /bin/bash /app/run.sh >> /var/log/cron.log 2>&1
FROM python:3.10-slim
# Keeps Python from generating .pyc files in the container
ENV PYTHONDONTWRITEBYTECODE 1
# Turns off buffering for easier container logging
ENV PYTHONUNBUFFERED 1
# Install dependencies
RUN apt-get update && apt-get install -y \
wget \
build-essential \
cron \
bash \
&& wget https://install.python-poetry.org -O get-poetry.py \
&& python get-poetry.py \
&& rm get-poetry.py
ENV PATH="/root/.local/bin:${PATH}"
WORKDIR /app
COPY . .
RUN poetry config virtualenvs.create false \
&& poetry install --no-interaction --no-ansi
# Copy the crontab file to the cron.d directory
COPY cron /etc/cron.d/mycron
# Give execution rights on the cron job
RUN chmod 0644 /etc/cron.d/mycron
# Apply cron job
RUN crontab /etc/cron.d/mycron
# Create the log file to be able to run tail
RUN touch /var/log/cron.log
# Ensure the wrapper script is executable
RUN chmod +x /app/run.sh
CMD cron -f
from google.cloud import storage
from google.oauth2 import service_account
from google.oauth2.credentials import Credentials
import os
import datetime
import pandas as pd
import gspread
#This snippet is intended to run in a Docker container and assumes the following data paths are mapped
#local folders that contain the relevant information.
credentials_path = "/app/data/google-credentials.json"
download_folder = "/app/data/installs/"
credentials = service_account.Credentials.from_service_account_file(credentials_path)
start_date = datetime.datetime.strptime(os.getenv('START_DATE'), '%Y-%m-%d')
start_date_time = datetime.datetime.now()
print(f"\INSTALLS UPDATE: Start of execution at {start_date_time}")
spreadsheet_id = os.getenv('SPREADSHEET_ID')
worksheet_id = os.getenv('INSTALLS_WORKSHEET_NAME')
package_name = os.getenv('PACKAGE_NAME')
# Initialize a client
client = storage.Client(credentials=credentials)
bucket_name = os.getenv('PLAY_STORE_BUCKET_NAME')
def download_blob(bucket_name, source_blob_name, destination_file_name):
"""Downloads a blob from the bucket."""
# Get the bucket object
bucket = client.bucket(bucket_name)
# Get the blob object
blob = bucket.blob(source_blob_name)
# Download the blob to a local file
blob.download_to_filename(destination_file_name)
print(f"Downloaded storage object {source_blob_name} from bucket {bucket_name} to local file {destination_file_name}.")
#Create a string that is a date in the format of YYYYMM based on today's date
today = datetime.datetime.today()
today = today.strftime('%Y%m')
#Get the days before today to update install data for
days_before_today = int(os.getenv('DAYS_TO_FETCH_INSTALLS_FOR'))
earliest_date_to_fetch = datetime.datetime.today() - datetime.timedelta(days=days_before_today)
#set to midnight of that date
earliest_date_to_fetch = earliest_date_to_fetch.replace(hour=0, minute=0, second=0, microsecond=0)
#Now lets get the installs for the current month
filename = f'installs_{packge_name}_{today}_overview.csv'
destination_filename = f'{download_folder}{filename}'
download_blob(bucket_name, f'stats/installs/{filename}', destination_filename)
#Now we need to convert this csv to a pandas dataframe
df = pd.read_csv(destination_filename, encoding='utf-16')
#We then want to take the dataframe and upload it to a google sheet, overwriting any data for the Date that
#already exists in the Google Sheet
#First we need to authenticate to Google Sheets
credentials = service_account.Credentials.from_service_account_file(credentials_path,
scopes=['https://www.googleapis.com/auth/spreadsheets'])
client = gspread.authorize(credentials)
# Open the Google Sheet
spreadsheet = client.open_by_key(spreadsheet_id)
# Get the worksheet
worksheet = spreadsheet.worksheet(worksheet_id)
# Go through each Date in the Pandas dataframe and update the Google Sheet with the data for that date
for index, row in df.iterrows():
date = row['Date']
date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
date = date_obj.strftime('%m/%d/%Y')
#If date is before the start date, skip
if date_obj < start_date:
print (f"Skipping date {date} as it is before the start date {start_date}")
continue
#If date is before the earliest date to fetch, skip
if date_obj < earliest_date_to_fetch:
print (f"Skipping date {date} as it is before the earliest date to fetch {earliest_date_to_fetch}")
continue
cell = worksheet.find(date, in_column=1)
if cell:
cell_row = cell.row
cell_col = cell.col
#remove the Date column from the row
row = row.drop('Date')
worksheet.update(f'A{cell_row}', [[date]+list(row)])
else:
#remove the Date column from the row
row = row.drop('Date')
worksheet.append_row([date]+list(row))
end_time = datetime.datetime.now()
duration = end_time - start_date_time
print(f"INSTALLS UPDATE: Finished at {end_time} and took {duration} to execute.")
[tool.poetry]
name = "google_play_install_downloader"
version = "0.1.0"
description = "A simple project to automatically download Google Play install metrics and publish to a Google Sheet"
authors = ["Your Name <[email protected]>"]
license = "MIT"
[tool.poetry.dependencies]
python = "^3.10"
sshtunnel = "^0.4.0"
psycopg2-binary = "^2.9.3"
paramiko = "^3.4.0"
gspread = "^6.1.2"
google-auth = "^2.32.0"
google-cloud = "^0.34.0"
google-cloud-storage = "^2.18.0"
pandas = "^2.2.2"
google-cloud-pubsub = "^2.23.0"
google-api-python-client = "^2.139.0"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
#!/bin/bash
# Print the current working directory
echo "Current working directory at the start of run.sh: $(pwd)"
# Source the environment variables file to make them available to the cron job
# Enable automatic export of all variables
set -a
# Source the environment variables from the .env file
source /app/data/.env
# Disable automatic export of variables
set +a
# Run the python script which will download the install data from Google Play
/usr/local/bin/python3 /app/download_installs.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment