-
-
Save bll-bobbygill/45b3beb33dfe19d9c32efa9902abbc9c to your computer and use it in GitHub Desktop.
Python Code to Automatically Download Google Play Install Metrics and Publish to a Google Sheet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SPREADSHEET_ID=<Google Spreadsheet ID> | |
INSTALLS_WORKSHEET_NAME=Installs | |
PLAY_STORE_BUCKET_NAME= pubsite_prod_<google play account id> | |
START_DATE=2024-06-12 | |
DAYS_TO_FETCH_INSTALLS_FOR=5 | |
PACKAGE_NAME=<Android package name> | |
GOOGLE_CLOUD_PROJECT_ID=<Google Cloud Project ID> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cron | |
# Run the command shell script to download the Install data every 15 minutes | |
*/15 * * * * /bin/bash /app/run.sh >> /var/log/cron.log 2>&1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
FROM python:3.10-slim | |
# Keeps Python from generating .pyc files in the container | |
ENV PYTHONDONTWRITEBYTECODE 1 | |
# Turns off buffering for easier container logging | |
ENV PYTHONUNBUFFERED 1 | |
# Install dependencies | |
RUN apt-get update && apt-get install -y \ | |
wget \ | |
build-essential \ | |
cron \ | |
bash \ | |
&& wget https://install.python-poetry.org -O get-poetry.py \ | |
&& python get-poetry.py \ | |
&& rm get-poetry.py | |
ENV PATH="/root/.local/bin:${PATH}" | |
WORKDIR /app | |
COPY . . | |
RUN poetry config virtualenvs.create false \ | |
&& poetry install --no-interaction --no-ansi | |
# Copy the crontab file to the cron.d directory | |
COPY cron /etc/cron.d/mycron | |
# Give execution rights on the cron job | |
RUN chmod 0644 /etc/cron.d/mycron | |
# Apply cron job | |
RUN crontab /etc/cron.d/mycron | |
# Create the log file to be able to run tail | |
RUN touch /var/log/cron.log | |
# Ensure the wrapper script is executable | |
RUN chmod +x /app/run.sh | |
CMD cron -f |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.cloud import storage | |
from google.oauth2 import service_account | |
from google.oauth2.credentials import Credentials | |
import os | |
import datetime | |
import pandas as pd | |
import gspread | |
#This snippet is intended to run in a Docker container and assumes the following data paths are mapped | |
#local folders that contain the relevant information. | |
credentials_path = "/app/data/google-credentials.json" | |
download_folder = "/app/data/installs/" | |
credentials = service_account.Credentials.from_service_account_file(credentials_path) | |
start_date = datetime.datetime.strptime(os.getenv('START_DATE'), '%Y-%m-%d') | |
start_date_time = datetime.datetime.now() | |
print(f"\INSTALLS UPDATE: Start of execution at {start_date_time}") | |
spreadsheet_id = os.getenv('SPREADSHEET_ID') | |
worksheet_id = os.getenv('INSTALLS_WORKSHEET_NAME') | |
package_name = os.getenv('PACKAGE_NAME') | |
# Initialize a client | |
client = storage.Client(credentials=credentials) | |
bucket_name = os.getenv('PLAY_STORE_BUCKET_NAME') | |
def download_blob(bucket_name, source_blob_name, destination_file_name): | |
"""Downloads a blob from the bucket.""" | |
# Get the bucket object | |
bucket = client.bucket(bucket_name) | |
# Get the blob object | |
blob = bucket.blob(source_blob_name) | |
# Download the blob to a local file | |
blob.download_to_filename(destination_file_name) | |
print(f"Downloaded storage object {source_blob_name} from bucket {bucket_name} to local file {destination_file_name}.") | |
#Create a string that is a date in the format of YYYYMM based on today's date | |
today = datetime.datetime.today() | |
today = today.strftime('%Y%m') | |
#Get the days before today to update install data for | |
days_before_today = int(os.getenv('DAYS_TO_FETCH_INSTALLS_FOR')) | |
earliest_date_to_fetch = datetime.datetime.today() - datetime.timedelta(days=days_before_today) | |
#set to midnight of that date | |
earliest_date_to_fetch = earliest_date_to_fetch.replace(hour=0, minute=0, second=0, microsecond=0) | |
#Now lets get the installs for the current month | |
filename = f'installs_{packge_name}_{today}_overview.csv' | |
destination_filename = f'{download_folder}{filename}' | |
download_blob(bucket_name, f'stats/installs/{filename}', destination_filename) | |
#Now we need to convert this csv to a pandas dataframe | |
df = pd.read_csv(destination_filename, encoding='utf-16') | |
#We then want to take the dataframe and upload it to a google sheet, overwriting any data for the Date that | |
#already exists in the Google Sheet | |
#First we need to authenticate to Google Sheets | |
credentials = service_account.Credentials.from_service_account_file(credentials_path, | |
scopes=['https://www.googleapis.com/auth/spreadsheets']) | |
client = gspread.authorize(credentials) | |
# Open the Google Sheet | |
spreadsheet = client.open_by_key(spreadsheet_id) | |
# Get the worksheet | |
worksheet = spreadsheet.worksheet(worksheet_id) | |
# Go through each Date in the Pandas dataframe and update the Google Sheet with the data for that date | |
for index, row in df.iterrows(): | |
date = row['Date'] | |
date_obj = datetime.datetime.strptime(date, '%Y-%m-%d') | |
date = date_obj.strftime('%m/%d/%Y') | |
#If date is before the start date, skip | |
if date_obj < start_date: | |
print (f"Skipping date {date} as it is before the start date {start_date}") | |
continue | |
#If date is before the earliest date to fetch, skip | |
if date_obj < earliest_date_to_fetch: | |
print (f"Skipping date {date} as it is before the earliest date to fetch {earliest_date_to_fetch}") | |
continue | |
cell = worksheet.find(date, in_column=1) | |
if cell: | |
cell_row = cell.row | |
cell_col = cell.col | |
#remove the Date column from the row | |
row = row.drop('Date') | |
worksheet.update(f'A{cell_row}', [[date]+list(row)]) | |
else: | |
#remove the Date column from the row | |
row = row.drop('Date') | |
worksheet.append_row([date]+list(row)) | |
end_time = datetime.datetime.now() | |
duration = end_time - start_date_time | |
print(f"INSTALLS UPDATE: Finished at {end_time} and took {duration} to execute.") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[tool.poetry] | |
name = "google_play_install_downloader" | |
version = "0.1.0" | |
description = "A simple project to automatically download Google Play install metrics and publish to a Google Sheet" | |
authors = ["Your Name <[email protected]>"] | |
license = "MIT" | |
[tool.poetry.dependencies] | |
python = "^3.10" | |
sshtunnel = "^0.4.0" | |
psycopg2-binary = "^2.9.3" | |
paramiko = "^3.4.0" | |
gspread = "^6.1.2" | |
google-auth = "^2.32.0" | |
google-cloud = "^0.34.0" | |
google-cloud-storage = "^2.18.0" | |
pandas = "^2.2.2" | |
google-cloud-pubsub = "^2.23.0" | |
google-api-python-client = "^2.139.0" | |
[build-system] | |
requires = ["poetry-core>=1.0.0"] | |
build-backend = "poetry.core.masonry.api" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Print the current working directory | |
echo "Current working directory at the start of run.sh: $(pwd)" | |
# Source the environment variables file to make them available to the cron job | |
# Enable automatic export of all variables | |
set -a | |
# Source the environment variables from the .env file | |
source /app/data/.env | |
# Disable automatic export of variables | |
set +a | |
# Run the python script which will download the install data from Google Play | |
/usr/local/bin/python3 /app/download_installs.py |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment