stephenleo · March 22, 2022 12:53
diff --git a/00_make_money_on_the_side_with_data_science.md b/00_make_money_on_the_side_with_data_science.md
diff --git a/01_main.py b/01_main.py
 import pandas as pd
 import numpy as np
 import os
 import re
 from typing import List
 from tensorflow.keras.models import load_model

 import uvicorn
 from fastapi import FastAPI
 from fastapi.logger import logger

 from preprocess import preprocess

 # Create the FastAPI application that the route below is registered on.
 app = FastAPI()

 # Locate the saved Keras model relative to this file (not the working
 # directory) and load it once, when the module is imported.
 model_path = os.path.join(os.path.dirname(__file__), "models/boyorgirl.h5")
 pred_model = load_model(model_path)

 # Predict endpoint
 @app.post("/predict")
 def predict(names: List[str]):
    """Classify each submitted name as "boy" or "girl".

    Args:
        names: Raw strings from the request body. One string may hold several
            names (for example "Joe Biden"); every run of word characters is
            scored as a separate name.

    Returns:
        ``{"response": [...]}`` where each record has the keys ``name`` (as
        submitted), ``boy_or_girl`` and ``probability`` (the score of the
        chosen label, rounded to 2 decimals). Only the first 10 extracted
        names are scored and duplicate records are dropped. The list is empty
        when the input contains no word characters.
    """
    # Step 1: Input is a list of names
    logger.info(names)

    # Step 2: Extract every run of word characters. Note that \w matches
    # letters, digits and underscore, so only other characters act as separators.
    names = [token for name in names for token in re.findall(r"\w+", name)]

    # Step 3: Keep only first 10 names
    names = names[:10]

    # Nothing to score: answer directly instead of sending an empty batch to
    # the model and squeezing an axis that would not exist.
    if not names:
        return {"response": []}

    # Convert to dataframe
    pred_df = pd.DataFrame({"name": names})

    # Step 4: Preprocess the names
    pred_df = preprocess(pred_df)

    # Step 5: Run predictions
    # NOTE(review): squeeze(axis=1) assumes the model returns one value per
    # name with shape (n_names, 1) -- confirm against the saved model.
    encoded = np.asarray(pred_df["name"].values.tolist())
    result = pred_model.predict(encoded).squeeze(axis=1)

    # Step 6: Convert the scores to labels. A score above 0.5 is reported as
    # "boy"; otherwise "girl" is reported with the complementary score.
    pred_df["boy_or_girl"] = ["boy" if prob > 0.5 else "girl" for prob in result]
    pred_df["probability"] = [prob if prob > 0.5 else 1.0 - prob for prob in result]

    # Step 7: Format the output (restore the names as submitted, since
    # preprocessing replaced the column with encoded lists)
    pred_df["name"] = names
    pred_df["probability"] = pred_df["probability"].round(2)
    pred_df.drop_duplicates(inplace=True)

    return {"response": pred_df.to_dict(orient="records")}


 if __name__ == "__main__":
    # Listen on all interfaces; the port is read from $PORT and falls back to 8080.
    listen_port = int(os.environ.get("PORT", 8080))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
diff --git a/02_preprocess.py b/02_preprocess.py
 def preprocess(names_df, name_length=50):
    """Encode the ``name`` column of *names_df* as fixed-length number lists.

    Each name is lowercased, cut or padded to ``name_length`` characters, and
    every character is mapped to ``max(0.0, ord(char) - 96.0)``: "a".."z"
    become 1.0..26.0, while padding and any character whose code point is at
    most 96 (space, digits, most punctuation) become 0.0.

    Args:
        names_df: DataFrame with a string column ``name``. It is modified in
            place and also returned.
        name_length: Number of values in every encoded name. Defaults to 50.

    Returns:
        The same DataFrame, with ``name`` holding one list of ``name_length``
        floats per row.

    Raises:
        ValueError: If ``name_length`` is negative.
    """
    if name_length < 0:
        raise ValueError("name_length must be non-negative")

    # Step 1: Lowercase
    lowered = names_df["name"].str.lower()

    # Steps 2-4: Encode the first ``name_length`` characters, then pad with
    # 0.0, which is exactly what a padding space encodes to (32 - 96 < 0).
    # A negative repeat count yields an empty list, so long names get no padding.
    # NOTE(review): characters above "z" (e.g. accented letters) encode to
    # values greater than 26 -- confirm the model accepts them.
    names_df["name"] = [
        [max(0.0, ord(char) - 96.0) for char in name[:name_length]]
        + [0.0] * (name_length - len(name))
        for name in lowered
    ]

    return names_df
diff --git a/03_Dockerfile b/03_Dockerfile
 # Use the official lightweight Python image.
 # https://hub.docker.com/_/python
 FROM python:3.8-slim

 # Allow statements and log messages to immediately appear in the Knative logs
 ENV PYTHONUNBUFFERED=True

 # Copy local code to the container image.
 ENV APP_HOME=/app
 WORKDIR $APP_HOME
 COPY . ./

 # Install production dependencies.
 RUN pip install --no-cache-dir -r requirements.txt

 # Run the web service on container startup. Here we use the gunicorn
 # webserver with one uvicorn worker process, listening on $PORT.
 # NOTE(review): the Gunicorn docs state that --threads only affects the
 # gthread worker type, so "--threads 8" likely has no effect with
 # UvicornWorker -- confirm before relying on it.
 # For environments with multiple CPU cores, increase the number of workers
 # to be equal to the cores available.
 # Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
 # The shell form is kept so that $PORT is expanded when the container starts.
 CMD exec gunicorn -k uvicorn.workers.UvicornWorker --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
diff --git a/04_test_docker_image.sh b/04_test_docker_image.sh
 # Build the image from the current directory and tag it boy_or_girl:latest.
 docker build --tag boy_or_girl:latest .
 # Run it with PORT=8080 and container port 8080 published on the host;
 # the container is removed when it exits.
 docker run --rm --env PORT=8080 --publish 8080:8080 boy_or_girl:latest
diff --git a/05_test_docker.py b/05_test_docker.py
 import requests
 import pprint

 # Local address of the /predict endpoint on port 8080.
 url = 'http://127.0.0.1:8080/predict'

 # The request body is a JSON list of name strings.
 payload = ['Joe Biden', 'Kamala Harris']

 # requests waits indefinitely by default; bound the wait so the script
 # fails instead of hanging when the service is not responding.
 response = requests.post(url, json=payload, timeout=30)
 pprint.pprint(response.json())
diff --git a/06_gcp_cloud_run_deploy.sh b/06_gcp_cloud_run_deploy.sh
 # Deploy the source in the current directory as the Cloud Run service "boyorgirl" in us-west1.
 gcloud run deploy boyorgirl --region=us-west1 --source=.
	import pandas as pd
	import numpy as np
	import os
	import re
	from typing import List
	from tensorflow.keras.models import load_model

	import uvicorn
	from fastapi import FastAPI
	from fastapi.logger import logger

	from preprocess import preprocess

	# Create the FastAPI application that the route below is registered on.
	app = FastAPI()

	# Locate the saved Keras model relative to this file (not the working
	# directory) and load it once, when the module is imported.
	model_path = os.path.join(os.path.dirname(__file__), "models/boyorgirl.h5")
	pred_model = load_model(model_path)

	# Predict endpoint
	@app.post("/predict")
	def predict(names: List[str]):
	    """Classify each submitted name as "boy" or "girl".

	    Args:
	        names: Raw strings from the request body. One string may hold
	            several names (for example "Joe Biden"); every run of word
	            characters is scored as a separate name.

	    Returns:
	        ``{"response": [...]}`` where each record has the keys ``name`` (as
	        submitted), ``boy_or_girl`` and ``probability`` (the score of the
	        chosen label, rounded to 2 decimals). Only the first 10 extracted
	        names are scored and duplicate records are dropped. The list is
	        empty when the input contains no word characters.
	    """
	    # Step 1: Input is a list of names
	    logger.info(names)

	    # Step 2: Extract every run of word characters. Note that \w matches
	    # letters, digits and underscore, so only other characters act as separators.
	    names = [token for name in names for token in re.findall(r"\w+", name)]

	    # Step 3: Keep only first 10 names
	    names = names[:10]

	    # Nothing to score: answer directly instead of sending an empty batch to
	    # the model and squeezing an axis that would not exist.
	    if not names:
	        return {"response": []}

	    # Convert to dataframe
	    pred_df = pd.DataFrame({"name": names})

	    # Step 4: Preprocess the names
	    pred_df = preprocess(pred_df)

	    # Step 5: Run predictions
	    # NOTE(review): squeeze(axis=1) assumes the model returns one value per
	    # name with shape (n_names, 1) -- confirm against the saved model.
	    encoded = np.asarray(pred_df["name"].values.tolist())
	    result = pred_model.predict(encoded).squeeze(axis=1)

	    # Step 6: Convert the scores to labels. A score above 0.5 is reported as
	    # "boy"; otherwise "girl" is reported with the complementary score.
	    pred_df["boy_or_girl"] = ["boy" if prob > 0.5 else "girl" for prob in result]
	    pred_df["probability"] = [prob if prob > 0.5 else 1.0 - prob for prob in result]

	    # Step 7: Format the output (restore the names as submitted, since
	    # preprocessing replaced the column with encoded lists)
	    pred_df["name"] = names
	    pred_df["probability"] = pred_df["probability"].round(2)
	    pred_df.drop_duplicates(inplace=True)

	    return {"response": pred_df.to_dict(orient="records")}


	if __name__ == "__main__":
	    # Listen on all interfaces; the port is read from $PORT and falls back to 8080.
	    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
	def preprocess(names_df, name_length=50):
	    """Encode the ``name`` column of *names_df* as fixed-length number lists.

	    Each name is lowercased, cut or padded to ``name_length`` characters, and
	    every character is mapped to ``max(0.0, ord(char) - 96.0)``: "a".."z"
	    become 1.0..26.0, while padding and any character whose code point is at
	    most 96 (space, digits, most punctuation) become 0.0.

	    Args:
	        names_df: DataFrame with a string column ``name``. It is modified in
	            place and also returned.
	        name_length: Number of values in every encoded name. Defaults to 50.

	    Returns:
	        The same DataFrame, with ``name`` holding one list of ``name_length``
	        floats per row.

	    Raises:
	        ValueError: If ``name_length`` is negative.
	    """
	    if name_length < 0:
	        raise ValueError("name_length must be non-negative")

	    # Step 1: Lowercase
	    lowered = names_df["name"].str.lower()

	    # Steps 2-4: Encode the first ``name_length`` characters, then pad with
	    # 0.0, which is exactly what a padding space encodes to (32 - 96 < 0).
	    # A negative repeat count yields an empty list, so long names get no padding.
	    # NOTE(review): characters above "z" (e.g. accented letters) encode to
	    # values greater than 26 -- confirm the model accepts them.
	    names_df["name"] = [
	        [max(0.0, ord(char) - 96.0) for char in name[:name_length]]
	        + [0.0] * (name_length - len(name))
	        for name in lowered
	    ]

	    return names_df
	# Use the official lightweight Python image.
	# https://hub.docker.com/_/python
	FROM python:3.8-slim

	# Allow statements and log messages to immediately appear in the Knative logs
	ENV PYTHONUNBUFFERED=True

	# Copy local code to the container image.
	ENV APP_HOME=/app
	WORKDIR $APP_HOME
	COPY . ./

	# Install production dependencies.
	RUN pip install --no-cache-dir -r requirements.txt

	# Run the web service on container startup. Here we use the gunicorn
	# webserver with one uvicorn worker process, listening on $PORT.
	# NOTE(review): the Gunicorn docs state that --threads only affects the
	# gthread worker type, so "--threads 8" likely has no effect with
	# UvicornWorker -- confirm before relying on it.
	# For environments with multiple CPU cores, increase the number of workers
	# to be equal to the cores available.
	# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
	# The shell form is kept so that $PORT is expanded when the container starts.
	CMD exec gunicorn -k uvicorn.workers.UvicornWorker --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
	# Build the image from the current directory and tag it boy_or_girl:latest.
	docker build --tag boy_or_girl:latest .
	# Run it with PORT=8080 and container port 8080 published on the host;
	# the container is removed when it exits.
	docker run --rm --env PORT=8080 --publish 8080:8080 boy_or_girl:latest
	import requests
	import pprint

	# Local address of the /predict endpoint on port 8080.
	url = 'http://127.0.0.1:8080/predict'

	# The request body is a JSON list of name strings.
	payload = ['Joe Biden', 'Kamala Harris']

	# requests waits indefinitely by default; bound the wait so the script
	# fails instead of hanging when the service is not responding.
	response = requests.post(url, json=payload, timeout=30)
	pprint.pprint(response.json())