Code for the Medium post Link
Last active
March 22, 2022 12:53
-
-
Save stephenleo/848b83a9ac2cfc38e25acbe3d34543e1 to your computer and use it in GitHub Desktop.
[Medium] Make Money on the side with Data Science!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import os | |
import re | |
from typing import List | |
from tensorflow.keras.models import load_model | |
import uvicorn | |
from fastapi import FastAPI | |
from fastapi.logger import logger | |
from preprocess import preprocess | |
# Load the trained Keras model once, at import time, so every request reuses
# the same in-memory model. The path is resolved relative to this file rather
# than the current working directory.
model_path = os.path.join(
    os.path.dirname(__file__),
    "models/boyorgirl.h5",
)
pred_model = load_model(model_path)

# FastAPI application object that the route decorators below register against.
app = FastAPI()
# Predict endpoint | |
@app.post("/predict")
def predict(names: List[str]):
    """Predict "boy" or "girl" for each name token in the request body.

    Args:
        names: Strings that may each hold several names, e.g. "Joe Biden".
            Each string is split into word tokens and only the first 10
            tokens overall are scored.

    Returns:
        ``{"response": [...]}`` where each record has the keys ``name``
        (the token as submitted), ``boy_or_girl`` and ``probability``
        (confidence of the chosen label, rounded to 2 decimals). Duplicate
        records are removed. The list is empty when the input has no tokens.
    """
    # Step 1: Input is a list of names
    logger.info(names)

    # Step 2: Split every entry into word tokens. NOTE: r"\w+" keeps digits
    # and underscores as well as letters, so "non-alphabet" is only
    # approximately what gets stripped here.
    split_names = [re.findall(r"\w+", name) for name in names]
    names = [item for sublist in split_names for item in sublist]

    # Step 3: Keep only first 10 names
    names = names[:10]

    # Nothing to score (empty list, or entries holding only punctuation or
    # whitespace). Return early instead of handing the model an empty array,
    # which the squeeze(axis=1) below is not written to handle.
    if not names:
        return {"response": []}

    # Convert to dataframe
    pred_df = pd.DataFrame({"name": names})

    # Step 4: Preprocess the names
    pred_df = preprocess(pred_df)

    # Step 5: Run predictions
    encoded_names = np.asarray(pred_df["name"].values.tolist())
    result = pred_model.predict(encoded_names).squeeze(axis=1)

    # Step 6: Convert the probabilities to predictions. A score above 0.5 is
    # labelled "boy"; the reported probability is always that of the chosen
    # label, so it is never below 0.5.
    pred_df["boy_or_girl"] = ["boy" if score > 0.5 else "girl" for score in result]
    pred_df["probability"] = [
        score if score > 0.5 else 1.0 - score for score in result
    ]

    # Step 7: Format the output. Restore the tokens as submitted, because
    # preprocessing replaced the "name" column with encoded lists.
    pred_df["name"] = names
    pred_df["probability"] = pred_df["probability"].round(2)
    pred_df.drop_duplicates(inplace=True)

    return {"response": pred_df.to_dict(orient="records")}
if __name__ == "__main__":
    # Local/dev entry point: listen on all interfaces, taking the port from
    # the PORT environment variable and falling back to 8080 when it is unset.
    port = int(os.environ.get("PORT", 8080))
    uvicorn.run(app, host="0.0.0.0", port=port)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def preprocess(names_df, name_length=50):
    """Encode the ``name`` column as fixed-length lists of character codes.

    Each name is lowercased, split into characters, right-padded with spaces
    (or truncated) to ``name_length`` characters, and every character is
    mapped to ``max(0.0, ord(char) - 96.0)``. That gives 'a'..'z' -> 1.0..26.0
    and 0.0 for the space padding, digits and any other character whose code
    point is 96 or lower. Characters with code points above 'z' map to values
    above 26.0.

    Args:
        names_df: DataFrame with a string column called ``name``.
        name_length: Number of characters each name is padded or truncated
            to. Defaults to 50, the value that was previously hard-coded.

    Returns:
        The same ``names_df`` object. Its ``name`` column is overwritten in
        place with lists of ``name_length`` floats.

    Raises:
        ValueError: If ``name_length`` is smaller than 1.
    """
    if name_length < 1:
        raise ValueError(
            f"name_length must be a positive integer, got {name_length!r}"
        )

    # Step 1: Lowercase
    lowered = names_df["name"].str.lower()

    # Steps 2-4: Split into characters, pad with spaces / truncate to a
    # common length, then encode each character as a number.
    padding = [" "] * name_length
    names_df["name"] = [
        [
            max(0.0, ord(char) - 96.0)
            for char in (list(name) + padding)[:name_length]
        ]
        for name in lowered
    ]

    return names_df
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Use the official lightweight Python image.
# https://hub.docker.com/_/python
FROM python:3.8-slim

# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED=True

# All application files live under this directory inside the image.
ENV APP_HOME=/app
WORKDIR $APP_HOME

# Install production dependencies first, from requirements.txt alone, so this
# layer is reused from the build cache when only application code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy local code to the container image.
COPY . ./

# Run the web service on container startup. Here we use the gunicorn
# webserver with the Uvicorn worker class, one worker process and 8 threads.
# NOTE(review): gunicorn documents --threads as applying to the gthread
# worker type, so it is presumably ignored with UvicornWorker; confirm.
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
# Shell form is used so that $PORT is expanded when the container starts.
CMD exec gunicorn -k uvicorn.workers.UvicornWorker --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the image from the Dockerfile in the current directory.
docker build --tag boy_or_girl:latest .

# Start a throwaway container, publishing container port 8080 on host port
# 8080 and telling the service to listen on that port via PORT.
docker run --rm --publish 8080:8080 --env PORT=8080 boy_or_girl:latest
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
import pprint

# Endpoint exposed on port 8080 of the local machine.
url = 'http://127.0.0.1:8080/predict'

# The endpoint takes a JSON array of strings; each string may hold several names.
payload = ['Joe Biden', 'Kamala Harris']

# requests waits indefinitely unless a timeout is given, so set one explicitly
# to keep the script from hanging if the service is down or slow to start.
response = requests.post(url, json=payload, timeout=30)
pprint.pprint(response.json())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deploy the current directory as the Cloud Run service "boyorgirl" in us-west1.
gcloud run deploy boyorgirl --source=. --region=us-west1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment