Skip to content

Instantly share code, notes, and snippets.

@awmatheson
Created November 30, 2018 02:28
Show Gist options
  • Save awmatheson/f9fd5394e7addf962aabd29ff68b5035 to your computer and use it in GitHub Desktop.
Save awmatheson/f9fd5394e7addf962aabd29ff68b5035 to your computer and use it in GitHub Desktop.
Template for the model.py file used in model deployment.
class Model:
    """Template model object exposing the hooks used for model deployment.

    Every method is a placeholder: it prints progress messages and returns
    a trivial value. Replace each body with a real implementation while
    keeping the method names and signatures intact.
    """

    def __init__(self):
        """Set up the default training data, store locations and flags."""
        # Specify your data for fitting here.
        self.X_fit = []
        self.Y_fit = []
        # Locations of the model and feature stores (placeholder values).
        self.model_store = "really.awesome.fast.model.store"
        self.feature_store = "really.awesome.fast.feature.store"
        self.pre_processing = True
        # Placeholder column list; fill in with the real feature names.
        self.FEATURE_COLUMNS = ['']

    def fit(self):
        """Fit the model.

        Takes no arguments; the training data is meant to be specified on
        the instance (``self.X_fit`` and ``self.Y_fit``). The template
        implementation only prints progress messages.
        """
        print("fitting model ...")
        print("model fit success")

    def get_features(self, *args):
        """Extract the required features and manipulate them as necessary.

        Best practice is to move the manipulation to a preprocessing job
        done in the ETL tool; use this hook to manipulate only as a last
        resort.

        Args:
            *args: Accepted for the benefit of real implementations; the
                template ignores them.

        Returns:
            dict: Always an empty dict in this template.
        """
        print("extracting features ... from {}".format(self.feature_store))
        print("feature extraction success")
        return {}

    def pre_process(self, payload, features=None):
        """Run tertiary processing prior to predictions.

        Args:
            payload: Payload from the request.
            features: Optional features; unused by the template.

        Returns:
            The prediction payload. The template returns ``payload``
            itself, unchanged.
        """
        processed = payload
        return processed

    def predict(self, X):
        """Predict for the given input.

        Args:
            X: Input to score. The original template describes it as a
                list of dict feature vectors for results in a search
                session -- confirm against the caller.

        Returns:
            ``X`` unchanged; this is a stand-in heuristic until a real
            model is plugged in.
        """
        # Just use heuristic for now.
        return X

    def save(self):
        """Save the trained model to ``self.model_store``.

        Intended to run after a full training build on the remote instance
        so the completed model can be easily retrieved later (the original
        template names HDFS as the target). The template implementation
        only prints progress messages.
        """
        print("saving model to {}...".format(self.model_store))
        print("model save success")

    def load(self):
        """Load the trained model from ``self.model_store``.

        Intended to run as each worker is spun up so that it retrieves the
        latest trained model (the original template names HDFS as the
        source). The template implementation only prints progress messages.
        """
        print("loading model from {}...".format(self.model_store))
        print("model load success")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment