This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardsql
-- bicycle_rentals: one row per (trip_date, start_station_id) with the number
-- of trips that started at that station on that calendar date.
WITH bicycle_rentals AS (
  SELECT
    -- COUNT(starttime) ignores rows whose starttime is NULL.
    COUNT(starttime) AS num_trips,
    EXTRACT(DATE FROM starttime) AS trip_date,
    -- Every row in a group shares the same trip_date, hence the same weekday;
    -- MAX() only exists so the column can be selected without adding it to
    -- the GROUP BY.
    MAX(EXTRACT(DAYOFWEEK FROM starttime)) AS day_of_week,
    start_station_id
  FROM `bigquery-public-data.new_york.citibike_trips`
  GROUP BY trip_date, start_station_id
),
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardsql | |
with zipcodes as ( | |
SELECT | |
zip_census.zipcode as zipcode, | |
population, | |
WKT as geometry, | |
ST_CENTROID(ST_GeogFromText(WKT)) as centroid | |
FROM | |
`bigquery-public-data.census_bureau_usa.population_by_zip_2010` AS zip_census | |
join `bigquery-public-data-staging.zcta_test.2017` as zip_geom |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run a local prediction with the exported model found under EXPORTDIR.
EXPORTDIR=./model_trained/export/exporter/
# Take the last entry in `ls` order as the model directory.
# NOTE(review): presumably the export subdirectories are timestamp-named so the
# last entry is the newest export -- confirm.
MODELDIR=$(ls "$EXPORTDIR" | tail -1)
# ${EXPORTDIR%/} drops the trailing slash so the joined path has no "//".
gcloud ml-engine local predict --model-dir="${EXPORTDIR%/}/${MODELDIR}" --json-instances=./test.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def model_fn(features, labels, mode):
    """Set up the feature columns for the custom estimator.

    Only the feature-column setup is visible in this excerpt; the code that
    turns these columns into predictions and returns a spec is not shown
    here. `labels` and `mode` are not used in the visible lines.

    Args:
        features: input features passed to `tf.feature_column.input_layer`;
            the visible code reads the 'start_station_id' key from it.
        labels: not used in the visible portion.
        mode: not used in the visible portion.
    """
    # linear model
    # Hash the integer station id into 5000 buckets, then embed it.
    station_col = tf.feature_column.categorical_column_with_hash_bucket(
        'start_station_id', 5000, tf.int32)
    station_embed = tf.feature_column.embedding_column(station_col, 2)  # embed dimension
    # Dense tensor holding the 2-dimensional station embedding for each input.
    embed_layer = tf.feature_column.input_layer(features, station_embed)
    # Remaining categorical inputs: day of week and the rainy flag.
    cat_cols = [
        tf.feature_column.categorical_column_with_identity('day_of_week', num_buckets=8),
        tf.feature_column.categorical_column_with_vocabulary_list('rainy', ['false', 'true']),
    ]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the estimator spec through a regression head, trained with FTRL at a
# learning rate of 0.1. `features`, `labels`, `mode` and `predictions` come
# from the surrounding code, which is not part of this excerpt.
my_head = tf.contrib.estimator.regression_head()
spec = my_head.create_estimator_spec(
    features=features, mode=mode,
    labels=labels, logits=predictions,
    optimizer=tf.train.FtrlOptimizer(learning_rate=0.1)
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 3. Create predictions
# Expose the station embedding next to the predicted value. `predictions`
# and `embed_layer` come from code outside this excerpt.
predictions_dict = {
    "predicted": predictions,
    "station_embed": embed_layer,
}
# 4. Create export outputs
# A single PredictOutput carrying both entries of predictions_dict.
export_outputs = {
    "predict_export_outputs": tf.estimator.export.PredictOutput(outputs=predictions_dict)
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 5. Return EstimatorSpec | |
return spec._replace(predictions = predictions_dict, | |
export_outputs = export_outputs) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Station id: hashed into 5000 buckets and embedded into 2 dimensions.
station_embed = tf.feature_column.embedding_column(
    tf.feature_column.categorical_column_with_hash_bucket('start_station_id', 5000, tf.int32), 2)
# Inputs of the linear model: day of week, the station embedding, rainy flag.
feature_cols = [
    tf.feature_column.categorical_column_with_identity('day_of_week', num_buckets=8),
    station_embed,
    tf.feature_column.categorical_column_with_vocabulary_list('rainy', ['false', 'true']),
]
# `output_dir` is defined outside this excerpt.
estimator = tf.estimator.LinearRegressor(
    model_dir=output_dir,
    feature_columns=feature_cols)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def train_and_evaluate(output_dir, nsteps):
    """Create the Estimator that wraps the custom `model_fn`.

    Only the Estimator construction is visible in this excerpt; the lines
    shown neither use `nsteps` nor return anything.
    NOTE(review): presumably the training/evaluation calls follow in the
    full source -- confirm.

    Args:
        output_dir: passed to the Estimator as its `model_dir`.
        nsteps: not used in the visible portion.
    """
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=output_dir)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compute_prediction(rowdict, numeric_weights, scaling_df, categorical_weights): | |
input_values = rowdict | |
# numeric inputs | |
pred = 0 | |
for column_name in numeric_weights['input'].unique(): | |
wt = numeric_weights[ numeric_weights['input'] == column_name ]['input_weight'].values[0] | |
if column_name != '__INTERCEPT__': | |
meanv = scaling_df[ scaling_df['input'] == column_name ]['mean'].values[0] | |
stddev = scaling_df[ scaling_df['input'] == column_name ]['stddev'].values[0] | |
scaled_value = (input_values[column_name] - meanv)/stddev |