Skip to content

Instantly share code, notes, and snippets.

@ericness
ericness / .py
Created January 6, 2018 17:18
Transform CSV file into JSON string
def transform_to_json(row):
    """Serialize one passenger record as a nested JSON string.

    Args:
        row: Mapping-like record (for example a dict) that can be indexed
            with the keys 'Survived', 'Pclass' and 'Fare'.

    Returns:
        A JSON string shaped like
        {"Survived": ..., "Ticket": {"Pclass": ..., "Fare": ...}}.
    """
    # Local import keeps this snippet self-contained.
    import json

    # Build fresh dictionaries on every call so no state is shared between
    # rows and the function does not rely on pre-existing globals.
    ticket_dict = {
        'Pclass': row['Pclass'],
        'Fare': row['Fare'],
    }
    passenger_dict = {
        'Survived': row['Survived'],
        'Ticket': ticket_dict,
    }
    # json.dumps returns the encoded string; json.dump would require a
    # file object as its second argument and cannot be used here.
    return json.dumps(passenger_dict)
@ericness
ericness / convert_dataframe_to_dataset.py
Last active August 26, 2022 20:25
Convert pandas DataFrame into TensorFlow Dataset
import numpy as np
import pandas as pd
import tensorflow as tf
tf.enable_eager_execution()
training_df: pd.DataFrame = pd.DataFrame(
data={
'feature1': np.random.rand(10),
'feature2': np.random.rand(10),
@ericness
ericness / convert_dataframe_to_dataset_output.txt
Created August 1, 2018 23:06
Converting pandas DataFrame into TensorFlow Dataset Output
feature1 feature2 feature3 target
0 0.474587 0.036684 0.625155 1
1 0.157049 0.430315 0.659612 2
2 0.525072 0.106430 0.524478 2
3 0.012953 0.287160 0.742407 1
4 0.613194 0.767960 0.967474 2
5 0.199329 0.304863 0.677769 0
6 0.956099 0.330080 0.928238 0
7 0.659677 0.920559 0.894692 1
8 0.956639 0.781366 0.224493 0
@ericness
ericness / basic_multiindex.py
Created August 17, 2018 21:36
Displays the results of running from_product on two short lists
import pandas as pd
# Build the index from the Cartesian product of the two level lists:
# every number is paired with every letter.
basic_index = pd.MultiIndex.from_product(
    iterables=[
        [1, 2, 3],
        ['a', 'b', 'c'],
    ],
)
# .values exposes the index entries as an array of tuples.
print(basic_index.values)
@ericness
ericness / create_order_data.py
Created August 17, 2018 21:47
Create a sample of order data
from datetime import datetime
import pandas as pd
orders = pd.DataFrame(
data={
'customer': [1, 2, 3, 2, 3, 1, 1],
'order_date': [
datetime(2018, 1, 3),
datetime(2018, 1, 5),
datetime(2018, 1, 7),
@ericness
ericness / group_order_data.py
Created August 19, 2018 22:37
Sum order amounts by customer and order date.
# Collapse the order rows to one row per (customer, order_date) pair,
# summing the remaining columns within each pair.
daily_orders = (
    orders
    .groupby(by=['customer', 'order_date'])
    .sum()
)
print(daily_orders)
@ericness
ericness / build_customer_date_multi_index.py
Created August 19, 2018 22:45
Builds a MultiIndex of customer and date combinations
# Distinct values of the 'customer' level of the daily_orders index.
unique_customers = daily_orders.index.unique('customer')
# Daily timestamps from 2018-01-01 through 2018-01-10, both ends included.
# pd.date_range is used because the DatetimeIndex constructor no longer
# accepts start/end/freq (deprecated in pandas 0.24, removed in 1.0).
date_range = pd.date_range(
    start=datetime(2018, 1, 1),
    end=datetime(2018, 1, 10),
    freq='D',
)
customer_date_index = (
pd.MultiIndex
@ericness
ericness / fill_in_data_gaps.py
Created August 22, 2018 21:55
Reindex the DataFrame so that missing dates are filled in with zero values.
# Expand the frame to every entry of customer_date_index; entries that had
# no row before the reindex are filled with 0.
daily_orders = daily_orders.reindex(customer_date_index, fill_value=0)
# Cumulative sum within each customer group. The former argument-less
# .reindex() step was dropped: with no target it only copied the frame.
# NOTE(review): assumes the index has a level named 'customer' and that the
# frame holds a single value column, so the result fits one column - confirm.
daily_orders['running_amount'] = (
    daily_orders
    .groupby('customer')
    .cumsum()
)
print(daily_orders)
@ericness
ericness / MockBinaryClassifier.py
Last active September 17, 2018 22:24
Class that emulates a scikit-learn estimator.
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
class MockBinaryClassifier(BaseEstimator):
"""Class to emulate a predictive model using a simple heuristic."""
@ericness
ericness / MockBinaryClassifier_test1.py
Created September 17, 2018 22:07
Checks that a MockBinaryClassifier can run predict.
# Smoke check: a default-constructed classifier should be able to predict.
model = MockBinaryClassifier()
# Four rows with one value each, including a zero and a negative value.
test_feature = np.array(
    [
        [0],
        [0.5],
        [3],
        [-1],
    ]
)
predictions = model.predict(test_feature)
print(predictions)