Created
August 1, 2021 17:21
-
-
Save zackster/5eb062f1fd2ef3ca845ee65769f5e980 to your computer and use it in GitHub Desktop.
linear regression to predict solpunk price based on sales data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import pdb

import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
def list_traits(a):
    """Return the trait names contained in a comma-separated attribute string.

    Each comma-separated entry is treated as a trait name followed by one
    trailing space-separated token (presumably a count or rarity figure --
    verify against the data file).  That final token is dropped and whatever
    precedes it is kept as the trait name.  An entry that contains no space
    at all therefore yields an empty string.

    Args:
        a: Attribute string such as ``"Mohawk Thin 441,Earring 2459"``.

    Returns:
        A list with one trait name per comma-separated entry, in input order.
    """
    # Strip every entry before cutting off the last token so that
    # "A 1, B 2" and "A 1,B 2" produce the same names; without this a space
    # after the comma would turn one trait into two distinct feature names.
    return [entry.strip().rpartition(' ')[0] for entry in a.split(',')]
# Load list of all punks
with open('solpunks.json', 'r', encoding='utf-8') as pr:
    punk_json = json.load(pr)

# punks maps punk id -> feature dict; all_traits collects every trait name seen.
punks = {}
all_traits = set()
for punk in punk_json['punks']:
    # Skip SatoshiStreetBets
    if 'id' not in punk:
        continue
    # The literal string 'NULL' marks a punk without attributes; give it a
    # placeholder trait so it still receives a flag column of its own.
    if punk['attributes'] == 'NULL':
        punk_traits = ['NoTraits']
    else:
        punk_traits = list_traits(punk['attributes'])
    punks[punk['id']] = {
        "ranking": punk["ranking"],
        "skin": punk["skin"],
        "type": punk["type"],
        "traits": punk_traits,
    }
    all_traits.update(punk_traits)

# Replace each punk's trait list with one boolean flag per known trait.
# The traits are sorted so the resulting column order is the same on every
# run (iterating the raw set depends on string hash randomisation).
trait_columns = sorted(all_traits)
for punk in punks.values():
    my_traits = punk.pop('traits')
    for trait in trait_columns:
        punk[trait] = False
    for trait in my_traits:
        punk[trait] = True
# Load pricing data
with open('all_solpunks_sold.json', encoding='utf-8') as sales_data:
    sales = json.load(sales_data)

# Process the list back to front: when a punk appears in several sales, the
# entry nearest the start of the file is written last and is the one kept.
# NOTE(review): presumably the file lists the newest sale first -- confirm.
sales.reverse()
for sale in sales:
    punk_id = sale['name']
    # Skip SatoshiStreetBets
    # Names are expected to look like "<label>#<number>"; the number is
    # zero-padded to four characters before being used as the key into punks.
    try:
        punk_id = punk_id.split('#')[1].zfill(4)
    except (AttributeError, IndexError):
        # No '#' in the name (or the name is not a string): not a punk sale.
        continue
    # A KeyError here means the sale refers to an id missing from punks.
    punks[punk_id]['price'] = sale['price']
# One row per punk, in the insertion order of the punks dict.
df = pd.DataFrame(list(punks.values()))

# 'ranking' and 'price' are left as they are; every other column (skin, type
# and the per-trait flags) is expanded into indicator columns.  Building the
# list from df.columns keeps the column order deterministic, unlike a set
# difference.
non_dummy_cols = ['ranking', 'price']
dummy_cols = [col for col in df.columns if col not in non_dummy_cols]
all_X = pd.get_dummies(df, columns=dummy_cols, drop_first=True)

# Only punks that have a recorded sale price can be used for fitting.
priced = all_X[all_X.price.notnull()]
Y = priced['price']
# We don't want to train on price, we want to predict it
X = priced.drop(columns='price')

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=.20, random_state=40)
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)
predicted = regr.predict(X_test)

# Drop into the debugger so the fitted model can be explored interactively.
pdb.set_trace()
# To get prediction for punk id #9203, use
# regr.predict(all_X.drop(columns='price'))[9203]
# (the price column must be dropped: the model was fitted without it and it
# holds NaN for unsold punks).  The index is a row position in df --
# NOTE(review): this assumes row 9203 is punk #9203; confirm against the data.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment