This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lxml import html | |
import requests | |
from time import sleep | |
import json | |
import argparse | |
from collections import OrderedDict | |
from time import sleep | |
def parse(ticker): | |
url = "http://finance.yahoo.com/quote/%s?p=%s"%(ticker,ticker) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LinearRegression, Ridge | |
from sklearn.model_selection import train_test_split | |
# Regression inputs: `revenue` is the target; it and the other columns listed
# below are excluded from the feature matrix.
y = df_1['revenue']
X = df_1.drop(
    columns=[
        'revenue',
        'above_ave_rev_yr',
        'original_language',
        'original_title',
        'overview',
        'release_date',
        'status',
        'tagline',
        'title',
    ],
)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0
# and removed in 1.2 -- confirm the pinned version before upgrading.
r_model = Ridge(normalize=True)
lm_model = LinearRegression(normalize=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LinearRegression, Ridge | |
from sklearn.model_selection import train_test_split | |
# Regression inputs: `revenue` is the target; it and `release_date` are
# excluded from the feature matrix.
y = df_final['revenue']
X = df_final.drop(columns=['revenue', 'release_date'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0
# and removed in 1.2 -- confirm the pinned version before upgrading.
r_model = Ridge(normalize=True)
lm_model = LinearRegression(normalize=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.impute import KNNImputer | |
# K-nearest-neighbour imputation, with the neighbour count set to the rounded
# square root of the number of rows in df_1.
imputer = KNNImputer(
    n_neighbors=int(round(np.sqrt(df_1.shape[0]))),
)

# Renumber the rows in place, discarding the old index.
df_1.reset_index(drop=True, inplace=True)

# `release_date` is left out of the frame handed to the imputer.
impute_df = df_1.drop(columns='release_date')
impute_df_filled = imputer.fit_transform(X=impute_df)

# Re-wrap the imputer output in a DataFrame carrying the same column labels
# as the frame that was imputed.
imputed_cols = list(impute_df.columns)
df_impute = pd.DataFrame(data=impute_df_filled, columns=imputed_cols)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import MinMaxScaler | |
# Columns that get rescaled; every other column is carried over unchanged.
numerical = [
    'orig_title_len',
    'overview_len',
    'tagline_len',
    'title_len',
    'popularity',
    'runtime',
    'vote_average',
    'vote_count',
]

# Rescale each listed column to the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

# Work on a copy so df_1_impute_fill itself is not modified by the assignment.
df_impute_log_minmax = pd.DataFrame(data=df_1_impute_fill).copy()
df_impute_log_minmax[numerical] = scaler.fit_transform(
    df_1_impute_fill[numerical]
)

# Keep a second name for the final transformed frame (same object, not a copy).
df_impute_transformed = df_impute_log_minmax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV | |
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier | |
from sklearn.ensemble import GradientBoostingClassifier | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.metrics import fbeta_score, accuracy_score | |
models = {'log_model': LogisticRegression(random_state=0), | |
'log_cv_model': LogisticRegressionCV(), | |
'ab_model':AdaBoostClassifier(random_state=0), | |
'rf_model': RandomForestClassifier(random_state=0), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LinearRegression, Ridge | |
from sklearn.model_selection import train_test_split | |
# Regression inputs: `vote_average` is the target; it and `release_date` are
# excluded from the feature matrix.
y = df_final['vote_average']
X = df_final.drop(columns=['vote_average', 'release_date'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0
# and removed in 1.2 -- confirm the pinned version before upgrading.
r_model = Ridge(normalize=True)
lm_model = LinearRegression(normalize=True)