Gabriel Aparecido Fonseca (gabriel19913)
#!/bin/bash
# Add a new remote; you can call it "upstream":
git remote add upstream https://github.com/usuario/projeto.git
# Fetch all branches from this new remote,
# such as upstream/master:
git fetch upstream
# Then merge the upstream changes into your local master:
git checkout master
git merge upstream/master
gabriel19913 / alacritty.yml
Created July 3, 2020 11:44
Alacritty configuration file for use with Powerline fonts
# Configuration for Alacritty, the GPU enhanced terminal emulator.

# Any items in the `env` entry below will be added as
# environment variables. Some entries may override variables
# set by alacritty itself.
env:
  # TERM variable
  #
  # This value is used to set the `$TERM` environment variable for
  # each instance of Alacritty. If it is not present, alacritty will
  # check the local terminfo database and use `alacritty` if the
  # terminfo is available, otherwise `xterm-256color` is used.
  TERM: xterm-256color
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from urllib.request import urlretrieve

base_url = 'https://link.springer.com/'
books_path = '/content/drive/My Drive/Springer Books/'
# Load the spreadsheet listing the free Springer titles:
books = pd.read_csv(books_path + 'Springer Free Books - Data, Stats, Math & Tech.csv')
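The fragment ends before the download loop, so the remaining imports (requests, BeautifulSoup, urlretrieve) go unused here. A minimal sketch of how they might fit together follows; the column names 'Book Title' and 'OpenURL' and the PDF-link selector are assumptions about this CSV and Springer's page layout, not part of the original gist.

for _, row in books.iterrows():
    title = row['Book Title']                 # assumed column name
    response = requests.get(row['OpenURL'])   # assumed column name
    soup = bs(response.text, 'html.parser')
    # Assumed: the book page exposes a relative link ending in .pdf
    link = soup.find('a', href=lambda h: h and h.endswith('.pdf'))
    if link is not None:
        pdf_url = base_url.rstrip('/') + link['href']
        urlretrieve(pdf_url, books_path + title + '.pdf')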
import numpy as np

def simulate_final_points(final_prediction, std_error, current_points, max_possible_points):
    # Draw a final score from a normal distribution centred on the
    # model's prediction, then clip it to the feasible range:
    points_simulated = np.random.normal(final_prediction, std_error)
    if points_simulated >= max_possible_points:
        points_simulated = max_possible_points
    if points_simulated <= current_points:
        points_simulated = current_points
    return points_simulated
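A hedged usage sketch: repeating the draw many times gives a distribution of plausible final totals for one team. The inputs below (60.2 predicted points, 3.5 standard error, 52 current points, 64 maximum) are made-up illustrative values, not numbers from the gist.

n_sims = 10_000
sims = [simulate_final_points(60.2, 3.5, 52, 64) for _ in range(n_sims)]
print(np.mean(sims), np.percentile(sims, [5, 95]))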
# Predict final points for the 2019 Brasileirão and build a league table:
y_pred_bra_2019 = xgb_reg.predict(pca_X_bra_2019)
y_pred_bra_2019 = pd.DataFrame(np.round(y_pred_bra_2019, 0))
pred_table = (pd.concat([teams_df.reset_index(), y_pred_bra_2019.reset_index()], axis=1)
                .drop(['index'], axis=1)
                .sort_values(0, ascending=False)
                .reset_index())
pred_table['index'] = np.arange(1, 21)  # table positions 1 to 20
pred_table.columns = ['pos', 'team', 'points']
# Prepare the 2019 feature matrix with the scaler and PCA already fitted
# on the training data (transform only, so they are not refitted here):
teams = df_bra_2019['team']
teams_df = pd.DataFrame(teams)
X_bra_2019 = df_bra_2019.drop(['team'], axis=1)
scaled_X_bra_2019 = scaler.transform(X_bra_2019)
pca_X_bra_2019 = pca.transform(scaled_X_bra_2019)
# Fit a second model that predicts the squared error of the main
# regressor, used to set the spread in simulate_final_points:
from sklearn.model_selection import RandomizedSearchCV

error_model = xgb.XGBRegressor()
model_errors = np.square(np.subtract(y_pred, y_test.reshape(1, -1)[0]))
parameters = {'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 0.5, 0.9],
              'n_estimators': [100, 200, 300, 400, 500],
              'max_depth': [3, 4, 6],
              'min_child_weight': [1, 2, 3]}
grid_xgb = RandomizedSearchCV(error_model, parameters, cv=5, n_jobs=-1)
grid_xgb = grid_xgb.fit(X_test, model_errors)
error_model = grid_xgb.best_estimator_
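Presumably the error model feeds simulate_final_points: predicting a squared error for each 2019 team and taking the square root gives a per-team standard deviation. A minimal sketch under that assumption (the zero clipping and the variable names are mine, not the gist's):

squared_err = error_model.predict(pca_X_bra_2019)
std_error = np.sqrt(np.maximum(squared_err, 0))  # guard against negative predictions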
# Tune the main points regressor with a randomized search over a
# small XGBoost hyperparameter grid:
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

xgb_reg = xgb.XGBRegressor()
parameters = {'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3, 0.5, 0.9],
              'n_estimators': [100, 200, 300, 400, 500],
              'max_depth': [3, 4, 6],
              'min_child_weight': [1, 2, 3]}
grid_xgb = RandomizedSearchCV(xgb_reg, parameters, cv=5, n_jobs=-1)
grid_xgb = grid_xgb.fit(X_train, y_train)
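The y_pred used in the error-model fragment is never defined in these snippets; presumably it comes from evaluating the tuned estimator on the held-out set, along these lines (an assumption, not shown in the gist):

xgb_reg = grid_xgb.best_estimator_
y_pred = xgb_reg.predict(X_test)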
from sklearn.model_selection import train_test_split

# Hold out 20% of the observations for testing:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
from sklearn.decomposition import PCA

# Keep enough principal components to explain 99% of the variance:
pca = PCA(0.99, random_state=seed)
pca.fit(scaled_X)
data = pca.transform(scaled_X)
print(f'Percentage of variance explained by each component: {np.round(pca.explained_variance_ratio_ * 100, 3)}')
print(f'Total percentage of variance explained by the components: {np.round(pca.explained_variance_ratio_ * 100, 3).sum():.3f}%')
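scaler and scaled_X are never defined in these fragments; a standard-scaling step along these lines was presumably run first (the choice of StandardScaler is my assumption):

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)  # X: feature matrix without the 'team' column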