joaopcnogueira’s gists

joaopcnogueira / subfloat.tex

Last active June 28, 2018 15:50 — forked from ashwin/subfloat.tex

Sub-figures using subfloat in LaTeX

	% Package for subfloat
	\usepackage{subfig}

	% Figure with two sub-figures
	\begin{figure}
	\centering
	\subfloat[caption of the figure goes here]
	{
	\includegraphics[scale=.5]{fig-1.pdf}
	\label{fig:foo-1}

joaopcnogueira / latlon2km.py

Created August 16, 2018 15:32

Function to calculate the Euclidean distance between two points given by their latitude/longitude coordinates

	import numpy as np
	def latlon2km(latlon1,latlon2):
	# approximate radius of earth in km
	R = 6373.0

	lat1 = np.radians(latlon1[0])
	lon1 = np.radians(latlon1[1])
	lat2 = np.radians(latlon2[0])
	lon2 = np.radians(latlon2[1])

joaopcnogueira / backward_elimination.py

Last active December 18, 2023 14:58

Feature selection by Backward Elimination using p-value

	import numpy as np
	import statsmodels.formula.api as sm
	def backward_elimination(X, y, sl):
	"""
	X: the data matrix with the independent variables (predictors)
	y: the matrix of the dependent variable (target)
	sl: statistical level, by default the user should add 0.05 (5%)
	"""
	X = np.append(arr=np.ones((len(X),1)).astype(int), values=X, axis=1)
	while(True):

joaopcnogueira / backward_elimination2.py

Last active October 9, 2023 12:26

Feature selection by Backward Elimination using both the p-value and the adjusted r-squared

	import numpy as np
	import statsmodels.formula.api as sm
	def backward_elimination2(X, y, sl):
	"""
	X: the data matrix with the independent variables (predictors)
	y: the matrix of the dependent variable (target)
	sl: statistical level, by default the user should add 0.05 (5%)
	"""
	X = np.append(arr=np.ones((len(X),1)).astype(int), values=X, axis=1)

joaopcnogueira / cpf2number.py

Last active June 12, 2019 11:59

CPF string to number

	# Convert a formatted CPF string (digit groups separated by "." and "-") into an integer.
	import re

	# Sample formatted CPF used to demonstrate the conversion.
	my_string = "012.345.678-09"
	# Matches each run of consecutive decimal digits.
	digits_pattern = r"\d+"

	# Collect the digit groups in order, skipping the "." and "-" separators.
	digits = re.findall(digits_pattern, my_string)
	# Join the groups into one digit string and parse it as an int.
	# NOTE: int() discards the leading zero, so the 11-digit input becomes a 10-digit number.
	number = int(''.join(digits))
	print(number)

	# output: 1234567809

joaopcnogueira / custom_groupby_functions.py

Last active July 13, 2019 14:42

Custom groupby function

	"""
	Defining a custom function to be applied in pandas groupby
	"""

	import numpy as np
	import pandas as pd

	# Toy data: 9 client names and 9 product names
	# (presumably paired by position — confirm against the code that follows).
	clients = ['joao', 'joao', 'joao', 'lucas', 'lucas', 'julia', 'julia', 'julia', 'julia']
	products = ['smartphone', 'notebook', 'book', 'ball', 'car', 'hat', 'bike', 'mouse', 'pen']

joaopcnogueira / custom_groupby_functions.R

Last active July 19, 2019 17:01

Custom group_by function in R

	library(tidyverse)

	# toy dataset
	df <- tibble(
	clientes = c('joao', 'joao', 'joao', 'lucas', 'lucas', 'julia', 'julia', 'julia', 'julia'),
	produtos = c('celular', 'notebook', 'livro', 'bola', 'carro', 'chapéu', 'moto', 'moto', 'caneta')
	)

	# custom function
	get_produtos <- function(produtos){

joaopcnogueira / groupkfold_example.py

Last active July 11, 2019 13:53

Simple Example using GroupKFold with Cross-Validate

	import numpy as np
	import pandas as pd
	from sklearn import datasets
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.metrics import accuracy_score
	from sklearn.model_selection import GroupKFold

	# Loading the data
	iris = datasets.load_iris()
	# Build a single matrix with the target appended as the last column: the target is
	# reshaped to a (150, 1) column so it can be concatenated with the features along axis=1.
	design_matrix = np.concatenate((iris['data'], iris['target'].reshape(150,1)), axis=1)

joaopcnogueira / titanic-pipeline.py

Last active July 11, 2019 14:08

Refactored titanic code with pipelines

	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.impute import SimpleImputer
	from category_encoders import OneHotEncoder

	# reading the dataset
	df = pd.read_csv("train.csv")

joaopcnogueira / titanic-pipeline2.py

Last active July 11, 2019 14:55

K-fold cross-validation with pipeline

	import pandas as pd
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.impute import SimpleImputer
	from category_encoders import OneHotEncoder
	from sklearn.model_selection import KFold
	from sklearn.model_selection import cross_validate

	# reading the dataset

João Paulo Nogueira joaopcnogueira