Emre Can emredjan

Long-time computer enthusiast, MBA Big Data & Business Analytics, Data specialist / engineer / scientist, Finance professional

emredjan / cactivate.ps1

Last active November 22, 2018 13:51

Activate conda environment in PowerShell

	# Takes the name of a Conda environment and stores the path of its folder
	# in the CONDA_PREFIX environment variable.
	Param
	(
	# Environment name: required, first positional argument, typed as a string.
	[parameter(Position=0, Mandatory=$true)]
	[String]
	$env_name
	)

	# simulate activation of Conda environment
	# NOTE(review): the installation path below looks like a placeholder --
	# confirm it is replaced with the real Anaconda location before use.
	$cenv = "C:\Path\to\anaconda\installation\envs\$env_name"
	$Env:CONDA_PREFIX = $cenv

emredjan / json_to_csv.py

Last active October 21, 2022 01:46

Yelp Dataset Challenge JSON to CSV conversion

	'''
	Load Yelp JSON files and spit out CSV files
	Does not try to reinvent the wheel and uses pandas json_normalize
	Kinda hacky and requires a bit of RAM. But works, albeit naively.

	Tested with Yelp JSON files in dataset challenge round 12:
	https://www.yelp.com/dataset/challenge
	'''

	import json

emredjan / emulate_r_plot_9.py

Created April 23, 2018 17:21

	# Figure 4 (12 x 8 inches): scatter of the normalized residuals against
	# leverage, overlaid with a red LOWESS trend line.
	plot_lm_4 = plt.figure(4)
	plot_lm_4.set_figheight(8)
	plot_lm_4.set_figwidth(12)

	plt.scatter(model_leverage, model_norm_residuals, alpha=0.5)
	# x and y are passed by keyword: seaborn 0.12+ makes them keyword-only, so
	# the positional form raises a TypeError there. scatter=False because the
	# points were already drawn by plt.scatter above.
	sns.regplot(x=model_leverage, y=model_norm_residuals,
		scatter=False,
		ci=False,
		lowess=True,
		line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})

emredjan / emulate_r_plot_8.py

Created April 23, 2018 17:20

	# Figure 3 (12 x 8 inches): scatter of model_norm_residuals_abs_sqrt
	# against the fitted values, overlaid with a red LOWESS trend line.
	plot_lm_3 = plt.figure(3)
	plot_lm_3.set_figheight(8)
	plot_lm_3.set_figwidth(12)

	plt.scatter(model_fitted_y, model_norm_residuals_abs_sqrt, alpha=0.5)
	# x and y are passed by keyword: seaborn 0.12+ makes them keyword-only, so
	# the positional form raises a TypeError there. scatter=False because the
	# points were already drawn by plt.scatter above.
	sns.regplot(x=model_fitted_y, y=model_norm_residuals_abs_sqrt,
		scatter=False,
		ci=False,
		lowess=True,
		line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})

emredjan / emulate_r_plot_7.py

Created April 23, 2018 17:19

	# Q-Q plot of the normalized residuals, drawn with a 45-degree reference
	# line; qqplot returns the figure it drew on.
	QQ = ProbPlot(model_norm_residuals)
	plot_lm_2 = QQ.qqplot(line='45', alpha=0.5, color='#4C72B0', lw=1)

	# Width 12, height 8 (inches).
	plot_lm_2.set_size_inches(12, 8)

	# Label the first (only referenced) axes of the figure.
	qq_ax = plot_lm_2.axes[0]
	qq_ax.set_title('Normal Q-Q')
	qq_ax.set_xlabel('Theoretical Quantiles')
	qq_ax.set_ylabel('Standardized Residuals');

emredjan / emulate_r_plot_6.py

Created April 23, 2018 17:19

	# Figure 1 (12 x 8 inches): residual plot of 'mpg' against the fitted
	# values with a red LOWESS trend line.
	plot_lm_1 = plt.figure(1)
	plot_lm_1.set_figheight(8)
	plot_lm_1.set_figwidth(12)

	# No `ax` is given, so residplot draws on the current axes, which become
	# plot_lm_1.axes[0]. The result is deliberately not assigned back into
	# plot_lm_1.axes[0]: Figure.axes hands out a list that must not be
	# modified, so such an assignment has no effect on the figure.
	# x and y are passed by keyword because seaborn 0.12+ makes them
	# keyword-only.
	sns.residplot(x=model_fitted_y, y='mpg', data=auto,
		lowess=True,
		scatter_kws={'alpha': 0.5},
		line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8})

	plot_lm_1.axes[0].set_title('Residuals vs Fitted')

emredjan / emulate_r_plot_5.py

Created April 23, 2018 17:18

	# Quantities pulled from the fitted model (model_fit) for the diagnostic plots.

	# fitted values (need a constant term for intercept)
	model_fitted_y = model_fit.fittedvalues

	# model residuals
	model_residuals = model_fit.resid

	# normalized residuals (internally studentized, read from the fit's influence object)
	model_norm_residuals = model_fit.get_influence().resid_studentized_internal

	# square root of the absolute normalized residuals

emredjan / emulate_r_plot_4.py

Created April 23, 2018 17:17

	# Formula with mpg as the response and seven predictors. The trailing
	# backslashes continue a single string literal across lines, so model_f
	# is one string.
	model_f = 'mpg ~ cylinders + \
	displacement + \
	horsepower + \
	weight + \
	acceleration + \
	year + \
	origin'

	# Build the OLS model from the formula over the `auto` data, then fit it;
	# model_fit holds the fitted results.
	model = smf.ols(formula=model_f, data=auto)
	model_fit = model.fit()

emredjan / emulate_r_plot_3.py

Created April 23, 2018 17:16

	# Load Auto.csv treating '?' as missing, discard rows with any missing
	# value, and renumber the index from zero without keeping the old one.
	auto = (
		pd.read_csv('Auto.csv', na_values=['?'])
		.dropna()
		.reset_index(drop=True)
	)

emredjan / emulate_r_plot_2.py

Created April 23, 2018 17:15

	%matplotlib inline

	import numpy as np
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import statsmodels.formula.api as smf

	from statsmodels.graphics.gofplots import ProbPlot