Akram Zaytar Akramz

🎯

Focusing

Geospatial ML @microsoft AI for Good. Previously @IBMResearch. Interested in data science, machine learning, and Computer Vision.

Akramz / rain-vs-nonrain.py

Last active October 25, 2015 00:55

Statistical analysis of the null hypothesis (hourly entries with rain vs. without rain)

	import numpy as np
	from ggplot import *
	import scipy
	import scipy.stats
	import pandas

	# Load the turnstile records from the working directory.
	df = pandas.read_csv('new.csv')
	# Average hourly entries for rows flagged rain == 1 and rain == 0.
	with_rain_mean = np.mean(df.loc[df['rain'] == 1, 'ENTRIESn_hourly'])
	without_rain_mean = np.mean(df.loc[df['rain'] == 0, 'ENTRIESn_hourly'])
	# Mann-Whitney U test on the same two samples (rain first, no-rain second).
	U, p = scipy.stats.mannwhitneyu(
	    df.loc[df['rain'] == 1, 'ENTRIESn_hourly'],
	    df.loc[df['rain'] == 0, 'ENTRIESn_hourly'],
	)

Akramz / distro_visualisation.py

Created October 23, 2015 15:33

Check the distributions of different variables

	import numpy as np
	from ggplot import *
	import scipy
	import scipy.stats
	import pandas
	from data_model import turnstileData

	# Load the older data set through the turnstileData wrapper.
	one = turnstileData('improved_data_set/old.csv')
	# Single-column selections: hourly entries and hourly exits.
	justEntriesH = one.select(elements=['ENTRIESn_hourly'])
	justExistsH = one.select(elements=['EXITSn_hourly'])

Akramz / linear_regression.py

Created October 23, 2015 15:40

	import numpy as np
	from ggplot import *
	import scipy
	import scipy.stats
	import pandas
	from data_model import turnstileData

	def plot_cost_history(alpha, cost_history):
	"""This function is for viewing the plot of your cost history.
	You can run it by uncommenting this

Akramz / data_model.py

Created October 23, 2015 15:53

	import pandas
	import numpy as np

	class turnstileData(object):
	    """Thin wrapper around a table loaded from a CSV file.

	    The table is read once in the constructor and kept on ``self.df``;
	    ``select`` indexes into it.
	    """

	    def __init__(self, filePath):
	        """Read the CSV at ``filePath`` into ``self.df``.

	        ``filePath`` is handed unchanged to ``pandas.read_csv``.
	        """
	        self.df = pandas.read_csv(filePath)

	    def select(self, elements):
	        """Return ``self.df[elements]``.

	        ``elements`` is used directly as the indexing key, e.g. a list of
	        column labels such as ``['ENTRIESn_hourly']``.
	        """
	        return self.df[elements]

Akramz / linear_model_OLS.py

Created October 24, 2015 12:11

Linear regression of tempi -> hourly entries

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import statsmodels.api as sm

	# Load the weather-annotated turnstile data; the first CSV column is the index.
	df = pd.read_csv(
	    '../improved_data_set/turnstile_weather_v2.csv',
	    index_col=0,
	)
	# Mean hourly entries for each distinct 'tempi' value, as a one-column frame.
	dk = df.groupby('tempi')['ENTRIESn_hourly'].mean().to_frame()
	# Expose the group keys (currently the index) as an ordinary column too.
	dk['tempi'] = dk.index

	y = dk['ENTRIESn_hourly']  # response

Akramz / hour_linear_regression.py

Created October 24, 2015 16:20

	import pandas as pd
	import numpy as np
	import statsmodels.api as sm
	import matplotlib.pyplot as plt
	from sys import exit

	# Load the weather-annotated turnstile data; the first CSV column is the index.
	df = pd.read_csv(
	    '../improved_data_set/turnstile_weather_v2.csv',
	    index_col=0,
	)
	# Mean hourly entries for each distinct 'hour' value, as a one-column frame.
	dk = df.groupby('hour')['ENTRIESn_hourly'].mean().to_frame()
	# Expose the group keys (currently the index) as an ordinary column too.
	dk['hour'] = dk.index

Akramz / multivars_linear_aggression.py

Last active October 25, 2015 02:50

	import pandas as pd
	import numpy as np
	import statsmodels.api as sm
	from sys import exit


	# Column names kept in listX; edit this list to try other columns.
	listX = [
	    'day_week',
	    'fog',
	    'rain',
	    'weekday',
	]

	# Load the weather-annotated turnstile data; the first CSV column is the index.
	df = pd.read_csv(
	    '../improved_data_set/turnstile_weather_v2.csv',
	    index_col=0,
	)

Akramz / hourly_entries_distro.py

Created October 24, 2015 22:09

	import numpy as np
	from ggplot import *
	import pandas

	# Load the older data set and keep only the hourly-entries column
	# (double brackets keep it a DataFrame, which is what ggplot is given).
	df = pandas.read_csv('../../improved_data_set/old.csv')
	hourly_entries = df[['ENTRIESn_hourly']]
	# Histogram of hourly entries: red bars with white outlines, labelled axes.
	p = (
	    ggplot(aes(x='ENTRIESn_hourly'), data=hourly_entries)
	    + geom_histogram(color='white', fill='red')
	    + xlab("hourly entries")
	    + ylab("Frequencies")
	)
	# Call form of print: valid on both Python 2 and Python 3, whereas the
	# statement form `print p` is a SyntaxError on Python 3.
	print(p)

Akramz / rain-vs-nonrain-distros.py

Last active October 24, 2015 22:44

	import numpy as np
	import pandas
	import matplotlib.pyplot as plt

	# Load the older data set.
	df = pandas.read_csv('../../improved_data_set/old.csv')

	# One new figure; both histograms below are drawn after it is created.
	plt.figure()
	# Hourly entries for rain == 0 first, then rain == 1.
	df.loc[df['rain'] == 0, 'ENTRIESn_hourly'].hist()
	df.loc[df['rain'] == 1, 'ENTRIESn_hourly'].hist()
	plt.xlabel('hourly entries')

Akramz / linear_model.py

Created October 25, 2015 00:44

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import statsmodels.api as sm
	import sys

	# Column name kept in `element`; change it to try another column.
	element = 'hour'

	# Load the weather-annotated turnstile data; the first CSV column is the index.
	df = pd.read_csv(
	    '../improved_data_set/turnstile_weather_v2.csv',
	    index_col=0,
	)