pytholabs_bot1 pytholabsbot1

😊

Empowering OpenSource & Research

An institute that emphasizes the future

pytholabsbot1 / ols.py

Created January 30, 2019 17:31

	##create a model to estimate the relationship b/w x and y
	##optimization model = OLS estimator

	def ols(x,y):
		"""Compute the ordinary-least-squares slope of ``y`` regressed on ``x``.

		Both arguments must support ``.mean()`` and elementwise arithmetic
		(for example NumPy arrays). The slope estimate is
		``sum((y - mean(y)) * (x - mean(x))) / sum((x - mean(x)) ** 2)``.

		NOTE(review): only the slope computation is visible in this snippet and
		nothing is returned; the rest of the function (intercept, return value)
		appears to be cut off -- confirm against the full gist.
		"""
		y_ = y.mean()
		x_ = x.mean()

		# The product needs an explicit `*` (adjacent parentheses would be a
		# call on an array), and the denominator is a sum of squares, so it
		# uses `**`, not `*`.
		b1 = np.sum((y - y_) * (x - x_)) / np.sum((x - x_) ** 2)

pytholabsbot1 / cd

Created February 15, 2019 05:40

	def ent(df,attribute):
		"""Begin the entropy computation for one attribute column of ``df``.

		For every distinct value of ``df[attribute]`` and every distinct label
		in ``df.play``, count the rows where both match.

		NOTE(review): the snippet stops after the count; the accumulators are
		never updated and nothing is returned -- the remainder presumably lives
		in the full gist; confirm before relying on this function.
		"""
		target_variables = df.play.unique()  # distinct labels in the `play` column
		variables = df[attribute].unique()  # distinct values of the chosen attribute

		entropy_attribute = 0
		for variable in variables:
			entropy_each_feature = 0
			# Loop-invariant for the inner loop: rows holding this attribute value.
			has_value = df[attribute] == variable
			for target_variable in target_variables:
				# One combined mask instead of chaining two boolean indexers;
				# the chained form indexed an already-filtered Series with a
				# full-length mask and relied on pandas re-aligning it.
				num = int((has_value & (df.play == target_variable)).sum())  # numerator

pytholabsbot1 / askjdj

Created February 15, 2019 06:01

	##1. calculate entropy of the whole dataset

	# Base-2 entropy of the `play` column: -sum(p * log2(p)) over its distinct
	# values, where p is the share of rows holding that value.
	entropy_node = 0  # running total
	values = df.play.unique()  # distinct labels in the `play` column
	label_counts = df.play.value_counts()  # computed once rather than per iteration
	total_rows = len(df.play)
	for value in values:
		fraction = label_counts[value] / total_rows
		entropy_node += -fraction * np.log2(fraction)

pytholabsbot1 / sdmnfj

Created February 15, 2019 06:03

	# Five parallel columns of 14 observations each. Every column is written as
	# one comma-separated string and split into a list of string values.
	outlook, temp, humidity, windy, play = (
		column.split(',')
		for column in (
			'overcast,overcast,overcast,overcast,rainy,rainy,rainy,rainy,rainy,sunny,sunny,sunny,sunny,sunny',
			'hot,cool,mild,hot,mild,cool,cool,mild,mild,hot,hot,mild,cool,mild',
			'high,normal,high,normal,high,normal,normal,normal,high,high,high,high,normal,normal',
			'FALSE,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,FALSE,TRUE,FALSE,TRUE,FALSE,FALSE,TRUE',
			'yes,yes,yes,yes,yes,yes,no,yes,no,no,no,no,yes,yes',
		)
	)

pytholabsbot1 / adskjfhksd

Created February 15, 2019 06:04

	# Gather the five column lists under their column names.
	dataset = {
		'outlook': outlook,
		'temp': temp,
		'humidity': humidity,
		'windy': windy,
		'play': play,
	}
	# Build the frame with an explicit column order.
	df = pd.DataFrame(
		data=dataset,
		columns=['outlook', 'temp', 'humidity', 'windy', 'play'],
	)

pytholabsbot1 / kmdnvknxckv

Created February 21, 2019 14:42

	import tweepy

	# API_KEY and API_SECRET are not defined in this snippet: substitute your
	# own application's key and secret before running.
	auth = tweepy.AppAuthHandler(API_KEY, API_SECRET)

	# Client configured to wait when a rate limit is hit and to report that
	# it is waiting.
	# NOTE(review): `wait_on_rate_limit_notify` is not accepted by newer
	# Tweepy releases (4.x) -- confirm the installed version.
	api = tweepy.API(
		auth,
		wait_on_rate_limit=True,
		wait_on_rate_limit_notify=True,
	)

pytholabsbot1 / sdkjfhkjsh

Last active February 21, 2019 15:03

	import sys
	import jsonpickle
	import os

	# Settings for the tweet search.

	# Term to search for.
	searchQuery = 'brexit'
	# Cap on the number of tweets to collect (an arbitrary large number).
	maxTweets = 1000
	# Tweets requested per query; noted by the author as the API maximum.
	tweetsPerQry = 100
	# Text file the tweets will be stored in.
	fName = 'tweets.txt'

pytholabsbot1 / gist:c3a3c334d25f27e099acb8d040eda632

Last active March 27, 2019 05:58

	import pandas as pd
	%matplotlib inline
	import matplotlib
	import matplotlib.pyplot as plt
	import numpy as np

	# The CSV files carry no header row, so column names are passed in
	# explicitly when they are read with pandas. The names can be checked
	# against the dataset's readme file.
	# Columns of the users file:
	u_cols = [
		'user_id',
		'age',
		'sex',
		'occupation',
		'zip_code',
	]

pytholabsbot1 / gist:ea27704ff41cf7d52dce627cdabaafc1

Created March 27, 2019 05:59

	# Column names applied to the tab-separated ratings files.
	r_cols = [
		'user_id',
		'movie_id',
		'rating',
		'unix_timestamp',
	]

	# Training split.
	ratings_train = pd.read_csv(
		'ml-100k/ua.base',
		sep='\t',
		names=r_cols,
		encoding='latin-1',
	)

	# Test split, read with the same options.
	ratings_test = pd.read_csv(
		'ml-100k/ua.test',
		sep='\t',
		names=r_cols,
		encoding='latin-1',
	)

	# Notebook-style display of both frame shapes.
	ratings_train.shape, ratings_test.shape

pytholabsbot1 / gist:67900c435877925a3159124e5b7f614b

Last active March 27, 2019 14:54

	def predict(ratings, similarity, type='user'):

	if type == 'user':
	mean_user_rating = ratings.mean(axis=1).reshape(-1,1)
	#We use np.newaxis so that mean_user_rating has same format as ratings

	ratings_diff = (ratings - mean_user_rating)
	pred = mean_user_rating + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T

	elif type == 'item':