Dylan Friedmann dylanjf

Data Scientist

dylanjf / gist:5769125

Created June 12, 2013 21:08

	# Load the Matrix package, which provides sparse.model.matrix().
	library("Matrix")

	# Replace amazon_train with a sparse design matrix built from all of its
	# columns; the "- 1" term in the formula removes the intercept column.
	amazon_train <- sparse.model.matrix(~ . - 1, data = amazon_train)

dylanjf / gist:5774446

Last active December 18, 2015 11:19

	# ACTION1 is really just a placeholder variable, added so that the training
	# examples won't be removed when match is called.
	# Also, make sure it ends up in the same column as it is in the training set.
	amazon_test$ACTION1 <- 1

	# Replace amazon_test with a sparse design matrix built from all of its
	# columns; the "- 1" term in the formula removes the intercept column.
	amazon_test <- sparse.model.matrix(~ . - 1, data = amazon_test)

dylanjf / gist:5832136

Last active December 18, 2015 19:19

greedy selection R

	# Read the training CSV and separate it into amazon_Ytrain and amazon_train.
	amazon_train <- read.csv("C:/Users/dylanjf/Desktop/amazon/train.csv")
	# Drop column 10 first, so the indices used below refer to the reduced table.
	amazon_train <- amazon_train[, -10]
	# Column 1 is pulled out as a one-column matrix (presumably the target; confirm).
	amazon_Ytrain <- as.matrix(amazon_train[, 1])
	# Everything except column 1 stays in amazon_train.
	amazon_train <- amazon_train[, -1]

	# Read the test CSV and keep only its first eight columns.
	amazon_test <- read.csv("C:/Users/dylanjf/Desktop/amazon/test.csv")
	amazon_test <- amazon_test[, 1:8]

dylanjf / gist:5879356

Last active December 19, 2015 01:58

	### "naive bayesian" approach... if matched in training to test, return the probability
	### that the person was accepted over all training examples ###

	# Concatenate all base variables except for RESOURCE (columns 2 through 8)
	# into one key string per row, for both the training and the test set.
	# NOTE(review): with sep = "" different value combinations can produce the
	# same key (e.g. "1","23" and "12","3"); confirm the column values rule this
	# out before relying on exact key matches.

	train_person <- paste(amazon_Xtrain[, 2], amazon_Xtrain[, 3], amazon_Xtrain[, 4],
	                      amazon_Xtrain[, 5], amazon_Xtrain[, 6], amazon_Xtrain[, 7],
	                      amazon_Xtrain[, 8], sep = "")

	test_person <- paste(amazon_Xtest[, 2], amazon_Xtest[, 3], amazon_Xtest[, 4],
	                     amazon_Xtest[, 5], amazon_Xtest[, 6], amazon_Xtest[, 7],
	                     amazon_Xtest[, 8], sep = "")

dylanjf / gist:7011219

Last active December 25, 2015 17:09

3 rep 10 fold CV

	########3 rep 10 fold CV to determine feature sparsity percentage via RFE#########

	#X = concatenated text features for training set (title, body, url) transformed via TfidfVectorizer
	#y = training set classification (0, 1)

	import numpy as np
	import pandas as pd
	import sklearn.linear_model as lm
	from sklearn.cross_validation import KFold
	from sklearn import metrics

dylanjf / gist:bfdf0ce9633f67815574

Last active August 29, 2015 14:06

	import numpy as np


	class CorrMatrix():
	"""
	creates and displays the correlation matrix for a data set in a
	memory efficient manner.

	additionally, allows the option to enable feature selection, cutting off
	highly correlated features at a given threshold.

dylanjf / gist:389f7b56ef73ad6ae75b

Last active August 29, 2015 14:06

rare count by column

	from csv import DictReader
	import numpy as np
	import pandas as pd


	class NestedDictCreator(dict):
	"""
	Implementation of Perl's autovivification feature.
	Used to generate nested dictionaries on the fly.
	"""

dylanjf / inheritance_ex.py

Created June 10, 2020 19:28

	from abc import ABCMeta, abstractmethod
	from typing import Dict

	class Base(metaclass=ABCMeta):
	    """Abstract base that stores ``a`` and requires a ``sub_thing`` override."""

	    def __init__(self, a):
	        # Keep the constructor argument on the instance as-is.
	        self.a = a

	    @abstractmethod
	    def sub_thing(self, **data) -> Dict:
	        """Abstract hook taking arbitrary keyword arguments; annotated to return a Dict.

	        The base body is empty, so subclasses must override it before they
	        can be instantiated.
	        """

dylanjf / inheritance_ex2.py

Created June 10, 2020 19:42

	from abc import ABCMeta, abstractmethod
	from typing import Dict

	class Base(metaclass=ABCMeta):
	    """Abstract base that stores ``a`` and requires a ``sub_thing`` override."""

	    def __init__(self, a):
	        # Keep the constructor argument on the instance as-is.
	        self.a = a

	    @abstractmethod
	    def sub_thing(self, **data) -> Dict:
	        """Abstract hook taking arbitrary keyword arguments; annotated to return a Dict.

	        The base body is empty, so subclasses must override it before they
	        can be instantiated.
	        """