Chris Setzkorn (csetzkorn)
@csetzkorn
csetzkorn / gist:7b134cd25ccf08c508aeb002ddf699a9
Last active September 5, 2018 12:39
Simple text classification example using Keras and a word embedding
# see also: https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/
from numpy import array
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
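The preview cuts off after the imports; following the linked tutorial, a minimal end-to-end sketch might look like this (the toy documents and labels are illustrative assumptions, not the gist's own data):

# Tiny labelled corpus (illustrative only)
docs = ['well done', 'good work', 'great effort', 'poor effort', 'weak']
labels = array([1, 1, 1, 0, 0])

# Hash each word to an integer index in a fixed-size vocabulary
vocab_size = 50
encoded_docs = [one_hot(d, vocab_size) for d in docs]

# Pad sequences to a common length so they form a 2-D input matrix
max_length = 4
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

# Learn an 8-dimensional embedding per word, then classify with a dense layer
model = Sequential()
model.add(Embedding(vocab_size, 8, input_length=max_length))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.fit(padded_docs, labels, epochs=50, verbose=0)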
@csetzkorn
csetzkorn / gist:0440cf16f27011609aceda8d052f7877
Created August 29, 2018 08:28
Monte Carlo integration (see DataCamp)
import numpy as np

# Define the sim_integrate function
def sim_integrate(func, xmin, xmax, sims):
    # Sample x uniformly over the integration interval
    x = np.random.uniform(xmin, xmax, sims)
    # Sample y uniformly over a bounding box that contains the curve
    y = np.random.uniform(min(min(func(x)), 0), max(func(x)), sims)
    area = (max(y) - min(y)) * (xmax - xmin)
    # Fraction of points under the curve times box area approximates the integral
    result = area * sum(abs(y) < abs(func(x))) / sims
    return result

# Call the sim_integrate function and print the result
result = sim_integrate(func=lambda x: x * np.exp(x), xmin=0, xmax=1, sims=50)
print(result)
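The exact value of the integral of x*exp(x) over [0, 1] is (x - 1)*exp(x) evaluated between 0 and 1, i.e. exactly 1, so the estimate can be checked directly; the error should shrink roughly like 1/sqrt(sims):

# Compare the Monte Carlo estimate against the known exact value of 1
exact = 1.0
for sims in [50, 500, 5000, 50000]:
    est = sim_integrate(func=lambda x: x * np.exp(x), xmin=0, xmax=1, sims=sims)
    print(sims, est, abs(est - exact))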
@csetzkorn
csetzkorn / gist:f5bbe72d5225151730d60734be3dc4a6
Created August 29, 2018 07:43
jackknife estimate of median and CI
import numpy as np

# wrench_lengths is assumed to be a NumPy array of wrench measurements
# (as in the DataCamp exercise); index enumerates its observations
n = len(wrench_lengths)
index = np.arange(n)

# Leave one observation out to get each jackknife sample and store its median
median_lengths = []
for i in range(n):
    jk_sample = wrench_lengths[index != i]
    median_lengths.append(np.median(jk_sample))
median_lengths = np.array(median_lengths)

# Calculate the jackknife estimate and its variance
jk_median_length = np.mean(median_lengths)

# --- Separate snippet: bootstrap the R-squared of an OLS regression ---
import statsmodels.api as sm

rsquared_boot, coefs_boot, sims = [], [], 1000
reg_fit = sm.OLS(df['y'], df.iloc[:, 1:]).fit()

# Run 1K iterations
for i in range(sims):
    # First create a bootstrap sample with replacement, with n=df.shape[0]
    bootstrap = df.sample(n=df.shape[0], replace=True)
    # Fit the regression and append the R-squared to rsquared_boot
    rsquared_boot.append(sm.OLS(bootstrap['y'], bootstrap.iloc[:, 1:]).fit().rsquared)
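Both snippets stop before the confidence intervals; a plausible continuation, using the standard jackknife variance formula and the bootstrap percentile method (the 1.96 multiplier assumes approximate normality of the jackknife estimate):

# Jackknife variance: (n - 1) times the mean squared deviation of the
# leave-one-out medians around their mean
jk_var = (n - 1) * np.var(median_lengths)

# Approximate 95% CI for the median
jk_lower = jk_median_length - 1.96 * np.sqrt(jk_var)
jk_upper = jk_median_length + 1.96 * np.sqrt(jk_var)
print(jk_lower, jk_upper)

# Percentile-method 95% CI for the bootstrapped R-squared
print(np.percentile(rsquared_boot, [2.5, 97.5]))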
@csetzkorn
csetzkorn / gist:88fccf1fdb5bfc27bc406f52d763a631
Created September 23, 2017 20:00
iris hierarchical clustering
library(dplyr)
library(ggplot2)

setwd('D:\\ToyData')

# Read the tab-separated iris data straight from S3
OrginalData <- read.table("https://s3.amazonaws.com/christiandata887342ac-a3ce-4600-94d0-9092f4a6bd20/IrisTabSepData/IrisData.txt",
                          header = TRUE, sep = "\t")
head(OrginalData)
@csetzkorn
csetzkorn / gist:628621c87fd97fb018f14882a8497dbf
Created September 23, 2017 19:27
Hierarchical Clustering Iris
library(dplyr)
library(ggplot2)

setwd('D:\\ToyData')

OrginalData <- read.table("IrisData.txt",
                          header = TRUE, sep = "\t")

# Select the numeric feature columns for clustering
SubsetData <- subset(OrginalData, select = c(
  #"SepalLength"
@csetzkorn
csetzkorn / gist:5b58794316c1983f85d39c29153034ec
Created September 17, 2017 20:35
Compare PNGs in Python
import os
from scipy.misc import imread
from scipy.linalg import norm
from scipy import sum, average

def compare_images(img1, img2):
    # Normalize to compensate for exposure differences; this may be
    # unnecessary, so consider disabling it
    img1 = normalize(img1)
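The preview ends inside compare_images. A plausible completion, following the common Manhattan-norm/zero-norm comparison this snippet appears to be based on (the normalize helper is an assumption, not shown in the gist):

import numpy as np

def normalize(img):
    # Assumed helper: rescale pixel intensities to the full [0, 255] range
    rng = img.max() - img.min()
    return (img - img.min()) * 255.0 / rng if rng else img

def compare_images(img1, img2):
    # Normalize to compensate for exposure differences (may be unnecessary)
    img1 = normalize(img1)
    img2 = normalize(img2)
    diff = img1 - img2
    m_norm = np.sum(np.abs(diff))             # Manhattan norm: total difference
    z_norm = np.linalg.norm(diff.ravel(), 0)  # zero norm: count of differing pixels
    return m_norm, z_norm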
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
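This snippet's gist header is missing from the listing and only its imports survive, but they point at a standardized Keras regression scored with k-fold cross-validation. A minimal sketch, assuming a feature matrix X and target vector y are already loaded (the 13-unit input layer matches the Boston housing tutorial these imports usually accompany, an assumption here):

# Assumed model builder for the KerasRegressor wrapper
def baseline_model():
    model = Sequential()
    model.add(Dense(13, input_dim=13, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

# Standardize inside the pipeline so the scaler is re-fit on each training fold
pipeline = Pipeline([
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)),
])
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, y, cv=kfold)  # scores are negative MSE
print('MSE: %.2f (%.2f)' % (-results.mean(), results.std()))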
import pandas as pd
from fbprophet import Prophet
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
df = pd.read_csv('D:/PyCharmProjects/Prophet/Data/AirPassengers.csv')
df['Month'] = pd.DatetimeIndex(df['Month'])
#Prophet also imposes the strict condition that the input columns be named ds (the time column) and y (the metric column)
df = df.rename(columns={'Month': 'ds',
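The preview stops mid-rename, so the name of the passenger-count column (which must become y) is cut off. From there, the usual Prophet workflow would continue along these lines (a sketch, not the gist's own code):

# Assumes the metric column has also been renamed to 'y'
m = Prophet()
m.fit(df)

# Extend two years beyond the observed monthly data and forecast
future = m.make_future_dataframe(periods=24, freq='MS')
forecast = m.predict(future)

# Plot the forecast with its uncertainty intervals
m.plot(forecast)
plt.show()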
@csetzkorn
csetzkorn / gist:8ab0c61b06107f10ed5bc542da47240a
Created May 21, 2017 11:59
Fit a SOM, cluster the prototypes and add cluster membership to the original dataset
library(dplyr)
library(kohonen)

setwd('C:\\Users\\Christian\\Source\\Repos\\RClusteringMixedDataPam')

OrginalData <- read.table("IrisData.txt",
                          header = TRUE, sep = "\t")
SubsetData <- subset(OrginalData, select = c("SepalLength", "SepalWidth", "PetalLength", "PetalWidth"))

#TrainingMatrix <- as.matrix(scale(SubsetData))
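The preview ends just before the SOM is trained. As a reference for the workflow the title describes (fit a SOM, cluster its prototype vectors, map each row to its prototype's cluster), here is a Python sketch with the minisom package standing in for R's kohonen:

import numpy as np
from minisom import MiniSom
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.preprocessing import scale

# Scaled iris features, the analogue of TrainingMatrix above
X = scale(load_iris().data)

# Fit a 5x5 SOM on the four features
som = MiniSom(5, 5, X.shape[1], sigma=1.0, learning_rate=0.5, random_seed=42)
som.train_random(X, 1000)

# Cluster the 25 prototype (codebook) vectors
codebook = som.get_weights().reshape(-1, X.shape[1])
proto_clusters = KMeans(n_clusters=3, n_init=10, random_state=42).fit_predict(codebook)

# Map each observation to its best-matching unit, then to that unit's cluster
bmu = np.array([np.ravel_multi_index(som.winner(x), (5, 5)) for x in X])
membership = proto_clusters[bmu]
print(membership[:10])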