This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# FFN to model game outcome | |
import pandas as pd | |
from keras.layers import Dense | |
from keras.models import Sequential | |
import numpy as np | |
if __name__ == "__main__": | |
import sqlite3 | |
with sqlite3.connect("data/heroes.db") as conn: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import py4j | |
from pyspark.sql.functions import monotonically_increasing_id | |
# Very important to cache this: presumably so the generated "index" column is
# materialized once rather than recomputed on each action (PySpark documents
# monotonically_increasing_id() as non-deterministic) -- TODO confirm.
df = df.select(monotonically_increasing_id().alias("index"), "*").cache()
# 34359738368 == 2 ** 35; presumably an upper bound on the generated index
# values -- TODO confirm against how `index` is consumed.
MAX = 34359738368
def mod_binary_search(round, previous_winner, dataset): | |
# round starts at 0, previous_winner starts at 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def mod_binary_search(round, previous_winner, dataset):
    """Refine a candidate value one power of two at a time.

    Call with ``round=0`` and ``previous_winner=0`` to start a search.

    On each round the current candidate is checked with
    ``test(dataset, previous_winner, 2 ** (round + 1))``. When the check
    passes the candidate is kept; otherwise ``2 ** round`` is added to it.
    The search stops as soon as ``2 ** round >= len(dataset)``.

    Args:
        round: Zero-based round number to resume from. (The name shadows
            the builtin but is kept so existing keyword callers still work.)
        previous_winner: Candidate carried over from the previous round.
        dataset: Sized collection passed through to ``test``.

    Returns:
        The final candidate once the stopping condition is met.
    """
    # Iterative form of the original tail recursion: same sequence of
    # `test` calls, same result.
    while 2 ** round < len(dataset):
        mod = 2 ** (round + 1)
        if not test(dataset, previous_winner, mod):
            # NOTE(review): presumably this sets bit `round` of the candidate
            # residue -- confirm against the definition of `test`.
            previous_winner += 2 ** round
        round += 1
    return previous_winner
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def sample(N=2):
    """Return the N-th draw, given that it is the one that lifts the sum to >= 1.

    Rejection sampling: ``N - 1`` values from ``random()`` are summed. If
    that partial sum is already >= 1 the attempt is discarded. Otherwise one
    more value ``uN`` is drawn and returned only when the partial sum plus
    ``uN`` is >= 1; if not, the whole attempt is retried.

    Args:
        N: Number of draws per attempt; must be at least 2.

    Returns:
        The accepted final draw ``uN``.

    Raises:
        ValueError: If ``N < 2``. With fewer than two draws the partial sum
            is 0, so (assuming ``random()`` yields values below 1) the
            acceptance test could never pass and the loop would never end.
    """
    if N < 2:
        raise ValueError("N must be at least 2")
    while True:
        partial_sum = sum(random() for _ in range(N - 1))
        if partial_sum >= 1:
            # The first N-1 draws already reached 1; start over.
            continue
        uN = random()
        if partial_sum + uN >= 1:
            return uN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from operator import add | |
from itertools import combinations | |
from math import sqrt | |
def emit_pairs(words):
    """Yield ``(pair, 1)`` for every 2-element combination of ``words``.

    Pairs come from ``itertools.combinations(words, 2)``, so they follow the
    input order and each is paired with the count ``1``. Inputs with fewer
    than two items yield nothing.
    """
    for word_pair in combinations(words, 2):
        yield word_pair, 1
def cosine_similarity((w1,w2), cross_product, magnitudes): | |
similarity = cross_product/sqrt(magnitudes.value[w1])/sqrt(magnitudes.value[w2]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
declare @VoteStats table (parentid int, id int, U float, D float) | |
insert @VoteStats | |
SELECT | |
a.parentid, | |
a.id, | |
CAST(SUM(case when (VoteTypeID = 2) then 1. else 0. end) + 1. as float) as U, | |
CAST(SUM(case when (VoteTypeID = 3) then 1. else 0. end) + 1. as float) as D | |
FROM Posts q | |
JOIN PostTags qt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
documents = sc.parallelize([ | |
('0', "frequency: the frequency vector of customers' purchases (denoted x in literature)."), | |
('1', "recency: the recency vector of customers' purchases (denoted t_x in literature)."), | |
('2', "T: the vector of customers' age (time since first purchase)"), | |
('3', 'iterative_fitting: perform `iterative_fitting` additional fits to find the best'), | |
('4', 'parameters for the model. Setting to 0 will improve peformance but possibly'), | |
('5', 'hurt estimates.'), | |
('6', 'initial_params: set initial params for the iterative fitter.'), | |
('7', 'verbose: set to true to print out convergence diagnostics.'), |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from patsy import dmatrix | |
from lifelines import CoxPHFitter | |
import pandas as pd | |
df = pd.read_csv('/Users/camerondavidson-pilon/Downloads/prostate1.csv') | |
X = dmatrix('age + hg + sz + sg + rx + pf + status1 + dtime', df, return_type='dataframe') | |
print X.head() | |
""" | |
Notice patsy has removed the redundant variables: `0.2 mg estrogen` and `in bed < 50% daytime`. This is what R does too. | |
Patsy has introduced an Intercept column, though. We don't want this. |
We can make this file beautiful and searchable if this error is corrected: No tabs found in this TSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pairid lbwt age lastwt race smoke ptd ht ui race1 race2 race3 | |
1 0 14 135 1 0 0 0 0 1 0 0 | |
1 1 14 101 3 1 1 0 0 0 0 1 | |
2 0 15 98 2 0 0 0 0 0 1 0 | |
2 1 15 115 3 0 0 0 1 0 0 1 | |
3 0 16 95 3 0 0 0 0 0 0 1 | |
3 1 16 130 3 0 0 0 0 0 0 1 | |
4 0 17 103 3 0 0 0 0 0 0 1 | |
4 1 17 130 3 1 1 0 1 0 0 1 | |
5 0 17 122 1 1 0 0 0 1 0 0 |