Skip to content

Instantly share code, notes, and snippets.

@ixxra
Created August 1, 2014 23:59
Show Gist options
  • Select an option

  • Save ixxra/199dc41ada857a0b3150 to your computer and use it in GitHub Desktop.

Select an option

Save ixxra/199dc41ada857a0b3150 to your computer and use it in GitHub Desktop.
Naive Bayes with pandas (not working yet!). See https://higgsml.lal.in2p3.fr/software/starting-kit/
import numpy as np
import pandas as pd
TRAINING_SET = 'data/training.csv'
TEST_SET = 'data/test.csv'

# Loaded at import time. The code below relies on the columns EventId,
# Weight and Label; every other column is treated as a feature.
training = pd.read_csv(TRAINING_SET)

# Background ('b') and signal ('s') total weights, indexed by label.
total_weights = training.groupby('Label')['Weight'].sum()

# Rescale the weights so that each class sums to 0.5.  ``.loc`` writes into
# the frame itself; chained indexing (``training.Weight[mask] *= ...``) may
# silently modify a temporary copy instead.
for _label in ('s', 'b'):
    training.loc[training.Label == _label, 'Weight'] *= (
        0.5 / total_weights[_label])

training_feats = training.drop(['EventId', 'Weight', 'Label'], axis=1)

# Derived from the data so the tables below always match the feature columns.
numFeatures = training_feats.shape[1]
numBins = 10


def _fit_feature(values, weights, is_signal, is_background, n_bins):
    """Return ``(log P(signal | bin), bin edges)`` for one feature column.

    ``values`` and ``weights`` are 1-D arrays of equal length; ``is_signal``
    and ``is_background`` are boolean masks over the same rows.  The feature
    is split into ``n_bins`` equal-width bins with ``pd.cut``.
    """
    bin_idx, edges = pd.cut(values, bins=n_bins, labels=False, retbins=True)
    # pd.cut yields NaN for values it cannot bin; those rows are left out.
    bin_idx = np.asarray(bin_idx, dtype=float)
    binned = ~np.isnan(bin_idx)

    def weight_per_bin(mask):
        mask = mask & binned
        return np.bincount(bin_idx[mask].astype(int),
                           weights=weights[mask], minlength=n_bins)

    w_signal = weight_per_bin(is_signal)
    w_background = weight_per_bin(is_background)
    total = w_signal + w_background

    # A bin with no training weight carries no evidence: use P = 0.5 so it
    # contributes exactly zero to the score instead of NaN (0 / 0).
    p_signal = np.full(n_bins, 0.5)
    np.divide(w_signal, total, out=p_signal, where=total > 0)

    # A populated bin without any signal weight still maps to -inf.
    with np.errstate(divide='ignore'):
        return np.log(p_signal), edges


# logPs[f, k]   : log P(signal | feature f falls into bin k)
# binEdges[f, :]: the numBins + 1 edges of feature f, needed by score()
logPs = np.empty((numFeatures, numBins))
binEdges = np.empty((numFeatures, numBins + 1))

_weights = training.Weight.values
_is_signal = (training.Label == 's').values
_is_background = (training.Label == 'b').values

for idx, fI in enumerate(training_feats.columns):
    logPs[idx], binEdges[idx] = _fit_feature(
        training_feats[fI].values, _weights, _is_signal, _is_background,
        numBins)


def score(x):
    """Return the naive Bayes log-score of a single event.

    ``x`` must hold one value per feature.  NOTE(review): the values are
    assumed to be ordered like the columns of ``training_feats`` -- confirm
    against the caller.

    The score is the sum over features of
    ``log P(signal | bin) - log(0.5)``; values outside the range seen in
    training are assigned to the nearest outer bin.

    Raises:
        ValueError: if ``x`` does not contain exactly ``numFeatures`` values.
    """
    x = np.asarray(x, dtype=float)
    if x.shape != (numFeatures,):
        raise ValueError(
            'expected %d feature values, got shape %r'
            % (numFeatures, x.shape))

    logP = 0.0
    for fI in range(numFeatures):
        # pd.cut bins are right-closed: bin k covers (edges[k], edges[k + 1]].
        bI = int(np.searchsorted(binEdges[fI], x[fI], side='left')) - 1
        bI = min(max(bI, 0), numBins - 1)
        logP += logPs[fI, bI] - np.log(0.5)
    return logP
def AMS(s, b, bReg=10.):
    """Return the approximate median significance for scalar ``s`` and ``b``.

    Computes ``sqrt(2 * ((s + b + bReg) * log(1 + s / (b + bReg)) - s))``.

    Args:
        s: first term of the formula; must be >= 0.
        b: second term of the formula; must be >= 0.
        bReg: regularisation constant added to ``b``; defaults to 10, the
            value that was previously hard-coded.

    Raises:
        ValueError: if ``s`` or ``b`` is negative (or NaN).
    """
    # Explicit checks instead of ``assert``: assertions vanish under ``-O``.
    if not s >= 0:
        raise ValueError('s must be non-negative, got %r' % (s,))
    if not b >= 0:
        raise ValueError('b must be non-negative, got %r' % (b,))

    # float() keeps the ratio a true division even for integer inputs.
    bTotal = b + float(bReg)
    return np.sqrt(2 * ((s + bTotal) * np.log(1 + s / bTotal) - s))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment