Last active
November 19, 2017 06:46
-
-
Save CountChu/99d7fb6e3d321ebaa2453acb84469232 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# Numerically explore Hoeffding's Inequality: compare the exact probability
# of drawing a bad sample with the bound given by the inequality.
#
from scipy import signal | |
import numpy as np | |
import math | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
def sampling(mu, samplesCount, epsilon):
    """Tabulate every red/green composition of a sample and total the bad ones.

    For each possible number of green items (0..samplesCount) the remaining
    items are red.  A row is recorded with the composition, the number of
    orderings that produce it, its probability, the red fraction ``nu`` and
    the deviation ``|nu - mu|``.  A composition is "bad" when that deviation
    (rounded to 6 decimals) is strictly greater than ``epsilon``.

    Args:
        mu: Probability that a single drawn item is red.
        samplesCount: Number of items in one sample (N).
        epsilon: Tolerance on ``|nu - mu|``.

    Returns:
        A tuple ``(frame, badP, checkProb)`` where ``frame`` is a
        ``pandas.DataFrame`` with one row per composition, ``badP`` is the
        summed probability of the bad compositions and ``checkProb`` is the
        summed probability of all compositions.
    """
    columns = ['D', 'R', 'G', 'N', 'P(D)', 'nu', '|nu - mu|', 'Is BAD?']
    data = {column: [] for column in columns}
    checkProb = 0
    badP = 0

    for green in range(samplesCount + 1):
        red = samplesCount - green

        # Number of distinct orderings with this many red and green items.
        n = (math.factorial(samplesCount)
             // math.factorial(green)
             // math.factorial(red))
        p = mu**red * (1 - mu)**green * n

        if samplesCount == 0:
            # An empty sample has no red fraction.  Use NaN instead of
            # dividing by zero; NaN never compares greater than epsilon, so
            # the empty sample is never counted as bad.
            nu = float('nan')
            diff = float('nan')
        else:
            nu = red / samplesCount
            # Round because e.g. 0.4 - 0.1 = 0.30000000000000004, which would
            # otherwise be misclassified when epsilon is 0.3.
            diff = round(abs(nu - mu), 6)

        isBad = ''
        if diff > epsilon:
            badP += p
            isBad = 'V'
        checkProb += p

        data['D'].append("D%s" % green)
        data['R'].append(red)
        data['G'].append(green)
        data['N'].append(n)
        data['P(D)'].append(p)
        data['nu'].append(nu)
        data['|nu - mu|'].append(diff)
        data['Is BAD?'].append(isBad)

    frame = pd.DataFrame(data, columns=columns)
    return (frame, badP, checkProb)
def samplingBadP(mu, samplesCount, epsilon):
    """Return only the summed bad-sample probability reported by ``sampling``."""
    # sampling yields (frame, badP, checkProb); only the middle value is needed.
    _table, bad_probability, _total = sampling(mu, samplesCount, epsilon)
    return bad_probability
def hoeffding(x, epsilon):
    """Return the Hoeffding bound ``2 * exp(-2 * epsilon^2 * x)``.

    Args:
        x: Sample size (the N in the plotted formula).
        epsilon: Tolerance used in the bound.
    """
    exponent = -2 * epsilon * epsilon * x
    return 2.0 * math.exp(exponent)
#
# Tabulate all compositions of a 10-item sample and report both the total
# probability (sanity check) and the probability of the bad compositions.
#
mu, samplesCount, epsilon = 0.4, 10, 0.3
frame, badP, checkProb = sampling(mu, samplesCount, epsilon)

print(frame)
for caption, total in (("Sum of P(D) = ", checkProb),
                       ("Sum of P(Bad D) = ", badP)):
    print(caption, total)
#
# For every sample size N from 0 to 29, plot the Hoeffding bound next to the
# exact probability of drawing a bad sample.
#
plt.xlabel('N')
plt.ylabel('P')

latex1 = r'\varepsilon = %f, \mu = %f' % (epsilon, mu)
plt.title("Hoeffding's Inequality and Probability of Bad Data \n$ %s $" % latex1)

# Sample sizes shared by both curves.
x = np.arange(0, 30, 1)

# Curve 1: the upper bound from the inequality.
y2 = [hoeffding(size, epsilon) for size in x]
latex2 = r'P\leq2e^{\left( -2\varepsilon ^{2}N\right)}'
plt.plot(x, y2, label="Method 1: Hoeffding's Inequality, $%s$" % latex2)

# Curve 2: the bad-sample probability summed over all compositions.
y = [samplingBadP(mu, size, epsilon) for size in x]
plt.plot(x, y, label="Method 2: P = Prob [Bad Data in N Samples]")

plt.legend()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment