# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @CountChu
# Last active November 19, 2017 06:46
# Show Gist options
#   • Save CountChu/99d7fb6e3d321ebaa2453acb84469232 to your computer and use it in GitHub Desktop.
# Save CountChu/99d7fb6e3d321ebaa2453acb84469232 to your computer and use it in GitHub Desktop.
#
# A first numerical look at Hoeffding's Inequality: compare the exact
# probability of drawing a bad sample with Hoeffding's upper bound.
#
from scipy import signal
import numpy as np
import math
import pandas as pd
import matplotlib.pyplot as plt
def sampling (mu, samplesCount, epsilon):
    """Enumerate every red/green split of a sample and its probability.

    Each of the ``samplesCount`` draws is red with probability ``mu`` and
    green otherwise.  For every possible number of green draws the function
    records the number of orderings (``N``), the probability of that split
    (``P(D)``), the red fraction ``nu`` and the rounded deviation
    ``|nu - mu|``.  A split is "bad" when that deviation is strictly greater
    than ``epsilon``.

    Parameters:
        mu: probability that a single draw is red.
        samplesCount: number of draws in one sample (an integer >= 0).
        epsilon: tolerated deviation between ``nu`` and ``mu``.

    Returns:
        A tuple ``(frame, badP, checkProb)`` where ``frame`` is a pandas
        DataFrame with one row per split, ``badP`` is the summed probability
        of the bad splits and ``checkProb`` is the sum of all ``P(D)``.
    """
    columns = ['D', 'R', 'G', 'N', 'P(D)', 'nu', '|nu - mu|', 'Is BAD?']
    data = {column: [] for column in columns}
    checkProb = 0
    badP = 0

    for green in range(samplesCount + 1):
        red = samplesCount - green

        # Number of orderings with this many red and green draws.
        n = math.factorial(samplesCount) // math.factorial(green) // math.factorial(red)
        p = mu**red * (1-mu)**green * n

        # An empty sample has no red fraction.  Use nan instead of dividing
        # by zero, so a plain int 0 behaves like a numpy integer 0 (which
        # already yields nan for 0/0): the single row is kept, never "bad".
        nu = red / samplesCount if samplesCount else float('nan')

        # Round so that float noise such as 0.4 - 0.1 = 0.30000000000000004
        # does not turn a deviation equal to epsilon into a "bad" one.
        diff = round(abs(nu - mu), 6)

        isBad = ''
        if diff > epsilon:
            badP += p
            isBad = 'V'
        checkProb += p

        data['D'].append("D%s" % green)
        data['R'].append(red)
        data['G'].append(green)
        data['N'].append(n)
        data['P(D)'].append(p)
        data['nu'].append(nu)
        data['|nu - mu|'].append(diff)
        data['Is BAD?'].append(isBad)

    frame = pd.DataFrame(data, columns=columns)
    return (frame, badP, checkProb)
def samplingBadP (mu, samplesCount, epsilon):
    """Return only the summed probability of the bad samples.

    Calls ``sampling`` with the same arguments and keeps the second item of
    its ``(frame, badP, checkProb)`` result.
    """
    return sampling(mu, samplesCount, epsilon)[1]
def hoeffding (x, epsilon):
    """Return the bound ``2 * exp(-2 * epsilon^2 * x)``.

    Parameters:
        x: the sample size the bound is evaluated at.
        epsilon: the tolerated deviation.
    """
    exponent = -2 * epsilon * epsilon * x
    return 2.0 * math.exp(exponent)
#
# Tabulate every possible sample of size 10 and print the total
# probability of the bad ones.
#
mu, samplesCount, epsilon = 0.4, 10, 0.3
frame, badP, checkProb = sampling(mu, samplesCount, epsilon)

print(frame)
print("Sum of P(D) = ", checkProb)      # sum over all rows of the table
print("Sum of P(Bad D) = ", badP)       # sum over the rows flagged as bad
#
# Plot, for every sample size N from 0 to 29, Hoeffding's bound next to
# the probability of drawing a bad sample of that size.
#
plt.xlabel('N')
plt.ylabel('P')
latex1 = r'\varepsilon = %f, \mu = %f' % (epsilon, mu)
plt.title("Hoeffding's Inequality and Probability of Bad Data \n$ %s $" % latex1)

# Sample sizes on the horizontal axis: 0, 1, ..., 29.
x = np.arange(0, 30, 1)

# Curve 1: the bound returned by hoeffding() for each sample size.
latex2 = r'P\leq2e^{\left( -2\varepsilon ^{2}N\right)}'
y2 = [hoeffding(size, epsilon) for size in x]
plt.plot(x, y2, label="Method 1: Hoeffding's Inequality, $%s$" % latex2)

# Curve 2: the bad-sample probability returned by samplingBadP().
y = [samplingBadP(mu, size, epsilon) for size in x]
plt.plot(x, y, label="Method 2: P = Prob [Bad Data in N Samples]")

plt.legend()
plt.show()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment