CountChu · November 19, 2017 06:46
diff --git a/InferHoeffding.py b/InferHoeffding.py
 #
 # Initially infer Hoeffding's Inequality.
 #

 from scipy import signal
 import numpy as np
 import math
 import pandas as pd
 import matplotlib.pyplot as plt

 def sampling (mu, samplesCount, epsilon):
     """Tabulate every red/green split of samplesCount draws.

     Each draw is red with probability mu and green otherwise.  For every
     possible number of green draws the table records the outcome label,
     the red and green counts, the number of orderings N, the probability
     P(D) of that split, the in-sample red fraction nu, |nu - mu| rounded
     to 6 decimals, and a 'V' mark when that gap exceeds epsilon.

     Returns a tuple (frame, badP, checkProb): the pandas DataFrame, the
     summed probability of the marked ("bad") rows, and the sum of all
     P(D) values.
     """
     columns = ['D', 'R', 'G', 'N', 'P(D)', 'nu', '|nu - mu|', 'Is BAD?']
     data = {name: [] for name in columns}
     checkProb = 0
     badP = 0

     for green in range(samplesCount + 1):
         red = samplesCount - green
         # Number of distinct orderings with this many red and green draws.
         n = (math.factorial(samplesCount)
              // math.factorial(green)
              // math.factorial(red))
         p = mu**red * (1 - mu)**green * n
         # With zero draws the red fraction is undefined.  Use nan instead
         # of dividing by zero so N = 0 yields a single, non-bad row.
         nu = red / samplesCount if samplesCount else float('nan')
         # Round because e.g. 0.4 - 0.1 = 0.30000000000000004, which would
         # otherwise compare as greater than epsilon = 0.3.
         diff = round(abs(nu - mu), 6)
         isBad = ''
         if diff > epsilon:
             badP += p
             isBad = 'V'
         checkProb += p

         row = ("D%s" % green, red, green, n, p, nu, diff, isBad)
         for name, value in zip(columns, row):
             data[name].append(value)

     frame = pd.DataFrame(data, columns=columns)
     return (frame, badP, checkProb)

 def samplingBadP (mu, samplesCount, epsilon):
     """Return only the badP element of the tuple produced by sampling()."""
     # sampling() returns (frame, badP, checkProb); index 1 is badP.
     return sampling(mu, samplesCount, epsilon)[1]

 def hoeffding (x, epsilon):
     """Return 2 * exp(-2 * epsilon^2 * x) for the given x and epsilon."""
     exponent = -2 * epsilon * epsilon * x
     return 2.0 * math.exp(exponent)

 #
 # Print the probability of the bad data in 10 samples.
 #   

 # Parameters of the experiment: red probability, draw count, tolerance.
 mu, samplesCount, epsilon = 0.4, 10, 0.3
 frame, badP, checkProb = sampling(mu, samplesCount, epsilon)

 # Show the full outcome table followed by the two probability sums.
 print(frame)
 print("Sum of P(D) = ", checkProb)
 print("Sum of P(Bad D) = ", badP)

 #
 # Plot Hoeffding's bound and the exact probability of bad data for
 # sample counts N = 0, 1, ..., 29.
 #   

 plt.xlabel('N')
 plt.ylabel('P')
 latex1 = r'\varepsilon = %f, \mu = %f' % (epsilon, mu)
 plt.title("Hoeffding's Inequality and Probability of Bad Data \n$ %s $" % latex1)
 x = np.arange(0, 30, 1)

 # Method 1: the bound returned by hoeffding() at each sample count.
 latex2 = r'P\leq2e^{\left( -2\varepsilon ^{2}N\right)}'
 y2 = [hoeffding(count, epsilon) for count in x]
 plt.plot(x, y2, label="Method 1: Hoeffding's Inequality, $%s$" % latex2)

 # Method 2: the bad-data probability returned by samplingBadP().
 y = [samplingBadP(mu, count, epsilon) for count in x]
 plt.plot(x, y, label="Method 2: P = Prob [Bad Data in N Samples]")

 plt.legend()
 plt.show()
	#
	# Initially infer Hoeffding's Inequality.
	#

	from scipy import signal
	import numpy as np
	import math
	import pandas as pd
	import matplotlib.pyplot as plt

	def sampling (mu, samplesCount, epsilon):
	    """Tabulate every red/green split of samplesCount draws.

	    Each draw is red with probability mu and green otherwise.  For every
	    possible number of green draws the table records the outcome label,
	    the red and green counts, the number of orderings N, the probability
	    P(D) of that split, the in-sample red fraction nu, |nu - mu| rounded
	    to 6 decimals, and a 'V' mark when that gap exceeds epsilon.

	    Returns a tuple (frame, badP, checkProb): the pandas DataFrame, the
	    summed probability of the marked ("bad") rows, and the sum of all
	    P(D) values.
	    """
	    columns = ['D', 'R', 'G', 'N', 'P(D)', 'nu', '|nu - mu|', 'Is BAD?']
	    data = {name: [] for name in columns}
	    checkProb = 0
	    badP = 0

	    for green in range(samplesCount + 1):
	        red = samplesCount - green
	        # Number of distinct orderings with this many red and green draws.
	        n = (math.factorial(samplesCount)
	             // math.factorial(green)
	             // math.factorial(red))
	        p = mu**red * (1 - mu)**green * n
	        # With zero draws the red fraction is undefined.  Use nan instead
	        # of dividing by zero so N = 0 yields a single, non-bad row.
	        nu = red / samplesCount if samplesCount else float('nan')
	        # Round because e.g. 0.4 - 0.1 = 0.30000000000000004, which would
	        # otherwise compare as greater than epsilon = 0.3.
	        diff = round(abs(nu - mu), 6)
	        isBad = ''
	        if diff > epsilon:
	            badP += p
	            isBad = 'V'
	        checkProb += p

	        row = ("D%s" % green, red, green, n, p, nu, diff, isBad)
	        for name, value in zip(columns, row):
	            data[name].append(value)

	    frame = pd.DataFrame(data, columns=columns)
	    return (frame, badP, checkProb)

	def samplingBadP (mu, samplesCount, epsilon):
	    """Return only the badP element of the tuple produced by sampling()."""
	    (frame, badP, checkProb) = sampling (mu, samplesCount, epsilon)
	    return badP

	def hoeffding (x, epsilon):
	    """Return 2 * exp(-2 * epsilon^2 * x) for the given x and epsilon."""
	    # The multiplication operators were lost in the pasted text.
	    y = 2.0 * math.exp (-2*epsilon*epsilon*x)
	    return y

	#
	# Print the probability of the bad data in 10 samples.
	#

	# Parameters of the experiment: red probability, draw count, tolerance.
	mu, samplesCount, epsilon = 0.4, 10, 0.3
	frame, badP, checkProb = sampling(mu, samplesCount, epsilon)

	# Show the full outcome table followed by the two probability sums.
	print(frame)
	print("Sum of P(D) = ", checkProb)
	print("Sum of P(Bad D) = ", badP)

	#
	# Plot Hoeffding's bound and the exact probability of bad data for
	# sample counts N = 0, 1, ..., 29.
	#

	plt.xlabel('N')
	plt.ylabel('P')
	latex1 = r'\varepsilon = %f, \mu = %f' % (epsilon, mu)
	plt.title("Hoeffding's Inequality and Probability of Bad Data \n$ %s $" % latex1)
	x = np.arange(0, 30, 1)

	# Method 1: the bound returned by hoeffding() at each sample count.
	latex2 = r'P\leq2e^{\left( -2\varepsilon ^{2}N\right)}'
	y2 = [hoeffding(count, epsilon) for count in x]
	plt.plot(x, y2, label="Method 1: Hoeffding's Inequality, $%s$" % latex2)

	# Method 2: the bad-data probability returned by samplingBadP().
	y = [samplingBadP(mu, count, epsilon) for count in x]
	plt.plot(x, y, label="Method 2: P = Prob [Bad Data in N Samples]")

	plt.legend()
	plt.show()