Skip to content

Instantly share code, notes, and snippets.

@ipashchenko
Created May 29, 2017 16:07
Show Gist options
  • Save ipashchenko/4ec83135a8803b83827c0d5b87cd9a5b to your computer and use it in GitHub Desktop.
Save ipashchenko/4ec83135a8803b83827c0d5b87cd9a5b to your computer and use it in GitHub Desktop.
import numpy as np
from scipy.stats import gaussian_kde
def get_cdf_of_difference(data_1, data_2, diff_value=0, verbose=True):
    """Sum ``ECDF_2(x) * KDE_1(x + diff_value)`` over the points of ``data_1``.

    A Gaussian kernel density estimate is fitted to ``data_1``.  For every
    point ``x`` of ``data_1`` (visited in ascending order) the empirical CDF
    of ``data_2`` at ``x`` -- the fraction of ``data_2`` values ``<= x`` -- is
    multiplied by the KDE evaluated at ``x + diff_value``, and the products
    are added up.  Points where the empirical CDF is zero are skipped.

    NOTE(review): the returned value is the plain sum of those products; it
    is not divided by the sample size or by any step width.  Confirm whether
    a normalisation is expected before interpreting it as a probability.

    Parameters
    ----------
    data_1 : array_like
        Sample used both for the KDE and as the set of evaluation points.
    data_2 : array_like
        Sample whose empirical CDF weights each term.
    diff_value : float, optional
        Offset added to each point before the KDE is evaluated (default 0).
    verbose : bool, optional
        When true (the default) print the per-point diagnostics.

    Returns
    -------
    float
        The accumulated sum (``0`` if every point was skipped).

    Notes
    -----
    The inputs are never modified: an ndarray passed in keeps its original
    element order.
    """
    data_1 = np.asarray(data_1)
    data_2 = np.asarray(data_2)
    kde_1 = gaussian_kde(data_1)

    # np.asarray hands back the caller's own array when it already is an
    # ndarray, so an in-place ``.sort()`` would silently reorder the caller's
    # data.  np.sort returns a sorted copy instead.
    sorted_1 = np.sort(data_1)
    # data_2 needs no sorting at all: counting the values <= x is independent
    # of element order.
    size_2 = data_2.size

    cdf_diff = []
    for i, x in enumerate(sorted_1):
        if verbose:
            print(" {}th point is {}".format(i, x))
        cdf_2 = float(np.count_nonzero(data_2 <= x)) / size_2
        if cdf_2 <= 0:
            # No data_2 value lies at or below x, so the term would be zero.
            if verbose:
                print("Skipping because at {} CDF of data_2 is 0.0".format(x))
            continue
        kde_1_x = kde_1.pdf(diff_value + x)[0]
        term = cdf_2 * kde_1_x
        if verbose:
            print("CDF of data_2 is {}".format(cdf_2))
            print("KDE of data_1 is {}".format(kde_1_x))
            print("ADDING {}".format(term))
        cdf_diff.append(term)
    return sum(cdf_diff)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment