import numpy as np

def hist_pdf(x, data, n_bins=10, minv=None, maxv=None):
    """Histogram density estimator
    
    [minv, minv+delta, , minv+delta*2, ..., 1]
    
    for any x in B_l=[minv+delta*j, minv+delta*(j+1)] density is estimated in the following way
    
    p(x) = p(x | x \in B_l) * p(x \in B_l) = (1/delta) * (\sum_{x_j}{I(x_j \in B_l)} / N)
    
    where N - number of points in dataset
    
    See lecture notes for details: 
    
    http://faculty.washington.edu/yenchic/18W_425/Lec6_hist_KDE.pdf
    
    Parameters
    ----------
    x: float
        point to estimate density at
    data: numpy array
        data points used to construct the density
    n_bins: int
        number of bins
    minv: float or None
        minimum value of the domain. If None, estimated from data
    maxv: float or None
        maximum value of the domain. If None, estimated from data
    
    Returns
    -------
    pdf: float
        computed density at point x given data
    """
    
    if minv is None:
        minv = np.min(data)
    if maxv is None:
        maxv = np.max(data)

    delta = (maxv-minv) / n_bins

    bins = np.arange(minv, maxv, delta)
    
    bin_id = int((x-minv)/delta)
    bin_minv = minv+delta*bin_id
    bin_maxv = minv+delta*(bin_id+1)
    
    n_data = len(data)
    n_data_in_bin = len(data[np.where((data>bin_minv) & (data<bin_maxv))])
    
    pdf = (1.0/delta) * (n_data_in_bin / n_data)
    
    return pdf
  

# Demo
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt

ds = load_boston()
data = ds['target']
xvals = np.arange(min(data), max(data), 1)
n_bins=15
pdf = [hist_pdf(x, data, n_bins=n_bins) for x in xvals]
plt.plot(xvals, pdf)