Created
January 21, 2017 14:27
-
-
Save m-manu/9342c90c04da2c6af022a382a4815869 to your computer and use it in GitHub Desktop.
Compute statistical averages (mean, median and mode) from a stream of numbers. Optimized for 'fetch'.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package manu.sandbox.utils; | |
import java.util.*; | |
public class AverageCalculator { | |
private static class FrequencyComparator implements Comparator<Double> { | |
private final Map<Double, Integer> histogram; | |
private FrequencyComparator(Map<Double, Integer> histogram) { | |
this.histogram = histogram; | |
} | |
@Override | |
public int compare(Double a, Double b) { | |
Integer sa = this.histogram.get(b), sb = this.histogram.get(a); | |
if (sa == null || sb == null || sa.equals(sb)) { | |
return b.compareTo(a); | |
} | |
return sa.compareTo(sb); | |
} | |
} | |
private final PriorityQueue<Double> minHeap, maxHeap; | |
private final Map<Double, Integer> histogram; | |
private final PriorityQueue<Double> heap; | |
private int count = 0; | |
private double mean; | |
public AverageCalculator() { | |
this(100); | |
} | |
public AverageCalculator(int initSize) { | |
minHeap = new PriorityQueue<>(initSize); | |
maxHeap = new PriorityQueue<>(initSize, Collections.reverseOrder()); | |
histogram = new HashMap<>(initSize); | |
heap = new PriorityQueue<>(initSize, new FrequencyComparator(histogram)); | |
} | |
public AverageCalculator insertBulk(Iterable<? extends Number> collection) { | |
for (Number e : collection) insert(e.doubleValue()); | |
return this; | |
} | |
public void insert(double e) { | |
Integer frequency = histogram.get(e); | |
heap.remove(e); | |
histogram.put(e, frequency == null ? 1 : frequency + 1); | |
heap.add(e); | |
mean += (e - mean) / (count + 1); | |
if (count == 0) { | |
minHeap.add(e); | |
} else if (count == 1) { | |
if (e > minHeap.peek()) { | |
maxHeap.add(e); | |
} else { | |
maxHeap.add(minHeap.poll()); | |
minHeap.add(e); | |
} | |
} else { | |
if (e > minHeap.peek()) { | |
minHeap.add(e); | |
} else if (e < maxHeap.peek()) { | |
maxHeap.add(e); | |
} else { | |
if (minHeap.size() > maxHeap.size()) | |
maxHeap.add(e); | |
else | |
minHeap.add(e); | |
} | |
if (maxHeap.size() > minHeap.size()) | |
minHeap.add(maxHeap.poll()); | |
else | |
maxHeap.add(minHeap.poll()); | |
} | |
count++; | |
} | |
public double getMedian() { | |
if (count == 0) | |
return Double.NaN; | |
if (maxHeap.size() > minHeap.size()) { | |
return maxHeap.peek(); | |
} else if (maxHeap.size() < minHeap.size()) { | |
return minHeap.peek(); | |
} else { | |
return (maxHeap.peek() + minHeap.peek()) / 2.0; | |
} | |
} | |
public double getMean() { | |
if (count == 0) | |
return Double.NaN; | |
else | |
return mean; | |
} | |
public double getMode() { | |
if (count == 0) | |
return Double.NaN; | |
return heap.peek(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment