Created
December 12, 2012 16:03
-
-
Save anandankm/4268992 to your computer and use it in GitHub Desktop.
Stream sample: based on an evenly distributed random sample.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Stream sample: based on an evenly distributed random sample. | |
*/ | |
public class StreamSample | |
{ | |
private static int sampleSize = 500; | |
private static int streamCount = 0; | |
private static int sampleIndex = 0; | |
private StreamElement[] streamSample = new StreamElement[sampleSize]; | |
public void nextElement(StreamElement x) { | |
/** | |
* Initially fill in the sample until the specified | |
* sampleSize is reached. | |
*/ | |
streamCount++; | |
if (sampleIndex <= sampleSize - 1) { | |
streamSample[sampleIndex] = x; | |
sampleIndex++; | |
return; | |
} | |
/** | |
* Probability that a stream element at (sampleSize + i)th instant is chosen to | |
* be added to the stream sample is probabilityForX = sampleSize/(sampleSize + i). | |
* | |
*/ | |
double probabilityForX = sampleSize/(double)streamCount; | |
double randomProbability = Math.random(); // random probability [0.0, 1.0) | |
/** | |
* if random probability falls under probability for stream element at | |
* (sampleSize + i)th instant, add the element to the stream array. | |
*/ | |
if (randomProbability <= probabilityForX) { | |
/** | |
* Choose a random index in the sample size range. | |
* #randomRange(s, e) - see below. | |
*/ | |
int randomIndex = randomRange(0, sampleSize-1); | |
streamSample[randomIndex] = x; | |
/** | |
* 1: Probability that any element in the array would be replaced | |
* by the newer element is (probabilityForX * 1/sampleSize) | |
* 2: Probability that any element will not be replaced is | |
* 1 - (probabilityForX * 1/sampleSize) | |
* | |
* @example - 1. For 501th element to be chosen for sampling, the probability is 500/501. | |
* 2. For any element in the array to be replaced, the probability is 500/501 * 1/500 = 1/501. | |
* 3. For any element in the array to remain in the array, the probability is 1 - (1/501) = 500/501. | |
* From 1 and 2, at the end of the 501th instant, we have same probabilities for all the elements | |
* in the sample, ie, 500/501. | |
*/ | |
} | |
} | |
public StreamElement[] getSampleStream() { | |
return this.streamSample; | |
} | |
/** | |
* Get a random number in the range of [s, e] | |
*/ | |
private int randomRange(int s, int e) { | |
if (s >= e) { | |
return 0; | |
} | |
long range = (long) e - (long) s + 1; | |
range = (long) (range * Math.random()); | |
return (int) (range + s); | |
} | |
/** | |
* Corresponds to a stream element object streamed at | |
* a particular instant 't'. | |
* This can have any data that is streamed. | |
*/ | |
public class StreamElement { | |
long streamID; | |
long data1; | |
String data2; | |
// .. | |
// .. so on and so forth | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment