Last active
September 21, 2016 15:54
-
-
Save cogmission/c4cb8feaba19595dae8ff964e18b05d0 to your computer and use it in GitHub Desktop.
UniversalRandom Using George Marsaglia's elegant Xorshift (In Python and Java)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Created on Jul 31, 2016 | |
@author: cogmission | |
''' | |
import numpy | |
import unittest | |
from ctypes import c_longlong as ll | |
uintType = "uint32" | |
class UniversalRandom(object): | |
''' | |
classdocs | |
''' | |
def __init__(self, seed, compatibility_mode=True): | |
''' | |
Constructor | |
''' | |
self.seed = seed | |
self.uintType = "uint32" | |
self.max_java_int = 2147483647 | |
self.compatibility_mode = compatibility_mode | |
def setSeed(self, seed): | |
self.seed = seed | |
def getSeed(self): | |
return self.seed | |
def rshift(self, val, n): | |
#return val>>n if val >= 0 else (val+0x100000000)>>n | |
if val >= 0: | |
return val>>n | |
else: | |
return (val+0x100000000)>>n | |
def _private_sampleWithPrintouts(self, choices, selectedIndices, collectedRandoms): | |
""" | |
Private method which is identical to the sample() method of this class. | |
This method is meant for testing of identical behavior with the Java | |
method of the same class. | |
Normal use would entail calling sample() instead of this method | |
""" | |
choiceSupply = list(choices) | |
sampleSize = int(selectedIndices.size) | |
upperBound = len(choices) | |
for i in xrange(sampleSize): | |
randomIdx = self.nextInt(upperBound) | |
print "randomIdx: " + str(randomIdx) | |
collectedRandoms.append(randomIdx) | |
selectedIndices.itemset(i, choiceSupply[randomIdx]) | |
choiceSupply.remove(choiceSupply[randomIdx]) | |
upperBound -= 1 | |
selectedIndices.sort() | |
return selectedIndices; | |
def sample(self, choices, selectedIndices): | |
""" | |
Returns a random, sorted, and unique list of the specified sample size of | |
selections from the specified list of choices. | |
""" | |
choiceSupply = list(choices) | |
sampleSize = int(selectedIndices.size) | |
upperBound = len(choices) | |
for i in xrange(sampleSize): | |
randomIdx = self.nextInt(upperBound) | |
selectedIndices.itemset(i, choiceSupply[randomIdx]) | |
choiceSupply.remove(choiceSupply[randomIdx]) | |
upperBound -= 1 | |
selectedIndices.sort() | |
print "sample: " + str(list(selectedIndices)) | |
return selectedIndices; | |
def shuffle(self, collection): | |
""" | |
Modeled after Fisher - Yates implementation | |
""" | |
index = None | |
for i in range(len(collection) - 1, 0, -1): | |
index = self.nextInt(i + 1) | |
if index != i: | |
collection[index] ^= collection[i] | |
collection[i] ^= collection[index] | |
collection[index] ^= collection[i] | |
return collection | |
def rand(self, rows, cols): | |
""" | |
Returns an array of floating point values of the specified shape | |
@param rows (int) the number of rows | |
@param cols (int) the number of columns | |
""" | |
retval = numpy.empty((0, cols)) | |
for _ in range(rows): | |
row = numpy.empty((cols)) | |
for j in range(cols): | |
row[j] = self.nextDouble() | |
retval = numpy.append(retval, [row], axis=0) | |
return retval | |
def bin_distrib(self, rows, cols, sparsity): | |
""" | |
Returns an array of binary values of the specified shape whose | |
total number of "1's" will reflect the sparsity specified. | |
@param rows (int) the number of rows | |
@param cols (int) the number of columns | |
@param sparsity (float) number between 0 and 1, indicating percentage | |
of "on" bits | |
""" | |
if sparsity < 0 or sparsity > 1: | |
raise ValueError('Sparsity must be a number between 0 - 1') | |
retval = self.rand(rows, cols) | |
for i in range(len(retval)): | |
sub = numpy.where(retval[i] >= sparsity)[0] | |
sublen = len(sub) | |
target = int(sparsity * cols) | |
if sublen < target: | |
full = numpy.arange(0, cols, 1) | |
to_fill = numpy.delete(full, sub) | |
cnt = len(to_fill) | |
for _ in range(target - sublen): | |
ind = self.nextInt(cnt) | |
item = to_fill[ind] | |
to_fill = numpy.delete(to_fill, ind) | |
retval[i][item] = sparsity | |
print "retval = " + str(list(retval[i])) | |
cnt -= 1 | |
elif sublen > target: | |
cnt = sublen | |
for _ in range(sublen - target): | |
ind = self.nextInt(cnt) | |
item = sub[ind] | |
sub = numpy.delete(sub, ind) | |
retval[i][item] = 0.0 | |
print "retval = " + str(list(retval[i])) | |
cnt -= 1 | |
retval = (retval >= sparsity).astype(uintType) | |
return retval | |
def nextDouble(self): | |
nd = self.nextInt(10000) | |
retVal = nd * .0001 | |
print("nextDouble: " + str(retVal)) | |
return retVal | |
def nextIntNB(self): | |
""" | |
Next int - No Bounds | |
Uses maximum Java integer value. | |
""" | |
retVal = self.nextInt(self.max_java_int) | |
print("nextIntNB: " + str(retVal)) | |
return retVal | |
def nextInt(self, bound): | |
''' doc ''' | |
if bound <= 0: | |
raise ValueError('bound must be positive') | |
r = self._next_java_compatible(31) \ | |
if self.compatibility_mode else self._next(31) | |
m = bound - 1 | |
if (bound & m) == 0: | |
r = (bound * r) >> 31 | |
else: | |
r = r % bound | |
# u = r | |
# r = u % bound | |
# while u - r + m > self.max_java_int: | |
# u = self._next_java_compatible(31) \ | |
# if self.compatibility_mode else self._next(31) | |
# r = u % bound | |
print("nextInt(" + str(bound) + "): " + str(r)) | |
return r | |
def _next_java_compatible(self, nbits): | |
''' doc ''' | |
x = self.seed & 0xffffffffffffffff #Preserve 64 bits | |
x ^= ll(x << 21).value & 0xffffffffffffffff #Preserve 64 bits | |
x ^= ll(self.rshift(x, 35)).value | |
x ^= ll(x << 4).value | |
self.seed = x | |
x &= ((1 << nbits) - 1) | |
return x | |
def _next(self, nbits): | |
''' doc ''' | |
x = self.seed | |
x ^= x << 21 | |
x ^= self.rshift(x, 35) | |
x ^= x << 4 | |
self.seed = x | |
x &= ((1 << nbits) - 1) | |
return x | |
if __name__ == '__main__': | |
random = UniversalRandom(42) | |
s = 2858730232218250 | |
e = random.rshift(s, 35) | |
print "e = " + str(e) | |
x = random.nextInt(50) | |
print "x = " + str(x) | |
x = random.nextInt(50) | |
print "x = " + str(x) | |
x = random.nextInt(50) | |
print "x = " + str(x) | |
x = random.nextInt(50) | |
print "x = " + str(x) | |
x = random.nextInt(50) | |
print "x = " + str(x) | |
for i in xrange(0, 10): | |
o = random.nextInt(50) | |
print "x = " + str(o) | |
###################################### | |
## Values Seen in Java ## | |
###################################### | |
random = UniversalRandom(42) | |
for i in range(10): | |
o = random.nextDouble() | |
print "d = " + str(o) | |
''' | |
e = 83200 | |
x = 0 | |
x = 26 | |
x = 14 | |
x = 15 | |
x = 38 | |
x = 47 | |
x = 13 | |
x = 9 | |
x = 15 | |
x = 31 | |
x = 6 | |
x = 3 | |
x = 0 | |
x = 21 | |
x = 45 | |
d = 0.945 | |
d = 0.2426 | |
d = 0.5214 | |
d = 0.0815 | |
d = 0.0988 | |
d = 0.5497 | |
d = 0.4013 | |
d = 0.4559 | |
d = 0.5415 | |
d = 0.2381 | |
''' | |
# The "expected" values are the same as produced in Java | |
random = UniversalRandom(42) | |
choices = [1,2,3,4,5,6,7,8,9] | |
sampleSize = 6 | |
selectedIndices = numpy.empty(sampleSize, dtype="uint32") | |
collectedRandoms = [] | |
expectedSample = [1,2,3,7,8,9] | |
expectedRandoms = [0,0,0,5,3,3] | |
retVal = random._private_sampleWithPrintouts(choices, selectedIndices, collectedRandoms) | |
print "samples are equal ? " + str(retVal == expectedSample) + " --- " + str(selectedIndices) | |
print "used randoms are equal ? " + str(collectedRandoms == expectedRandoms) + " --- " + str(collectedRandoms) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* --------------------------------------------------------------------- | |
* Numenta Platform for Intelligent Computing (NuPIC) | |
* Copyright (C) 2016, Numenta, Inc. Unless you have an agreement | |
* with Numenta, Inc., for a separate license for this software code, the | |
* following terms and conditions apply: | |
* | |
* This program is free software: you can redistribute it and/or modify | |
* it under the terms of the GNU Affero Public License version 3 as | |
* published by the Free Software Foundation. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
* See the GNU Affero Public License for more details. | |
* | |
* You should have received a copy of the GNU Affero Public License | |
* along with this program. If not, see http://www.gnu.org/licenses. | |
* | |
* http://numenta.org/licenses/ | |
* --------------------------------------------------------------------- | |
*/ | |
package org.numenta.nupic.util; | |
import java.math.BigDecimal; | |
import java.math.BigInteger; | |
import java.math.MathContext; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.List; | |
import java.util.Random; | |
import java.util.stream.Collectors; | |
import java.util.stream.IntStream; | |
import gnu.trove.list.array.TIntArrayList; | |
import gnu.trove.set.hash.TIntHashSet; | |
/** | |
* <p> | |
* This also has a Python version which is guaranteed to output the same random | |
* numbers if given the same initial seed value. | |
* </p><p> | |
* Implementation of George Marsaglia's elegant Xorshift random generator | |
* 30% faster and better quality than the built-in java.util.random. | |
* <p> | |
* see http://www.javamex.com/tutorials/random_numbers/xorshift.shtml. | |
* @author cogmission | |
*/ | |
public class UniversalRandom extends Random { | |
/** serial version */ | |
private static final long serialVersionUID = 1L; | |
private static final MathContext MATH_CONTEXT = new MathContext(9); | |
long seed; | |
static final String BadBound = "bound must be positive"; | |
public UniversalRandom(long seed) { | |
this.seed = seed; | |
} | |
/** | |
* Sets the long value used as the initial seed | |
* | |
* @param seed the value with which to be initialized | |
*/ | |
@Override | |
public void setSeed(long seed) { | |
this.seed = seed; | |
} | |
/** | |
* Returns the long value used as the initial seed | |
* | |
* @return the initial seed value | |
*/ | |
public long getSeed() { | |
return seed; | |
} | |
/* | |
* Internal method used for testing | |
*/ | |
private int[] sampleWithPrintout(TIntArrayList choices, int[] selectedIndices, List<Integer> collectedRandoms) { | |
TIntArrayList choiceSupply = new TIntArrayList(choices); | |
int upperBound = choices.size(); | |
for (int i = 0; i < selectedIndices.length; i++) { | |
int randomIdx = nextInt(upperBound); | |
//System.out.println("randomIdx: " + randomIdx); | |
collectedRandoms.add(randomIdx); | |
selectedIndices[i] = (choiceSupply.removeAt(randomIdx)); | |
upperBound--; | |
} | |
Arrays.sort(selectedIndices); | |
return selectedIndices; | |
} | |
/** | |
* Returns a random, sorted, and unique list of the specified sample size of | |
* selections from the specified list of choices. | |
* | |
* @param choices | |
* @param selectedIndices | |
* @return an array containing a sampling of the specified choices | |
*/ | |
public int[] sample(TIntArrayList choices, int[] selectedIndices) { | |
TIntArrayList choiceSupply = new TIntArrayList(choices); | |
int upperBound = choices.size(); | |
for (int i = 0; i < selectedIndices.length; i++) { | |
int randomIdx = nextInt(upperBound); | |
selectedIndices[i] = (choiceSupply.removeAt(randomIdx)); | |
upperBound--; | |
} | |
Arrays.sort(selectedIndices); | |
//System.out.println("sample: " + Arrays.toString(selectedIndices)); | |
return selectedIndices; | |
} | |
/** | |
* Fisher-Yates implementation which shuffles the array contents. | |
* | |
* @param array the array of ints to shuffle. | |
* @return shuffled array | |
*/ | |
public int[] shuffle(int[] array) { | |
int index; | |
for (int i = array.length - 1; i > 0; i--) { | |
index = nextInt(i + 1); | |
if (index != i) { | |
array[index] ^= array[i]; | |
array[i] ^= array[index]; | |
array[index] ^= array[i]; | |
} | |
} | |
return array; | |
} | |
/** | |
* Returns an array of floating point values of the specified shape | |
* | |
* @param rows the number of rows | |
* @param cols the number of cols | |
* @return | |
*/ | |
public double[][] rand(int rows, int cols) { | |
double[][] retval = new double[rows][cols]; | |
for(int i = 0;i < rows;i++) { | |
for(int j = 0;j < cols;j++) { | |
retval[i][j] = nextDouble(); | |
} | |
} | |
return retval; | |
} | |
/** | |
* Returns an array of binary values of the specified shape whose | |
* total number of "1's" will reflect the sparsity specified. | |
* | |
* @param rows the number of rows | |
* @param cols the number of cols | |
* @param sparsity number between 0 and 1, indicating percentage | |
* of "on" bits | |
* @return | |
*/ | |
public int[][] binDistrib(int rows, int cols, double sparsity) { | |
double[][] rand = rand(rows, cols); | |
for(int i = 0;i < rand.length;i++) { | |
TIntArrayList sub = new TIntArrayList( | |
ArrayUtils.where(rand[i], new Condition.Adapter<Double>() { | |
@Override public boolean eval(double d) { | |
return d >= sparsity; | |
} | |
})); | |
int sublen = sub.size(); | |
int target = (int)(sparsity * cols); | |
if(sublen < target) { | |
int[] full = IntStream.range(0, cols).toArray(); | |
TIntHashSet subSet = new TIntHashSet(sub); | |
TIntArrayList toFill = new TIntArrayList( | |
Arrays.stream(full) | |
.filter(d -> !subSet.contains(d)) | |
.toArray()); | |
int cnt = toFill.size(); | |
for(int x = 0;x < target - sublen;x++, cnt--) { | |
int ind = nextInt(cnt); | |
int item = toFill.removeAt(ind); | |
rand[i][item] = sparsity; | |
} | |
}else if(sublen > target) { | |
int cnt = sublen; | |
for(int x = 0;x < sublen - target;x++, cnt--) { | |
int ind = nextInt(cnt); | |
int item = sub.removeAt(ind); | |
rand[i][item] = 0.0; | |
} | |
} | |
} | |
int[][] retval = Arrays.stream(rand) | |
.map(da -> Arrays.stream(da).mapToInt(d -> d >= sparsity ? 1 : 0).toArray()) | |
.toArray(int[][]::new); | |
return retval; | |
} | |
@Override | |
public double nextDouble() { | |
int nd = nextInt(10000); | |
double retVal = new BigDecimal(nd * .0001d, MATH_CONTEXT).doubleValue(); | |
//System.out.println("nextDouble: " + retVal); | |
return retVal; | |
} | |
@Override | |
public int nextInt() { | |
int retVal = nextInt(Integer.MAX_VALUE); | |
//System.out.println("nextIntNB: " + retVal); | |
return retVal; | |
} | |
@Override | |
public int nextInt(int bound) { | |
if (bound <= 0) | |
throw new IllegalArgumentException(BadBound); | |
int r = next(31); | |
int m = bound - 1; | |
if ((bound & m) == 0) // i.e., bound is a power of 2 | |
r = (int)((bound * (long)r) >> 31); | |
else { | |
r = r % bound; | |
/* | |
THIS CODE IS COMMENTED TO WORK IDENTICALLY WITH THE PYTHON VERSION | |
for (int u = r; | |
u - (r = u % bound) + m < 0; | |
u = next(31)) | |
; | |
*/ | |
} | |
//System.out.println("nextInt(" + bound + "): " + r); | |
return r; | |
} | |
/** | |
* Implementation of George Marsaglia's elegant Xorshift random generator | |
* 30% faster and better quality than the built-in java.util.random see also | |
* see http://www.javamex.com/tutorials/random_numbers/xorshift.shtml | |
*/ | |
protected int next(int nbits) { | |
long x = seed; | |
x ^= (x << 21) & 0xffffffffffffffffL; | |
x ^= (x >>> 35); | |
x ^= (x << 4); | |
seed = x; | |
x &= ((1L << nbits) - 1); | |
return (int) x; | |
} | |
BigInteger bigSeed; | |
/** | |
* PYTHON COMPATIBLE (Protected against overflows) | |
* | |
* Implementation of George Marsaglia's elegant Xorshift random generator | |
* 30% faster and better quality than the built-in java.util.random see also | |
* see http://www.javamex.com/tutorials/random_numbers/xorshift.shtml | |
*/ | |
protected int nextX(int nbits) { | |
long x = seed; | |
BigInteger bigX = bigSeed == null ? BigInteger.valueOf(seed) : bigSeed; | |
bigX = bigX.shiftLeft(21).xor(bigX).and(new BigInteger("ffffffffffffffff", 16)); | |
bigX = bigX.shiftRight(35).xor(bigX).and(new BigInteger("ffffffffffffffff", 16)); | |
bigX = bigX.shiftLeft(4).xor(bigX).and(new BigInteger("ffffffffffffffff", 16)); | |
bigSeed = bigX; | |
bigX = bigX.and(BigInteger.valueOf(1L).shiftLeft(nbits).subtract(BigInteger.valueOf(1))); | |
x = bigX.intValue(); | |
//System.out.println("x = " + x + ", seed = " + seed); | |
return (int)x; | |
} | |
public static void main(String[] args) { | |
UniversalRandom random = new UniversalRandom(42); | |
long s = 2858730232218250L; | |
long e = (s >>> 35); | |
System.out.println("e = " + e); | |
int x = random.nextInt(50); | |
System.out.println("x = " + x); | |
x = random.nextInt(50); | |
System.out.println("x = " + x); | |
x = random.nextInt(50); | |
System.out.println("x = " + x); | |
x = random.nextInt(50); | |
System.out.println("x = " + x); | |
x = random.nextInt(50); | |
System.out.println("x = " + x); | |
for(int i = 0;i < 10;i++) { | |
int o = random.nextInt(50); | |
System.out.println("x = " + o); | |
} | |
random = new UniversalRandom(42); | |
for(int i = 0;i < 10;i++) { | |
double o = random.nextDouble(); | |
System.out.println("d = " + o); | |
} | |
/////////////////////////////////// | |
// Values Seen in Python // | |
/////////////////////////////////// | |
/* | |
* e = 83200 | |
x = 0 | |
x = 26 | |
x = 14 | |
x = 15 | |
x = 38 | |
x = 47 | |
x = 13 | |
x = 9 | |
x = 15 | |
x = 31 | |
x = 6 | |
x = 3 | |
x = 0 | |
x = 21 | |
x = 45 | |
d = 0.945 | |
d = 0.2426 | |
d = 0.5214 | |
d = 0.0815 | |
d = 0.0988 | |
d = 0.5497 | |
d = 0.4013 | |
d = 0.4559 | |
d = 0.5415 | |
d = 0.2381 | |
*/ | |
random = new UniversalRandom(42); | |
TIntArrayList choices = new TIntArrayList(new int[] { 1,2,3,4,5,6,7,8,9 }); | |
int sampleSize = 6; | |
int[] selectedIndices = new int[sampleSize]; | |
List<Integer> collectedRandoms = new ArrayList<>(); | |
int[] expectedSample = {1,2,3,7,8,9}; | |
List<Integer> expectedRandoms = Arrays.stream(new int[] {0,0,0,5,3,3}).boxed().collect(Collectors.toList()); | |
random.sampleWithPrintout(choices, selectedIndices, collectedRandoms); | |
System.out.println("samples are equal ? " + Arrays.equals(expectedSample, selectedIndices)); | |
System.out.println("used randoms are equal ? " + collectedRandoms.equals(expectedRandoms)); | |
random = new UniversalRandom(42); | |
int[] coll = ArrayUtils.range(0, 10); | |
int[] before = Arrays.copyOf(coll, coll.length); | |
random.shuffle(coll); | |
System.out.println("collection before: " + Arrays.toString(before)); | |
System.out.println("collection shuffled: " + Arrays.toString(coll)); | |
int[] expected = { 5, 1, 8, 6, 2, 4, 7, 3, 9, 0 }; | |
System.out.println(Arrays.equals(expected, coll)); | |
System.out.println(!Arrays.equals(expected, before)); // not equal | |
} | |
} |
Added universal nextDouble()
method to both
- Added universal
sample()
method to both - Changed Java version of compatibility with the "fast" mode of Python via
nextX()
- Added Optional mode
compatibility_mode
argument to the Python UniversalRandom constructor which will then afford the option of using the fastest mode (compatibility_mode == False
), or the compatible mode (compatibility_mode == True
) which calls a different method which emulates Java overflows so that their outputs are identical.
Added tweak to plain java: nextInt()
, python: nextIntNB()
calls to internally call the bounded method with Java's maximum integer value.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Added ability to set the seed value after construction.