Last active
August 29, 2015 14:04
-
-
Save edvakf/ec98cd4ff184b8222520 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
class BloomFilter | |
{ | |
const BYTE = 8; | |
private $filter; | |
private $filter_length; | |
private $bit_num; | |
private $hash_num; | |
public function __construct($byte_size, $hash_num, $filter = null) | |
{ | |
$this->filter_length = $byte_size; | |
$this->bit_num = $this->filter_length * self::BYTE; | |
$this->hash_num = $hash_num; | |
if ($filter === null) { | |
$this->filter = str_repeat("\0", $this->filter_length); | |
} else { | |
$this->filter = $filter; | |
} | |
} | |
public function add($val) | |
{ | |
for ($k = 0; $k < $this->hash_num; $k++) { | |
$hash64 = $this->hash($val . $k); | |
$flag_up_block = (int) ($hash64 / self::BYTE); | |
$flag_up_bit = $hash64 % self::BYTE; | |
$this->filter[$flag_up_block] = chr(ord($this->filter[$flag_up_block]) | (1 << $flag_up_bit)); | |
} | |
} | |
public function query($val) | |
{ | |
for ($k = 0; $k < $this->hash_num; $k++) { | |
$hash64 = $this->hash($val . $k); | |
$flag_up_block = (int) ($hash64 / self::BYTE); | |
$flag_up_bit = $hash64 % self::BYTE; | |
if ((ord($this->filter[$flag_up_block]) & (1 << $flag_up_bit)) === 0) { | |
return false; | |
} | |
} | |
return true; | |
} | |
private function hash($val) | |
{ | |
$hash = hexdec(substr(md5($val), 0, 15)); // 16文字までカットすると、負の値になることがあって以後の処理がうまく動かない | |
return $hash % ($this->bit_num); | |
} | |
public function toArray() | |
{ | |
return array( | |
'bit_num' => $this->filter_length * self::BYTE, | |
'hash_num' => $this->hash_num, | |
'filter' => $this->filter, | |
); | |
} | |
public static function retrieveFromArray(array $filter_status) | |
{ | |
return new BloomFilter($filter_status['bit_num'] / self::BYTE, $filter_status['hash_num'], $filter_status['filter']); | |
} | |
public function getPositiveProbability() | |
{ | |
$bitcount = 0; | |
for ($i = 0; $i < $this->filter_length; $i++) { | |
$byte = ord($this->filter[$i]); | |
for ($k = 0; $k < self::BYTE; $k++) { | |
if ((($byte >> $k) & 1) === 1) $bitcount++; | |
} | |
} | |
$bitup_probability = $bitcount / ($this->filter_length * self::BYTE); | |
return pow($bitup_probability, $this->hash_num); | |
} | |
} | |
//use Example | |
/* | |
$records = 500; | |
$Q = 1000; | |
$f = new BloomFilter(1024, 4); | |
for($i=0;$i<$records;$i++) $f->add(md5($i)); | |
$corrects = 0; | |
$errors = 0; | |
for($i=0;$i<$Q;$i++) { | |
if ($f->query(md5($i))) { | |
if ($i < $records)$corrects++; | |
else $errors++; | |
//existence !! | |
} | |
} | |
echo "CORRECTS: $corrects ERRORS: $errors" . PHP_EOL; | |
$array = $f->toArray(); | |
$f = BloomFilter::retrieveFromArray($array); | |
echo "RETRIEVED!" . PHP_EOL; | |
$corrects = 0; | |
$errors = 0; | |
for($i=0;$i<$Q;$i++) { | |
if ($f->query(md5($i))) { | |
if ($i < $records)$corrects++; | |
else $errors++; | |
//existence !! | |
} | |
} | |
echo "CORRECTS: $corrects ERRORS: $errors" . PHP_EOL; | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment