Created
September 29, 2019 18:15
-
-
Save bkrukowski/cb4636ded8d7f7f2144710a6abb086ff to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
class MedianData | |
{ | |
private $data = []; | |
private $count = 0; | |
public function count(): int | |
{ | |
return $this->count; | |
} | |
public function getData(): iterable | |
{ | |
return $this->data; | |
} | |
public function append(int $value, int $occurrences = 1): void | |
{ | |
$this->count += $occurrences; | |
$this->initValue($value); | |
$this->data[$value][1] += $occurrences; // 0 -> value, equal to key, 1 -> counter | |
} | |
public function appendData(MedianData $medianData): void | |
{ | |
foreach ($medianData->getData() as [$value, $counter]) { | |
$this->initValue($value); | |
$this->data[$value][1] += $counter; | |
$this->count += $counter; | |
} | |
} | |
private function initValue(int $value): void | |
{ | |
$this->data[$value] = $this->data[$value] ?? [$value, 0]; | |
} | |
public function rebuildStructure(): void | |
{ | |
ksort($this->data); | |
} | |
} | |
class MedianRange extends MedianData | |
{ | |
/** | |
* @var int | |
*/ | |
private $min; | |
/** | |
* @var int | |
*/ | |
private $max; | |
/** | |
* @var int | |
*/ | |
private $level; | |
/** | |
* @var int | |
*/ | |
private $maxChildren; | |
/** | |
* @var MedianRange[] | |
*/ | |
protected $ranges = []; | |
/** | |
* @var int | |
*/ | |
private $count = 0; | |
public function count(): int | |
{ | |
return $this->count; | |
} | |
public function __construct(int $min, int $max, int $level, int $maxChildren) | |
{ | |
$this->min = $min; | |
$this->max = $max; | |
$this->level = $level; | |
$this->maxChildren = $maxChildren; | |
} | |
public function getLevel(): int | |
{ | |
return $this->level; | |
} | |
public function append(int $value, int $occurrences = 1): void | |
{ | |
$this->count += $occurrences; | |
if ($this->level === 1) { | |
parent::append($value, $occurrences); | |
return; | |
} | |
$current = $this->min; | |
$delta = $this->getDelta(); | |
for ($i = 1; $i < $this->maxChildren; $i++) { | |
if ($current + $delta > $value) { | |
break; | |
} | |
$current += $delta; | |
} | |
$this->initRange($current); | |
$this->ranges[$current]->append($value, $occurrences); | |
} | |
/** | |
* @return MedianRange[] | |
*/ | |
public function getRanges(): iterable | |
{ | |
return $this->ranges; | |
} | |
/** | |
* @param MedianData|MedianRange $medianData | |
*/ | |
public function appendData(MedianData $medianData): void | |
{ | |
// lvl must be the same | |
// if ($this->getLevel() != $medianData->getLevel()) { | |
// throw new \Exception("invalid data"); | |
// } | |
if ($this->getLevel() == 1) { | |
parent::appendData($medianData); | |
} | |
$this->count += $medianData->count(); | |
foreach ($medianData->getRanges() as $r) { | |
$this->initRange($r->getMin()); | |
$this->ranges[$r->getMin()]->appendData($r); | |
} | |
} | |
public function rebuildStructure(): void | |
{ | |
ksort($this->ranges); | |
foreach ($this->ranges as $range) { | |
$range->rebuildStructure(); | |
} | |
} | |
private function getDelta(): int | |
{ | |
return (int) floor(($this->max - $this->min) / $this->maxChildren); | |
} | |
private function initRange($min) | |
{ | |
if (empty($this->ranges[$min])) { | |
$max = $min + $this->getDelta()-1; | |
if ($max + $this->getDelta() > $this->max) { | |
$max = $this->max; | |
} | |
$this->ranges[$min] = new MedianRange($min, $max, $this->level-1, $this->maxChildren); | |
} | |
} | |
/** | |
* @return int | |
*/ | |
public function getMin(): int | |
{ | |
return $this->min; | |
} | |
/** | |
* @return int | |
*/ | |
public function getMax(): int | |
{ | |
return $this->max; | |
} | |
} | |
class AsyncRange extends MedianRange | |
{ | |
/** | |
* @var MedianRange[] | |
*/ | |
private $asyncRanges; | |
/** | |
* @param MedianRange[] $ranges | |
*/ | |
public function __construct(array $ranges) | |
{ | |
$this->asyncRanges = $ranges; | |
} | |
public function count(): int | |
{ | |
$result = 0; | |
foreach ($this->asyncRanges as $r) { | |
$result += $r->count(); | |
} | |
return $result; | |
} | |
public function getLevel(): int | |
{ | |
foreach ($this->asyncRanges as $r) { | |
return $r->getLevel(); | |
} | |
} | |
public function append(int $value, int $occurrences = 1): void | |
{ | |
throw new \Exception("not allowed"); | |
} | |
public function getData(): iterable | |
{ | |
if ($this->getLevel() !== 1) { | |
throw new \Exception("not allowed"); | |
} | |
$d = new MedianData(); | |
foreach ($this->asyncRanges as $as) { | |
$d->appendData($as); | |
} | |
return $d->getData(); | |
} | |
/** | |
* @return MedianRange[] | |
*/ | |
public function getRanges(): iterable | |
{ | |
$all = []; | |
foreach ($this->asyncRanges as $r) { | |
foreach ($r->getRanges() as $r2) { | |
$all[] = $r2->getMin(); | |
} | |
} | |
$keys = array_values(array_unique($all)); | |
sort($keys); | |
foreach ($keys as $key) { | |
$result = []; | |
foreach ($this->asyncRanges as $r) { | |
if (isset($r->ranges[$key])) { | |
$result[] = $r->ranges[$key]; | |
} | |
} | |
yield new AsyncRange($result); | |
} | |
} | |
public function appendData(MedianData $medianData): void | |
{ | |
throw new \Exception("not allowed"); | |
} | |
public function rebuildStructure(): void | |
{ | |
throw new \Exception("not allowed"); | |
} | |
public function getMin(): int | |
{ | |
foreach ($this->asyncRanges as $r) { | |
return $r->getMin(); | |
} | |
} | |
public function getMax(): int | |
{ | |
foreach ($this->asyncRanges as $r) { | |
return $r->getMax(); | |
} | |
} | |
} | |
class Calculator | |
{ | |
public function median(MedianRange $first, MedianRange... $ranges) | |
{ | |
// $data = $first; | |
// foreach ($ranges as $r) { | |
// $data->appendData($r); | |
// } | |
// $data->rebuildStructure(); | |
$data = new AsyncRange(func_get_args()); | |
$count = $data->count(); | |
if ($count % 2) { | |
$indexA = $indexB = (int) floor($count/2); | |
} else { | |
$indexA = (int) floor($count/2); | |
$indexB = $indexA - 1; | |
} | |
return ($this->getValueOfIndex($data, $indexA) + $this->getValueOfIndex($data, $indexB))/2; | |
} | |
private function getValueOfIndex(MedianRange $d, int $index, $leftIndex = 0) | |
{ | |
if ($d->getLevel() == 1) { | |
return $this->getValueOfIndexInData($d, $index, $leftIndex); | |
} | |
foreach ($d->getRanges() as $r) { | |
if ($r->count() + $leftIndex > $index) { | |
return $this->getValueOfIndex($r, $index, $leftIndex); | |
} | |
$leftIndex += $r->count(); | |
} | |
} | |
public function getValueOfIndexInData(MedianData $d, int $index, $leftIndex) | |
{ | |
foreach ($d->getData() as [$value, $rowNums]) { | |
$leftIndex += $rowNums; | |
if ($index < $leftIndex) { | |
return $value; | |
} | |
} | |
} | |
} | |
ini_set("memory_limit", '8G'); | |
$min = 0; | |
$max = 1000000; | |
$maxChildren = 10; | |
$maxLevel = 6; | |
$numOfValues = floor(200000/30); | |
$numOfDays = 7; | |
/** @var MedianRange[] $days */ | |
$days = []; | |
for ($i = 0; $i < $numOfDays; $i++) { | |
$days[$i] = new MedianRange($min, $max, $maxLevel, $maxChildren); | |
} | |
for ($d = 0; $d < $numOfDays; $d++) { | |
for ($i = 0; $i < $numOfValues; $i++) { | |
$days[$d]->append(mt_rand($min, $max)); | |
} | |
} | |
for ($i = 0; $i < $numOfDays; $i++) { | |
$days[$i]->rebuildStructure(); | |
} | |
$s = microtime(true); | |
echo $x = (new Calculator())->median(...$days), "\n"; | |
echo microtime(true) - $s, "\n"; | |
// output 0.00053191184997559 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment