Skip to content

Instantly share code, notes, and snippets.

@bkrukowski
Created September 29, 2019 18:15
Show Gist options
  • Save bkrukowski/cb4636ded8d7f7f2144710a6abb086ff to your computer and use it in GitHub Desktop.
Save bkrukowski/cb4636ded8d7f7f2144710a6abb086ff to your computer and use it in GitHub Desktop.
<?php
class MedianData
{
private $data = [];
private $count = 0;
public function count(): int
{
return $this->count;
}
public function getData(): iterable
{
return $this->data;
}
public function append(int $value, int $occurrences = 1): void
{
$this->count += $occurrences;
$this->initValue($value);
$this->data[$value][1] += $occurrences; // 0 -> value, equal to key, 1 -> counter
}
public function appendData(MedianData $medianData): void
{
foreach ($medianData->getData() as [$value, $counter]) {
$this->initValue($value);
$this->data[$value][1] += $counter;
$this->count += $counter;
}
}
private function initValue(int $value): void
{
$this->data[$value] = $this->data[$value] ?? [$value, 0];
}
public function rebuildStructure(): void
{
ksort($this->data);
}
}
class MedianRange extends MedianData
{
/**
* @var int
*/
private $min;
/**
* @var int
*/
private $max;
/**
* @var int
*/
private $level;
/**
* @var int
*/
private $maxChildren;
/**
* @var MedianRange[]
*/
protected $ranges = [];
/**
* @var int
*/
private $count = 0;
public function count(): int
{
return $this->count;
}
public function __construct(int $min, int $max, int $level, int $maxChildren)
{
$this->min = $min;
$this->max = $max;
$this->level = $level;
$this->maxChildren = $maxChildren;
}
public function getLevel(): int
{
return $this->level;
}
public function append(int $value, int $occurrences = 1): void
{
$this->count += $occurrences;
if ($this->level === 1) {
parent::append($value, $occurrences);
return;
}
$current = $this->min;
$delta = $this->getDelta();
for ($i = 1; $i < $this->maxChildren; $i++) {
if ($current + $delta > $value) {
break;
}
$current += $delta;
}
$this->initRange($current);
$this->ranges[$current]->append($value, $occurrences);
}
/**
* @return MedianRange[]
*/
public function getRanges(): iterable
{
return $this->ranges;
}
/**
* @param MedianData|MedianRange $medianData
*/
public function appendData(MedianData $medianData): void
{
// lvl must be the same
// if ($this->getLevel() != $medianData->getLevel()) {
// throw new \Exception("invalid data");
// }
if ($this->getLevel() == 1) {
parent::appendData($medianData);
}
$this->count += $medianData->count();
foreach ($medianData->getRanges() as $r) {
$this->initRange($r->getMin());
$this->ranges[$r->getMin()]->appendData($r);
}
}
public function rebuildStructure(): void
{
ksort($this->ranges);
foreach ($this->ranges as $range) {
$range->rebuildStructure();
}
}
private function getDelta(): int
{
return (int) floor(($this->max - $this->min) / $this->maxChildren);
}
private function initRange($min)
{
if (empty($this->ranges[$min])) {
$max = $min + $this->getDelta()-1;
if ($max + $this->getDelta() > $this->max) {
$max = $this->max;
}
$this->ranges[$min] = new MedianRange($min, $max, $this->level-1, $this->maxChildren);
}
}
/**
* @return int
*/
public function getMin(): int
{
return $this->min;
}
/**
* @return int
*/
public function getMax(): int
{
return $this->max;
}
}
class AsyncRange extends MedianRange
{
/**
* @var MedianRange[]
*/
private $asyncRanges;
/**
* @param MedianRange[] $ranges
*/
public function __construct(array $ranges)
{
$this->asyncRanges = $ranges;
}
public function count(): int
{
$result = 0;
foreach ($this->asyncRanges as $r) {
$result += $r->count();
}
return $result;
}
public function getLevel(): int
{
foreach ($this->asyncRanges as $r) {
return $r->getLevel();
}
}
public function append(int $value, int $occurrences = 1): void
{
throw new \Exception("not allowed");
}
public function getData(): iterable
{
if ($this->getLevel() !== 1) {
throw new \Exception("not allowed");
}
$d = new MedianData();
foreach ($this->asyncRanges as $as) {
$d->appendData($as);
}
return $d->getData();
}
/**
* @return MedianRange[]
*/
public function getRanges(): iterable
{
$all = [];
foreach ($this->asyncRanges as $r) {
foreach ($r->getRanges() as $r2) {
$all[] = $r2->getMin();
}
}
$keys = array_values(array_unique($all));
sort($keys);
foreach ($keys as $key) {
$result = [];
foreach ($this->asyncRanges as $r) {
if (isset($r->ranges[$key])) {
$result[] = $r->ranges[$key];
}
}
yield new AsyncRange($result);
}
}
public function appendData(MedianData $medianData): void
{
throw new \Exception("not allowed");
}
public function rebuildStructure(): void
{
throw new \Exception("not allowed");
}
public function getMin(): int
{
foreach ($this->asyncRanges as $r) {
return $r->getMin();
}
}
public function getMax(): int
{
foreach ($this->asyncRanges as $r) {
return $r->getMax();
}
}
}
class Calculator
{
public function median(MedianRange $first, MedianRange... $ranges)
{
// $data = $first;
// foreach ($ranges as $r) {
// $data->appendData($r);
// }
// $data->rebuildStructure();
$data = new AsyncRange(func_get_args());
$count = $data->count();
if ($count % 2) {
$indexA = $indexB = (int) floor($count/2);
} else {
$indexA = (int) floor($count/2);
$indexB = $indexA - 1;
}
return ($this->getValueOfIndex($data, $indexA) + $this->getValueOfIndex($data, $indexB))/2;
}
private function getValueOfIndex(MedianRange $d, int $index, $leftIndex = 0)
{
if ($d->getLevel() == 1) {
return $this->getValueOfIndexInData($d, $index, $leftIndex);
}
foreach ($d->getRanges() as $r) {
if ($r->count() + $leftIndex > $index) {
return $this->getValueOfIndex($r, $index, $leftIndex);
}
$leftIndex += $r->count();
}
}
public function getValueOfIndexInData(MedianData $d, int $index, $leftIndex)
{
foreach ($d->getData() as [$value, $rowNums]) {
$leftIndex += $rowNums;
if ($index < $leftIndex) {
return $value;
}
}
}
}
ini_set("memory_limit", '8G');
$min = 0;
$max = 1000000;
$maxChildren = 10;
$maxLevel = 6;
$numOfValues = floor(200000/30);
$numOfDays = 7;
/** @var MedianRange[] $days */
$days = [];
for ($i = 0; $i < $numOfDays; $i++) {
$days[$i] = new MedianRange($min, $max, $maxLevel, $maxChildren);
}
for ($d = 0; $d < $numOfDays; $d++) {
for ($i = 0; $i < $numOfValues; $i++) {
$days[$d]->append(mt_rand($min, $max));
}
}
for ($i = 0; $i < $numOfDays; $i++) {
$days[$i]->rebuildStructure();
}
$s = microtime(true);
echo $x = (new Calculator())->median(...$days), "\n";
echo microtime(true) - $s, "\n";
// output 0.00053191184997559
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment