Skip to content

Instantly share code, notes, and snippets.

@vertexvaar
Created December 3, 2020 15:56
Show Gist options
  • Save vertexvaar/ce83e54761e0970e04b1f7a27fd0a99a to your computer and use it in GitHub Desktop.
Save vertexvaar/ce83e54761e0970e04b1f7a27fd0a99a to your computer and use it in GitHub Desktop.
<?php
/**
 * Minimal in-memory cache mapping a string identifier to an array of values.
 *
 * Entries live only as long as this object does; nothing is persisted.
 */
class Cache
{
    /** @var array<string, array> Stored values keyed by identifier */
    private array $entries = [];

    /**
     * Tells whether a value has been stored for the given identifier.
     *
     * @param string $id Identifier of the entry
     * @return bool True if set() was called for $id before, false otherwise
     */
    public function has(string $id): bool
    {
        // array_key_exists() keeps an explicitly stored empty array detectable.
        return array_key_exists($id, $this->entries);
    }

    /**
     * Returns the values stored for the given identifier.
     *
     * @param string $id Identifier of the entry
     * @return array The stored values, or an empty array when nothing is stored;
     *               use has() to tell a miss from a stored empty array
     */
    public function get(string $id): array
    {
        return $this->entries[$id] ?? [];
    }

    /**
     * Stores (or overwrites) the values for the given identifier.
     *
     * @param string $id Identifier of the entry
     * @param array $values Values to store
     * @return bool Always true, since the in-memory store cannot fail
     */
    public function set(string $id, array $values): bool
    {
        $this->entries[$id] = $values;

        return true;
    }
}
<?php
// Library code
/**
 * Stub of a client class that is constructed from a WebSocket.
 *
 * Only the method signatures are given here; every body is empty.
 * NOTE(review): presumably a placeholder for a third-party class - confirm.
 * As written, getId() would raise a TypeError when called because it declares
 * a string return type but returns nothing.
 */
class Client
{
/**
 * @param WebSocket $webSocket Connection this client is created for
 */
public function __construct(WebSocket $webSocket)
{
}
/**
 * Takes an event name and a closure.
 *
 * NOTE(review): presumably registers $closure as the listener for $event;
 * the body is empty here, so this cannot be confirmed from this file.
 *
 * @param string $event Event name (this file uses 'message' and 'disconnect')
 * @param Closure $closure Callback associated with the event
 */
public function on(string $event, Closure $closure): void
{
}
/**
 * Takes a message string; presumably delivers it to the client (body is empty here).
 *
 * @param string $message Message payload; callers in this file pass JSON-encoded strings
 */
public function send(string $message): void
{
}
/**
 * Declared to return a string identifier for this client (body is empty here).
 *
 * @return string
 */
public function getId(): string
{
}
}
<?php
/**
 * Handles WebSocket clients that ask for a URL to be crawled.
 *
 * Results are served from the cache when available. Otherwise the request is
 * put into a queue, the client is informed about its position while waiting,
 * and the crawl is run once the request has reached the head of the queue.
 */
class Controller
{
    private Cache $cache;

    private Crawler $crawler;

    /** @var CrawlRequest[] Pending requests keyed by CrawlRequest::getId(), in arrival order */
    private array $queue = [];

    public function __construct()
    {
        $this->cache = new Cache();
        $this->crawler = new Crawler();
    }

    /**
     * Wraps a new connection in a Client and registers the message and disconnect handlers.
     *
     * @param WebSocket $webSocket The freshly opened connection
     * @return void
     */
    public function connect(WebSocket $webSocket)
    {
        $client = new Client($webSocket);
        $client->on('message', $this->getOnMessageClosure($client));
        $client->on('disconnect', $this->getOnDisconnectClosure($client));
    }

    /**
     * If a client closes the window remove all waiting crawling requests because the client
     * will probably not come back
     *
     * @param Client $client
     * @return Closure
     */
    private function getOnDisconnectClosure(Client $client): Closure
    {
        return function () use ($client): void {
            foreach ($this->queue as $index => $crawlRequest) {
                if ($crawlRequest->getClient() === $client) {
                    unset($this->queue[$index]);
                }
            }
        };
    }

    /**
     * Builds the handler for incoming messages of the given client.
     *
     * The handler expects a JSON object with a "url" string and an optional
     * "force_refresh" flag. Malformed messages are answered with an "error"
     * payload instead of being processed.
     *
     * @param Client $client The client the handler is bound to
     * @return Closure Handler taking the raw message string
     */
    private function getOnMessageClosure(Client $client): Closure
    {
        return function (string $message) use ($client): void {
            // The message arrives as a string, so it has to be decoded before fields can be read.
            $payload = json_decode($message, true);
            if (!is_array($payload) || !isset($payload['url']) || !is_string($payload['url'])) {
                $client->send(
                    json_encode(['error' => 'Invalid request: expected a JSON object with a "url" string'])
                );
                return;
            }

            $url = $payload['url'];
            $forceRefresh = $payload['force_refresh'] ?? false;
            $crawlRequest = new CrawlRequest($client, $url);
            $requestId = $crawlRequest->getId();

            // TODO: Check if the given URL is either 1. already cache or 2. already queued and act accordingly
            if (isset($this->queue[$requestId])) {
                // TODO: figure out what to do if a client tries to request a crawl a second time.
                // Probably send back a "you're already enqueued, please be patient" message
            }

            if (!$this->cache->has($url) || $forceRefresh) {
                $this->queue[$requestId] = $crawlRequest;

                while (true) {
                    if (!isset($this->queue[$requestId])) {
                        // The request was removed while waiting (see the disconnect handler),
                        // so there is nobody left to crawl for.
                        return;
                    }

                    $queuePosition = $this->getQueuePosition($crawlRequest);
                    if ($queuePosition > 1) {
                        // Tell client: You are on position X
                        $client->send(json_encode(['pos' => $queuePosition]));
                        // NOTE(review): sleep() blocks the current process; confirm the socket
                        // library still processes other clients while this handler waits.
                        sleep(5);
                        continue;
                    }

                    try {
                        // Crawl pages and send state to client over websocket
                        $results = $this->crawler->crawl($url, $client);
                        $this->cache->set($url, $results);
                    } finally {
                        // The request stays queued during the crawl so the next request only
                        // becomes head afterwards; it is removed even if the crawl fails.
                        unset($this->queue[$requestId]);
                    }
                    break;
                }
            }

            $result = $this->cache->get($url);
            $client->send(json_encode($result));
        };
    }

    /**
     * Returns the 1-based position of the request in the queue (1 = head of the queue).
     *
     * @param CrawlRequest $crawlRequest The request to look up
     * @return int Position in arrival order, starting at 1
     * @throws \Exception If the request is not queued
     */
    private function getQueuePosition(CrawlRequest $crawlRequest): int
    {
        $index = array_search($crawlRequest->getId(), array_keys($this->queue), true);
        if ($index === false) {
            throw new \Exception('not found');
        }

        return $index + 1;
    }
}
<?php
/**
 * Pairs a client with the URL it asked to have crawled.
 *
 * The identifier is computed once in the constructor from the client's id and the URL.
 */
class CrawlRequest
{
    private Client $client;

    private string $url;

    private string $id;

    /**
     * @param Client $client The client that issued the request
     * @param string $url The URL to crawl
     */
    public function __construct(Client $client, string $url)
    {
        // The same client/URL pair always hashes to the same id.
        $identity = [$client->getId(), $url];

        $this->id = sha1(json_encode($identity));
        $this->url = $url;
        $this->client = $client;
    }

    /**
     * @return string SHA-1 hash identifying the client/URL combination
     */
    public function getId(): string
    {
        return $this->id;
    }

    /**
     * @return Client The client that issued the request
     */
    public function getClient(): Client
    {
        return $this->client;
    }

    /**
     * @return string The URL to crawl
     */
    public function getUrl(): string
    {
        return $this->url;
    }
}
<?php
/**
 * Connects the socket layer to the controller: every new connection is
 * passed on to Controller::connect().
 */
class Server
{
    private Controller $controller;

    private SocketIO $socketIO;

    /**
     * @param SocketIO $socketIO Event source that emits the 'connect' event
     */
    public function __construct(SocketIO $socketIO)
    {
        $this->socketIO = $socketIO;
        $this->controller = new Controller();
    }

    /**
     * Registers the 'connect' listener on the socket layer.
     */
    public function run()
    {
        $onConnect = function (WebSocket $webSocket): void {
            $this->controller->connect($webSocket);
        };

        $this->socketIO->on('connect', $onConnect);
    }
}
<?php
// Library code
/**
 * Event source of the socket layer.
 *
 * In this file it is used by Server::run(), which registers a listener for
 * the 'connect' event.
 */
interface SocketIO
{
/**
 * Associates a closure with an event name.
 *
 * @param string $event Event name; Server::run() uses 'connect'
 * @param Closure $closure Callback for the event; the 'connect' callback in
 *                         Server::run() takes a WebSocket argument
 */
public function on(string $event, Closure $closure): void;
}
<?php
// Library code
/**
 * Type of a single connection. Declares no methods here; in this file it is
 * only used as a parameter type (Controller::connect(), Client::__construct()
 * and the 'connect' callback in Server::run()).
 */
interface WebSocket
{
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment