<?php

/**
 * Downloads the prime list published at t5k.org and parses it into two
 * public arrays: the prime entries and the proof-code legend.
 *
 * Usage: call parse(), then read $primes and $proofcodes.
 */
class T5KParser
{
    /** Number of leading lines of the raw list skipped as intro header. */
    private const HEADER_LINE_COUNT = 31;

    /** Prefix of the line that opens the proof-code section of the list. */
    private const PROOFCODE_SECTION_MARKER = 'KEY TO PROOF-CODES';

    private string $primelistURL = 'https://t5k.org/primes/lists/all.txt';

    /** Raw text of the downloaded list; null until getList() succeeded. */
    private ?string $primelistRaw = null;

    /**
     * Parsed prime entries, in list order.
     *
     * @var list<array{rank:int, ranknote:string, sum:string, digitlength:int, proofcode:string, year:int, comment:string, checksum:string}>
     */
    public array $primes = [];

    /**
     * Parsed proof-code legend entries, in list order.
     *
     * @var list<array{proofcode:string, provers:string}>
     */
    public array $proofcodes = [];

    /**
     * Downloads the list and fills $primes and $proofcodes.
     *
     * Calling it again replaces the previous results instead of appending.
     *
     * @throws RuntimeException when the download fails or the list is empty
     */
    public function parse(): void
    {
        $this->getList();
        $this->parseList();
    }

    /**
     * Fetches the raw list via cURL and stores it in $primelistRaw.
     *
     * @throws RuntimeException when cURL is unavailable or the transfer fails
     */
    private function getList(): void
    {
        if (!function_exists('curl_init')) {
            throw new RuntimeException('The cURL extension is required to download the prime list');
        }

        $ch = curl_init();
        if ($ch === false) {
            throw new RuntimeException('Could not initialise cURL');
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $this->primelistURL,
            CURLOPT_RETURNTRANSFER => true,
            // Treat HTTP status >= 400 as a failure so an error page is never parsed as the list.
            CURLOPT_FAILONERROR => true,
            // Do not hang forever when the host is unreachable.
            CURLOPT_CONNECTTIMEOUT => 10,
        ]);

        // With CURLOPT_RETURNTRANSFER the result is the body on success and false on failure.
        $rawlist = curl_exec($ch);
        if (!is_string($rawlist)) {
            throw new RuntimeException('Could not download prime list: ' . curl_error($ch));
        }

        // No curl_close(): the handle is released when $ch goes out of scope.
        $this->primelistRaw = $rawlist;
    }

    /**
     * Parses the previously stored raw list into $primes and $proofcodes.
     *
     * @throws RuntimeException when no (or only blank) raw text is available
     */
    private function parseList(): void
    {
        if ($this->primelistRaw === null || trim($this->primelistRaw) === '') {
            throw new RuntimeException('Raw file is empty');
        }

        $this->parsePrimes();
        $this->parseProofcodes();
    }

    /**
     * Splits the raw list into lines, accepting LF, CRLF and CR line endings
     * regardless of the platform the script runs on.
     *
     * @return list<string>
     */
    private function splitRawLines(): array
    {
        $lines = preg_split('/\r\n|\r|\n/', (string) $this->primelistRaw);

        return $lines === false ? [] : $lines;
    }

    /**
     * Collects one logical line per prime entry from the raw list.
     *
     * Entries are expected to be numbered consecutively starting at 1. A line
     * that does not open the next expected rank is merged into the current
     * entry; when the current entry ends with a backslash, that backslash is
     * dropped and the text is joined without a separating space.
     *
     * @return array<int, string> logical entry lines keyed by their 1-based position
     */
    private function getPrimesFromRawList(): array
    {
        $lines = $this->splitRawLines();
        $lineCount = count($lines);
        $foundlines = [];
        $primeIdx = 1;

        for ($i = self::HEADER_LINE_COUNT; $i < $lineCount; $i++) {
            $line = trim($lines[$i]);

            // The proof-code legend follows the table; it must not leak into the last entry.
            if (str_starts_with($line, self::PROOFCODE_SECTION_MARKER)) {
                break;
            }

            // Blank lines and dash-only lines carry no entry text.
            // NOTE(review): dash-only lines are assumed to be table borders - confirm against the live file.
            if ($line === '' || preg_match('/^[-\s]+$/', $line) === 1) {
                continue;
            }

            $previous = $foundlines[$primeIdx - 1] ?? null;
            $continuesPrevious = $previous !== null && str_ends_with($previous, '\\');

            // A pending backslash continuation always wins, even if the continued
            // text happens to start with the digits of the next rank. The lookahead
            // keeps rank 1 from matching a line that starts with "10".
            if (!$continuesPrevious && preg_match('/^' . $primeIdx . '(?!\d)/', $line) === 1) {
                $foundlines[$primeIdx] = $line;
                $primeIdx++;
                continue;
            }

            if ($previous === null) {
                // Text before the first entry (e.g. a longer header than expected) is ignored.
                continue;
            }

            $foundlines[$primeIdx - 1] = $continuesPrevious
                ? substr($previous, 0, -1) . $line // drop only the trailing continuation backslash
                : $previous . ' ' . $line;
        }

        return $foundlines;
    }

    /**
     * Parses every logical entry line into a structured record in $primes.
     *
     * Lines that match none of the patterns raise an E_USER_WARNING and are skipped.
     */
    private function parsePrimes(): void
    {
        $this->primes = [];
        $rawlines = $this->getPrimesFromRawList();

        // Regex 1 covers 99% of cases: the description contains no whitespace.
        $regex1 = '/^(\d{1,4})([a-z])?\s+(\S*)\s+(\d+)\s([A-Za-z0-9]+)\s+(\d+)(.*)/';
        // Regex 2 covers abbreviated decimal expansions such as
        // "10000000000000000000...(34053 other digits)...00000000000000532669".
        $regex2 = '/^(\d{1,4})([a-z])?\s+(\d+\.\.\.[\(\)\w\s]+\.\.\.\d+)\s+(\d+)\s([A-Za-z0-9]+)\s+(\d+)(.*)?/';
        // Regex 3 covers worded descriptions such as
        // "5163e Ramanujan tau function at 199^4518 ECPP 57125 E3 2022 ECPP".
        $regex3 = '/^(\d{1,4})([a-z])?\s+([\w\s]+[\d\^]+[\sA-Za-z]+)\s+(\d+)\s([A-Za-z0-9]+)\s+(\d+)(.*)?/';

        foreach ($rawlines as $line) {
            $matches = [];
            foreach ([$regex1, $regex2, $regex3] as $regex) {
                if (preg_match($regex, $line, $matches) === 1) {
                    break;
                }
            }

            if (count($matches) <= 1) {
                trigger_error("Could not parse line '" . $line . "'", E_USER_WARNING);
                continue;
            }

            $sum = trim($matches[3]);

            $this->primes[] = [
                'rank' => intval($matches[1]),
                'ranknote' => $matches[2],
                'sum' => $sum,
                'digitlength' => intval($matches[4]),
                'proofcode' => $matches[5],
                'year' => intval($matches[6]),
                // "??" instead of a truthiness test so a comment of "0" is kept.
                'comment' => trim($matches[7] ?? ''),
                'checksum' => dechex(crc32($sum)),
            ];
        }
    }

    /**
     * Parses the proof-code legend (the lines after the section marker) into $proofcodes.
     *
     * Each legend line is expected to be "<code><whitespace><provers>".
     */
    private function parseProofcodes(): void
    {
        $this->proofcodes = [];
        $proofcodeRegex = '/^([A-Za-z0-9]+)\s+(.*)$/';
        $sectionStarted = false;

        $lines = $this->splitRawLines();
        $lineCount = count($lines);

        for ($i = self::HEADER_LINE_COUNT; $i < $lineCount; $i++) {
            $line = rtrim($lines[$i]);

            if (!$sectionStarted) {
                // Haystack first, needle second: the section opens on the marker line itself.
                $sectionStarted = str_starts_with(ltrim($line), self::PROOFCODE_SECTION_MARKER);
                continue;
            }

            if (preg_match($proofcodeRegex, $line, $matches) === 1 && $matches[1] !== 'KEY') {
                $this->proofcodes[] = [
                    'proofcode' => $matches[1],
                    'provers' => $matches[2],
                ];
            }
        }
    }
}

$t5kparser = new T5KParser();

try {
    $t5kparser->parse();
} catch (RuntimeException $e) {
    // Report the failure but keep the (empty) result arrays available.
    error_log('T5KParser: ' . $e->getMessage());
}

/*
Primes: $t5kparser->primes
Proofcodes: $t5kparser->proofcodes
*/