<?php

/**
 * Downloads the T5K "all primes" text list and parses it into two arrays:
 * the primes themselves and the key of proof-codes that follows them.
 *
 * Usage: call parse(), then read the public $primes and $proofcodes properties.
 */
class T5KParser {
    /** Number of leading lines of the raw list that are skipped as intro header. */
    private const HEADER_LINE_COUNT = 31;

    /** Line prefix that opens the proof-code key and ends the prime listing. */
    private const PROOFCODE_SECTION_MARKER = 'KEY TO PROOF-CODES';

    private $primelistURL = 'https://t5k.org/primes/lists/all.txt';

    /** @var string|null Raw text of the downloaded list; null until getList() has run. */
    private $primelistRaw;

    /**
     * @var array<int, array{rank:int, ranknote:string, sum:string, digitlength:int,
     *                       proofcode:string, year:int, comment:string, checksum:string}>
     */
    public $primes = [];

    /** @var array<int, array{proofcode:string, provers:string}> */
    public $proofcodes = [];

    /**
     * Downloads the list and fills $primes and $proofcodes.
     *
     * @throws Exception when the list cannot be downloaded or is empty
     */
    public function parse() {
        $this->getList();
        $this->parseList();
    }

    /**
     * Fetches the raw list over HTTP into $primelistRaw.
     *
     * @throws RuntimeException when cURL cannot be initialised or the transfer fails
     */
    private function getList(): void {
        $ch = curl_init();

        if ($ch === false) {
            throw new RuntimeException('Could not initialise cURL');
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $this->primelistURL,
            CURLOPT_RETURNTRANSFER => true,
            // Treat HTTP error statuses as failures instead of parsing an error page.
            CURLOPT_FAILONERROR => true,
        ]);

        $rawlist = curl_exec($ch);

        // curl_exec() signals failure with false, not null.
        if ($rawlist === false) {
            throw new RuntimeException(
                'Could not download ' . $this->primelistURL . ': ' . curl_error($ch)
            );
        }

        // No curl_close(): it has no effect on PHP 8; the handle is released
        // when $ch goes out of scope.
        $this->primelistRaw = $rawlist;
    }

    /**
     * Parses the downloaded text into $primes and $proofcodes.
     *
     * @throws Exception when no raw text is available
     */
    private function parseList(): void {
        if (!is_string($this->primelistRaw) || $this->primelistRaw === '') {
            throw new Exception('Raw file is empty');
        }

        $this->parsePrimes();

        $this->parseProofcodes();
    }

    /**
     * Splits the raw text into lines and drops the intro header.
     *
     * Splits on the line endings actually present in the text (CRLF, LF or CR)
     * rather than on the host's PHP_EOL, which may differ from the file's.
     *
     * @return array<int, string> lines after the header, without line terminators
     */
    private function getBodyLines(): array {
        $lines = preg_split('/\r\n|\n|\r/', (string) $this->primelistRaw);

        return array_slice($lines === false ? [] : $lines, self::HEADER_LINE_COUNT);
    }

    /**
     * Collects one logical line per prime, joining wrapped lines.
     *
     * A line that starts with the next expected sequence number opens a new
     * entry; any other line is a continuation of the previous entry. A
     * continuation is glued on directly when the previous text ends with a
     * backslash, and separated by a single space otherwise.
     *
     * @return array<int, string> logical lines keyed by 1-based sequence number
     */
    private function getPrimesFromRawList(): array {
        $foundlines = [];
        $prime_idx = 1;

        foreach ($this->getBodyLines() as $rawline) {
            $line = trim($rawline);

            // The proof-code key follows the primes; stop here so it is not
            // appended to the last prime's line.
            if (str_starts_with($line, self::PROOFCODE_SECTION_MARKER)) {
                break;
            }

            if (str_starts_with($line, (string) $prime_idx)) {
                $foundlines[$prime_idx] = $line;
                $prime_idx++;
                continue;
            }

            // Nothing to continue yet: ignore lines that precede the first prime.
            if ($prime_idx === 1) {
                continue;
            }

            $previous = $prime_idx - 1;

            if (str_ends_with($foundlines[$previous], '\\')) {
                $foundlines[$previous] = str_replace('\\', '', $foundlines[$previous]) . $line;
            } else {
                $foundlines[$previous] .= ' ' . $line;
            }
        }

        return $foundlines;
    }

    /**
     * Turns each logical prime line into a record appended to $primes.
     *
     * Lines that none of the patterns match are reported with var_dump() and skipped.
     */
    private function parsePrimes(): void {
        $patterns = [
            // The common case: the description contains no whitespace.
            '/^(\d{1,4})([a-z])?\s+(\S*)\s+(\d+)\s([A-Za-z0-9]+)\s+(\d+)(.*)/',
            // Abbreviated decimal expansions such as
            // "10000000000000000000...(34053 other digits)...00000000000000532669".
            '/^(\d{1,4})([a-z])?\s+(\d+\.\.\.[\(\)\w\s]+\.\.\.\d+)\s+(\d+)\s([A-Za-z0-9]+)\s+(\d+)(.*)?/',
            // Worded descriptions such as
            // "5163e Ramanujan tau function at 199^4518 ECPP 57125 E3    2022 ECPP".
            '/^(\d{1,4})([a-z])?\s+([\w\s]+[\d\^]+[\sA-Za-z]+)\s+(\d+)\s([A-Za-z0-9]+)\s+(\d+)(.*)?/',
        ];

        foreach ($this->getPrimesFromRawList() as $line) {
            $matches = [];

            // Try the patterns in order and keep the first one that matches.
            foreach ($patterns as $pattern) {
                if (preg_match($pattern, $line, $matches) === 1) {
                    break;
                }
            }

            if (count($matches) <= 1) {
                var_dump("Could not parse line '" . $line . "'");
                continue;
            }

            $sum = trim($matches[3]);

            $this->primes[] = [
                'rank' => intval($matches[1]),
                'ranknote' => $matches[2],
                'sum' => $sum,
                'digitlength' => intval($matches[4]),
                'proofcode' => $matches[5],
                'year' => intval($matches[6]),
                // ?? instead of a truthiness test so a comment of "0" is kept.
                'comment' => trim($matches[7] ?? ''),
                'checksum' => dechex(crc32($sum)),
            ];
        }
    }

    /**
     * Reads the proof-code key into $proofcodes.
     *
     * Only lines after the one starting with the section marker are considered.
     * Each line is split into a leading alphanumeric code and the remaining text.
     */
    private function parseProofcodes(): void {
        $proofcodeRegex = '/^([A-Za-z0-9]+)\s+(.*)$/';
        $sectionStarted = false;

        foreach ($this->getBodyLines() as $line) {
            if (!$sectionStarted) {
                // The line is the haystack and the marker the needle; the marker
                // line itself is not a proof-code entry.
                $sectionStarted = str_starts_with(trim($line), self::PROOFCODE_SECTION_MARKER);
                continue;
            }

            if (preg_match($proofcodeRegex, $line, $matches) === 1 && $matches[1] !== 'KEY') {
                $this->proofcodes[] = [
                    'proofcode' => $matches[1],
                    'provers' => $matches[2],
                ];
            }
        }
    }
}

// Runs as soon as this file is loaded: builds a parser and calls parse(),
// which fetches the list over HTTP and fills the parser's public
// $primes and $proofcodes arrays.
$t5kparser = new T5KParser();
$t5kparser->parse();

/*
Once parse() has returned, the results are available as:
  Primes:     $t5kparser->primes
  Proofcodes: $t5kparser->proofcodes
*/