Skip to content

Instantly share code, notes, and snippets.

@pjaudiomv
Last active January 11, 2023 02:23
Show Gist options
  • Save pjaudiomv/3b10ed5ef0d7f94c8d1942ea1a7c3e5b to your computer and use it in GitHub Desktop.
Save pjaudiomv/3b10ed5ef0d7f94c8d1942ea1a7c3e5b to your computer and use it in GitHub Desktop.
get spad as json
<?php
/**
 * Fetches the page at $jft_url, reads its <tr> rows in document order and
 * prints them as a single JSON object keyed by the names in $jftKeys.
 *
 * Every row is stored as its trimmed text, except the "content" row, which
 * keeps the inner markup of its first <td> with <br> tags turned into spaces.
 */
$jft_url = 'https://spadna.org';

// Output keys; the n-th <tr> found in the document is stored under the n-th key.
$jftKeys = ['date', 'title', 'page', 'quote', 'source', 'content', 'divider', 'thought', 'copyright'];

$html = get($jft_url);
if (!is_string($html) || $html === '') {
    // loadHTML() cannot parse an empty document; fail with a clear message instead.
    throw new Exception('Empty response from ' . $jft_url);
}

// Collect libxml parse errors internally *while parsing*: otherwise loadHTML()
// raises a PHP warning for each markup problem, which can end up in the output
// ahead of the JSON. The previous setting is restored afterwards.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$d = new DOMDocument();
$d->validateOnParse = true;
$d->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$xpath = new DOMXPath($d);
$jftArray = [];
$rowIndex = 0;
foreach ($d->getElementsByTagName('tr') as $element) {
    if (!isset($jftKeys[$rowIndex])) {
        // More rows than known keys: stop rather than index past $jftKeys.
        break;
    }
    $key = $jftKeys[$rowIndex];

    if ($key === 'content') {
        // Serialise the children of the row's first <td> so inline markup is
        // preserved, then replace <br> tags with spaces.
        $contentHtml = '';
        $firstCell = $xpath->query('td', $element)->item(0);
        if ($firstCell !== null) {
            foreach ($firstCell->childNodes as $child) {
                $contentHtml .= $d->saveXML($child);
            }
        }
        $jftArray[$key] = trim((string) preg_replace('/<br[^>]*>/i', ' ', $contentHtml));
    } else {
        $jftArray[$key] = trim($element->nodeValue);
    }

    $rowIndex++;
}

// Normalise the copyright text: drop newlines first, then collapse whitespace
// runs to one space. The key is always emitted, even if the row was missing.
$copyright = isset($jftArray['copyright']) ? $jftArray['copyright'] : '';
$jftArray['copyright'] = preg_replace('/\s+/', ' ', str_replace("\n", '', $copyright));

$json = json_encode($jftArray);
if ($json === false) {
    // Surface encoding problems instead of silently printing nothing.
    throw new Exception('Unable to encode JSON: ' . json_last_error_msg());
}
echo $json;
/**
 * Downloads a URL with cURL and returns the raw response body.
 *
 * The HTTP status code is not inspected; whatever body the server sends is
 * returned as-is.
 *
 * @param string $url URL to request.
 *
 * @return string Response body.
 *
 * @throws Exception When the cURL handle cannot be created or the transfer
 *                   fails (including hitting one of the timeouts below).
 */
function get($url)
{
    $ch = curl_init();
    if ($ch === false) {
        throw new Exception('Unable to initialise cURL');
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:105.0) Gecko/20100101 Firefox/105.0');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Bound both the connect phase and the whole transfer so an unresponsive
    // server cannot hang the script indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $data = curl_exec($ch);
    $errorno = curl_errno($ch);
    curl_close($ch);

    if ($errorno > 0) {
        throw new Exception(curl_strerror($errorno));
    }
    if ($data === false) {
        // Defensive: never hand a boolean back to callers expecting a body.
        throw new Exception('cURL request failed');
    }

    return $data;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment