Last active
January 11, 2023 02:23
-
-
Save pjaudiomv/3b10ed5ef0d7f94c8d1942ea1a7c3e5b to your computer and use it in GitHub Desktop.
get spad as json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Fetches the page at $jft_url, maps its <tr> rows (in document order) onto
// the names in $jftKeys, and prints the result as a single JSON object.
$jft_url = 'https://spadna.org';
$html = get($jft_url);
if (!is_string($html) || $html === '') {
    // DOMDocument::loadHTML() rejects an empty source; fail with a clear message instead.
    throw new Exception('Empty response from ' . $jft_url);
}

// Collect libxml's complaints about malformed markup internally while the
// document is being parsed, rather than letting them surface as PHP warnings.
libxml_use_internal_errors(true);
$d = new DOMDocument();
$d->validateOnParse = true;
$d->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors(false);

$jftKeys = array('date', 'title', 'page', 'quote', 'source', 'content', 'divider', 'thought', 'copyright');
$xpath = new DOMXPath($d);
$jftArray = [];
$i = 0;
foreach ($d->getElementsByTagName('tr') as $element) {
    if (!isset($jftKeys[$i])) {
        // More rows than known keys: stop instead of indexing past the key list.
        break;
    }
    if ($i != 5) {
        $jftArray[$jftKeys[$i]] = trim($element->nodeValue);
    } else {
        // Row 5 ("content") keeps the markup of its first <td>: serialise the
        // cell's child nodes, then turn every <br> tag into a space.
        $innerHTML = '';
        $cell = $xpath->query('td', $element)->item(0);
        if ($cell !== null) {
            foreach ($cell->childNodes as $child) {
                $innerHTML .= $d->saveXML($child);
            }
        }
        $jftArray["content"] = trim(preg_replace('/<br[^>]*>/i', ' ', $innerHTML));
    }
    $i++;
}

// Flatten the copyright text onto one line: drop newlines, then collapse
// remaining whitespace runs. The key is always emitted, even when the page
// had too few rows to provide it.
$copyright = isset($jftArray["copyright"]) ? $jftArray["copyright"] : '';
$copyright = str_replace("\n", "", $copyright);
$jftArray["copyright"] = preg_replace('/\s+/', ' ', $copyright);

echo json_encode($jftArray);
/**
 * Performs an HTTP GET request with cURL and returns the response body.
 *
 * @param string $url Address to request.
 * @return string Response body as returned by curl_exec().
 * @throws Exception When cURL cannot be initialised, reports a transfer
 *                   error (including a timeout), or returns no body.
 */
function get($url)
{
    $ch = curl_init();
    if ($ch === false) {
        throw new Exception('Unable to initialise cURL');
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:105.0) Gecko/20100101 Firefox/105.0');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Without explicit limits a stalled server would block the script indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    $data = curl_exec($ch);
    // Read the error code before the handle is closed.
    $errorno = curl_errno($ch);
    curl_close($ch);
    if ($errorno > 0) {
        throw new Exception(curl_strerror($errorno));
    }
    if ($data === false) {
        throw new Exception('cURL request failed without reporting an error code');
    }
    return $data;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment