Created
March 4, 2025 09:07
-
-
Save Kcko/61b4ca853ee556c593433ec7c402f861 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Scrapes the lost-animals listing page and prints the extracted records.
 *
 * Flow: download the page, parse it with DOMDocument, locate the
 * div#category-list wrapper, turn every "catalog-item" div inside it into an
 * associative array, and finally dump the collected records inside <pre> tags.
 *
 * NOTE(review): DOMDocument::loadHTML() falls back to ISO-8859-1 when the
 * document does not declare its charset; the accented labels below only match
 * if the page is decoded as UTF-8 - confirm the page declares it.
 */

// Collect libxml parse problems internally instead of emitting PHP warnings.
libxml_use_internal_errors(true);

/**
 * Returns the trimmed text node that directly follows a labelled <span>
 * inside one catalog item.
 *
 * @param DOMXPath $finder   XPath evaluator bound to the parsed document.
 * @param DOMNode  $item     Catalog item node used as the query context.
 * @param string   $rowClass Class fragment of the wrapping <div> ('name' or 'line').
 * @param string   $label    Label text to search for, e.g. 'Plemeno:'. Must not
 *                           contain a single quote (it is embedded in an XPath literal).
 *
 * @return string The trimmed value, or '' when the label or its text is missing.
 */
function extractLabelledValue(DOMXPath $finder, DOMNode $item, $rowClass, $label)
{
    $expression = ".//div[contains(@class, '" . $rowClass . "')]"
        . "/span[contains(@class, 'label') and contains(text(), '" . $label . "')]"
        . "/following::text()[1]";

    $nodes = $finder->query($expression, $item);
    if ($nodes === false || $nodes->length === 0) {
        // A card without this label yields an empty value instead of a null dereference.
        return '';
    }

    return trim((string) $nodes->item(0)->nodeValue);
}

/**
 * Extracts the URL from a CSS "background-image: url(...)" declaration.
 *
 * Tolerates optional whitespace around the colon and inside the parentheses,
 * and strips single or double quotes wrapped around the URL.
 *
 * @param string $style Raw value of a style attribute.
 *
 * @return string The URL, or '' when the style has no background-image URL.
 */
function extractBackgroundImageUrl($style)
{
    $pattern = '/background-image\s*:\s*url\(\s*[\'"]?(.*?)[\'"]?\s*\)/i';
    if (preg_match($pattern, $style, $matches) === 1) {
        return $matches[1];
    }

    return '';
}

/**
 * Builds the record for a single catalog item.
 *
 * @param DOMXPath $finder XPath evaluator bound to the parsed document.
 * @param DOMNode  $item   The "catalog-item" node to read.
 *
 * @return array Record with the keys odkaz, jmeno, plemeno, misto_ztraty,
 *               kraj, pohlavi, barva, velikost and obrazek (all strings).
 */
function parseCatalogItem(DOMXPath $finder, DOMNode $item)
{
    // Output key => [class fragment of the row <div>, label text].
    $fields = [
        'jmeno'        => ['name', 'Jméno:'],
        'plemeno'      => ['line', 'Plemeno:'],
        'misto_ztraty' => ['line', 'Místo ztráty:'],
        'kraj'         => ['line', 'Kraj:'],
        'pohlavi'      => ['line', 'Pohlaví:'],
        'barva'        => ['line', 'Barva:'],
        'velikost'     => ['line', 'Velikost:'],
    ];

    // Detail link of the animal.
    $link = '';
    $linkNodes = $finder->query(".//a[contains(@href, '/zvire/')]", $item);
    if ($linkNodes !== false && $linkNodes->item(0) instanceof DOMElement) {
        $link = $linkNodes->item(0)->getAttribute('href');
    }

    // The photo is provided as a CSS background on a span, not as an <img>.
    $style = '';
    $imageNodes = $finder->query(".//span[@class='bg-image']", $item);
    if ($imageNodes !== false && $imageNodes->item(0) instanceof DOMElement) {
        $style = $imageNodes->item(0)->getAttribute('style');
    }

    $record = ['odkaz' => $link];
    foreach ($fields as $key => $spec) {
        $record[$key] = extractLabelledValue($finder, $item, $spec[0], $spec[1]);
    }
    $record['obrazek'] = extractBackgroundImageUrl($style);

    return $record;
}

$sourceUrl = 'https://www.psidetektiv.cz/ztracena-zvirata/';

// Initialised up front so the final dump is defined on every code path.
$savedItems = [];

$data = file_get_contents($sourceUrl);

if ($data === false || trim($data) === '') {
    // loadHTML() rejects an empty source, so stop here rather than crash.
    echo "Failed to download {$sourceUrl}\n";
} else {
    // Load HTML into DOMDocument
    $dom = new DOMDocument();
    $dom->loadHTML($data, LIBXML_NOERROR | LIBXML_NOWARNING);

    $finder = new DOMXPath($dom);
    $wrapper = $finder->query("//div[@id='category-list']");

    if ($wrapper !== false && $wrapper->length > 0) {
        $catalogItems = $finder->query(
            ".//div[contains(@class, 'catalog-item')]",
            $wrapper->item(0)
        );

        if ($catalogItems !== false) {
            foreach ($catalogItems as $item) {
                $savedItems[] = parseCatalogItem($finder, $item);
            }
        }
    } else {
        echo "No elements found\n";
        foreach (libxml_get_errors() as $error) {
            // Parser messages can quote fragments of the remote page, so escape them.
            $message = htmlspecialchars($error->message, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "Line {$error->line}: {$message}\n";
        }
    }
}

// The records come from a remote page; escape them before embedding in HTML.
echo "<pre>";
echo htmlspecialchars(print_r($savedItems, true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
echo "</pre>";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment