Skip to content

Instantly share code, notes, and snippets.

@marcus-at-localhost
Last active May 21, 2020 14:28
Show Gist options
  • Select an option

  • Save marcus-at-localhost/2881630444fbd69039c8 to your computer and use it in GitHub Desktop.

Select an option

Save marcus-at-localhost/2881630444fbd69039c8 to your computer and use it in GitHub Desktop.
[Relative to absolute links with regex and DOMDocument] #domdocument #regex
<?php
/**
 * Makes relative href/src attribute values in an HTML string absolute, using a regex.
 *
 * Only double-quoted href="..." / src="..." values that contain no ":" are touched, so
 * anything that already carries a scheme (http:, mailto:, data:, ...) is left as it is.
 *
 *  - protocol-relative values ("//host/path") are left untouched;
 *  - root-relative values ("/path") are resolved against the scheme/host/port of $base;
 *  - everything else (including "" and "#fragment") is prefixed with $base.
 *
 * @param string $text HTML fragment to rewrite; it is trimmed before processing.
 * @param string $base Base URL; a trailing "/" is appended when missing.
 *
 * @return string The rewritten HTML. $text is returned unchanged (not even trimmed)
 *                when $base is empty.
 */
function regexToAbs($text, $base)
{
    if (empty($base)) {
        return $text;
    }

    // base url needs trailing /
    if (substr($base, -1) !== '/') {
        $base .= '/';
    }

    $text = trim((string) $text);
    if (empty($text)) {
        return $text;
    }

    // Root-relative links must be resolved against the site root, not against the
    // (possibly deeper) base path. Fall back to $base when it has no scheme/host.
    $origin = $base;
    $parts = parse_url($base);
    if (is_array($parts) && isset($parts['scheme'], $parts['host'])) {
        $origin = $parts['scheme'] . '://' . $parts['host']
            . (isset($parts['port']) ? ':' . $parts['port'] : '') . '/';
    }

    // A callback is used instead of a replacement template so that "$1" or "\1"
    // occurring inside the base URL is inserted literally, not as a back-reference.
    // http://wintermute.com.au/bits/2005-09/php-relative-absolute-links/
    $result = preg_replace_callback(
        '#(href|src)="([^:"]*)"#',
        function ($match) use ($base, $origin) {
            $url = $match[2];

            if (strncmp($url, '//', 2) === 0) {
                // protocol-relative: already points at a specific host
                return $match[0];
            }

            if (substr($url, 0, 1) === '/') {
                return $match[1] . '="' . $origin . ltrim($url, '/') . '"';
            }

            return $match[1] . '="' . $base . $url . '"';
        },
        $text
    );

    // preg_replace_callback() yields null on a PCRE error; keep the input in that case.
    return $result === null ? $text : $result;
}
/**
 * Makes the src attribute of every <img> in an HTML fragment absolute, using DOMDocument.
 *
 *  - sources that already have a scheme ("http:", "data:", ...) or are
 *    protocol-relative ("//host/path") are left untouched;
 *  - root-relative sources ("/path") are resolved against the scheme/host/port of $base;
 *  - every other source is prefixed with $base;
 *  - <img> elements without a src attribute are not modified.
 *
 * The fragment is round-tripped through DOMDocument, so the returned markup is
 * libxml's serialisation of the input (with the doctype/html/body wrapper that
 * DOMDocument adds stripped again), not a byte-for-byte copy of it.
 *
 * @param string $text HTML fragment, expected to be UTF-8; it is trimmed before processing.
 * @param string $base Base URL; a trailing "/" is appended when missing.
 *
 * @return string The rewritten HTML. $text is returned unchanged (not even trimmed)
 *                when $base is empty.
 */
function domdocToAbs($text, $base)
{
    if (empty($base)) {
        return $text;
    }

    // base url needs trailing /
    if (substr($base, -1) !== '/') {
        $base .= '/';
    }

    $text = trim((string) $text);
    if (empty($text)) {
        return $text;
    }

    // Root-relative sources must be resolved against the site root, not against the
    // (possibly deeper) base path. Fall back to $base when it has no scheme/host.
    $origin = $base;
    $parts = parse_url($base);
    if (is_array($parts) && isset($parts['scheme'], $parts['host'])) {
        $origin = $parts['scheme'] . '://' . $parts['host']
            . (isset($parts['port']) ? ':' . $parts['port'] : '') . '/';
    }

    // loadHTML() does not assume UTF-8 for a bare fragment. Instead of squeezing the
    // text through utf8_decode() (lossy outside ISO-8859-1, deprecated since PHP 8.2),
    // every non-ASCII character is turned into a numeric character reference, which
    // the parser decodes regardless of the byte encoding it guesses.
    $html = preg_replace_callback(
        '/[^\x00-\x7F]/u',
        function ($match) {
            $bytes = array_values(unpack('C*', $match[0]));
            switch (count($bytes)) {
                case 2:
                    $codePoint = (($bytes[0] & 0x1F) << 6) | ($bytes[1] & 0x3F);
                    break;
                case 3:
                    $codePoint = (($bytes[0] & 0x0F) << 12)
                        | (($bytes[1] & 0x3F) << 6)
                        | ($bytes[2] & 0x3F);
                    break;
                default:
                    $codePoint = (($bytes[0] & 0x07) << 18)
                        | (($bytes[1] & 0x3F) << 12)
                        | (($bytes[2] & 0x3F) << 6)
                        | ($bytes[3] & 0x3F);
            }

            return '&#' . $codePoint . ';';
        },
        $text
    );
    if ($html === null) {
        // Input is not valid UTF-8: hand the raw bytes to libxml and let it guess.
        $html = $text;
    }

    $DOM = new DOMDocument();
    // NOTE(review): this is assigned before the load; presumably loadHTML() replaces
    // the document and with it this setting - confirm before relying on it.
    $DOM->encoding = 'utf-8';
    // LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD would avoid the wrapper elements,
    // but was found to produce faulty HTML for fragments, hence the clean-up below.
    $DOM->loadHTML($html);

    foreach ($DOM->getElementsByTagName('img') as $img) {
        if (!$img->hasAttribute('src')) {
            continue;
        }

        $src = $img->getAttribute('src');

        // already absolute (has a scheme) or protocol-relative
        if (preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $src)) {
            continue;
        }

        if (substr($src, 0, 1) === '/') {
            $img->setAttribute('src', $origin . ltrim($src, '/'));
        } else {
            $img->setAttribute('src', $base . $src);
        }
    }

    // remove the redundant doctype/html/body DOMDocument added with Regex
    // http://stackoverflow.com/a/10023094
    $html = preg_replace('~<(?:!DOCTYPE|/?(?:html|body))[^>]*>\s*~i', '', $DOM->saveHTML());

    return $html;
}
// Demo: feed the same one-image fragment to both converters and dump the two
// results one after the other for comparison.
$input = <<<'HTM'
<img src="relative.html" />
HTM;
var_dump(
    regexToAbs($input, "http://www.de"),
    domdocToAbs($input, "http://www.de")
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment