Last active
December 9, 2020 00:52
-
-
Save srcmaker/2e73d969820f29ae938bf4789390240f to your computer and use it in GitHub Desktop.
Get an ASCII-only URL from a URL that contains Unicode characters.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Convert a URL that may contain Unicode into an ASCII-only, linkable URL.
 *
 * The host is lower-cased and converted to its IDNA (punycode) form, and any
 * byte in the path, query or fragment that is not printable ASCII (non-ASCII
 * bytes, whitespace, control characters) is percent-encoded. ASCII characters
 * that are already present -- including "/", "&", "=" and existing "%XX"
 * escapes -- are left untouched, so the structure of the URL is preserved.
 *
 * Only scheme, host, port, path, query and fragment are carried over; a
 * "user:pass@" part of the input is not included in the result.
 *
 * @param string $url      URL to convert. "http://" is prepended when the
 *                         input does not start with "http://" or "https://".
 * @param bool   $validate When true, the result is passed through
 *                         FILTER_VALIDATE_URL before being returned.
 * @return string|false|null The ASCII URL; null when the input is empty;
 *                           false when the URL cannot be parsed, the host
 *                           cannot be converted, or validation fails.
 */
function get_linkable_url($url, $validate = true)
{
    $url = trim((string) $url);
    if ($url === '') {
        return null;
    }

    // Require a real "http://" / "https://" prefix (case-insensitive). Checking
    // only the first four letters would treat hosts such as "httpbin.org" as
    // already having a scheme.
    if (!preg_match('#^https?://#i', $url)) {
        $url = 'http://' . $url;
    }

    $prs = parse_url($url);
    if ($prs === false || !isset($prs['scheme'], $prs['host'])) {
        return false;
    }

    $host = idn_to_ascii(strtolower($prs['host']), IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
    if ($host === false || $host === '') {
        return false;
    }

    // Percent-encode every byte outside the printable ASCII range 0x21-0x7E.
    // The pattern has no "u" modifier on purpose: it works byte by byte, so
    // each byte of a multi-byte UTF-8 character becomes its own %XX escape.
    $encode = static function ($component) {
        return preg_replace_callback(
            '/[^\x21-\x7E]/',
            static function ($match) {
                return rawurlencode($match[0]);
            },
            $component
        );
    };

    $result = strtolower($prs['scheme']) . '://' . $host;
    if (isset($prs['port'])) {
        $result .= ':' . $prs['port'];
    }
    if (isset($prs['path'])) {
        $result .= $encode($prs['path']);
    }
    if (isset($prs['query'])) {
        // Encoding the query as a whole keeps "&" and "=" intact, so values
        // containing "=" and parameters without a value survive unchanged.
        $result .= '?' . $encode($prs['query']);
    }
    if (isset($prs['fragment'])) {
        $result .= '#' . $encode($prs['fragment']);
    }

    if ($validate) {
        // filter_var() returns false when the URL does not validate.
        return filter_var($result, FILTER_VALIDATE_URL);
    }

    return $result;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment