Skip to content

Instantly share code, notes, and snippets.

@srcmaker
Last active December 9, 2020 00:52
Show Gist options
  • Save srcmaker/2e73d969820f29ae938bf4789390240f to your computer and use it in GitHub Desktop.
Save srcmaker/2e73d969820f29ae938bf4789390240f to your computer and use it in GitHub Desktop.
Get an ASCII-only URL from a URL that contains Unicode characters
<?php
/**
 * Convert a URL that may contain Unicode into an ASCII-only, linkable URL.
 *
 * The host is converted to its IDNA (punycode) form, the scheme and host are
 * lower-cased, and every byte in the path, query and fragment that is not
 * printable ASCII (spaces, control bytes, UTF-8 sequences) is percent-encoded.
 * Delimiters and existing "%XX" escapes are left untouched, so running the
 * function on its own output does not double-encode anything.
 *
 * A user-info part ("user:pass@") present in the input is not carried over to
 * the result.
 *
 * @param string $url      URL to normalise; "http://" is prepended when it has no scheme.
 * @param bool   $validate When true, the result is passed through FILTER_VALIDATE_URL.
 * @return string|false|null The ASCII URL; null when $url is empty after trimming;
 *                           false when the URL cannot be parsed, the host cannot be
 *                           converted, or validation fails.
 */
function get_linkable_url($url, $validate = true)
{
    $url = trim((string) $url);
    if ($url === '') {
        return null;
    }

    // Prepend the default scheme only when no "scheme://" prefix exists.
    // Comparing the first four characters with "http" mis-detects hosts such
    // as "httpbin.org" and upper-case schemes such as "HTTP://".
    if (!preg_match('~^[a-z][a-z0-9+.\-]*://~i', $url)) {
        $url = 'http://' . $url;
    }

    $prs = parse_url($url);
    if ($prs === false
        || !isset($prs['scheme'], $prs['host'])
        || $prs['scheme'] === ''
        || $prs['host'] === ''
    ) {
        return false;
    }

    $host = $prs['host'];
    if (function_exists('idn_to_ascii')) {
        // UTS #46 processing also performs the case mapping of the host.
        $host = idn_to_ascii($host, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46);
    } elseif (preg_match('/[^\x00-\x7F]/', $host)) {
        // Without the intl extension a non-ASCII host cannot be converted.
        $host = false;
    }
    if ($host === false || $host === '') {
        return false;
    }

    $result = strtolower($prs['scheme']) . '://' . strtolower($host);
    if (isset($prs['port'])) {
        $result .= ':' . $prs['port'];
    }

    // Percent-encode byte by byte (no "u" modifier on purpose) everything
    // outside printable ASCII. Reserved characters and existing escapes stay
    // as they are, so the structure and meaning of each component is kept.
    $encode = static function ($component) {
        return preg_replace_callback(
            '/[^\x21-\x7E]/',
            static function (array $match) {
                return rawurlencode($match[0]);
            },
            $component
        );
    };

    if (isset($prs['path'])) {
        $result .= $encode($prs['path']);
    }
    if (isset($prs['query'])) {
        $result .= '?' . $encode($prs['query']);
    }
    if (isset($prs['fragment'])) {
        $result .= '#' . $encode($prs['fragment']);
    }

    if ($validate) {
        return filter_var($result, FILTER_VALIDATE_URL);
    }

    return $result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment