Last active
September 26, 2015 16:08
-
-
Save Fil/1123845 to your computer and use it in GitHub Desktop.
Outil pour créer une copie locale statique de "mes" seens
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ma config | |
seen-local-config.php | |
# stockage de mon backup xml | |
tmp/ | |
# mes fichiers exportes | |
seen/ | |
# mes fichiers downloades | |
files/ | |
# mac os x | |
.DS_Store | |
*~ | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
# Download every URL listed in seen/urls.txt (one URL per line).
# Each file is stored as files/<arbo(url)>.<first 6 hex chars of md5(url)>.<extension>
# (see dest() and wget() below).
#
# NOTE: a former one-off migration step that renamed files from an older naming
# scheme was removed from this loop: it was guarded by a variable ($old) that was
# never assigned, so it could never run and only raised an "undefined variable"
# notice on every URL.
define('_LOG', false);

$lines = @file('seen/urls.txt');
if ($lines === false) {
    # without the list there is nothing to download; report instead of crashing
    echo "cannot read seen/urls.txt\n";
    $lines = array();
}

foreach (array_map('trim', $lines) as $url) {
    # remove the fragment (#anchor)
    $url = preg_replace(',#.*$,S', '', $url);

    # blank lines (or lines holding only a fragment) are not URLs
    if ($url === '' || $url === null) {
        continue;
    }

    # dropbox hack: rewrite the www host to the dl host
    $url = preg_replace("/^(https\:\/\/)(www)(\.dropbox\.com\/.*\/.*\/.*)$/", '\1dl\3', $url);

    $dest = dest($url);

    if (glob("$dest.*")) {
        # something already exists for this URL (any extension): keep it
        if (_LOG) {
            echo "+$dest\n";
        }
    } else {
        echo "!$dest\n";
        download($url, $dest);
    }
}
/**
 * Build the local path (without extension) under which a URL is stored.
 *
 * The path is "files/", then at most 60 characters of arbo($url), then a dot
 * and the first 6 hex characters of the URL's md5.
 *
 * @param string $url
 * @return string
 */
function dest($url) {
    $hash = md5($url);
    $tree = substr(arbo($url), 0, 60);

    return 'files/' . $tree . '.' . substr($hash, 0, 6);
}
/**
 * Fetch one URL into $dest, creating the destination directory first.
 *
 * @param string $url  URL to fetch
 * @param string $dest destination path without extension
 */
function download($url, $dest) {
    # TODO: look at the number of running processes and wait, etc.
    $directory = dirname($dest);
    @mkdir($directory, 0777, true);

    wget($url, $dest);
}
/**
 * Download $url with the wget command-line tool.
 *
 * The data is written to "$dest.tmp". When wget's output reports a MIME type
 * that mime_to_ext() knows, the file is renamed to "$dest.<ext>"; otherwise the
 * .tmp file is left in place and wget's output is saved to "$dest.log".
 *
 * @param string $url  URL to fetch
 * @param string $dest destination path without extension
 */
function wget($url, $dest) {
    # both the output path and the URL are shell-escaped: either may contain
    # characters the shell would otherwise interpret
    $cmd = "wget -O ".escapeshellarg("$dest.tmp")." ".escapeshellarg($url);

    # referer = scheme + host part of the URL
    $domain = preg_replace(',^(.*?//.*?/).*$,S', '\1', $url);

    echo "$cmd\n";

    $cmd .= ' --referer='.escapeshellarg($domain);
    # NOTE(review): --no-check-certificate disables TLS verification
    $cmd .= ' --no-check-certificate --tries=1 --timeout=5 --user-agent="Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6" --header="Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5" --header="Accept-Language: en-us,en;q=0.5" --header="Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7" --header="Keep-Alive: 300" 2>&1';

    # shell_exec() returns null when the command fails or prints nothing
    $res = (string) shell_exec($cmd);

    # wget prints e.g. "Length: 1234 (1.2K) [image/png]" ("Longueur" in French)
    if (preg_match(',^L(?:ongueur|ength): .* \[([a-z]+/[a-z\+\-]+)\],mS', $res, $m)
    AND $x = mime_to_ext($m[1])) {
        rename("$dest.tmp", "$dest.$x");
        echo "=> $dest.$x\n";
    } else {
        # unknown type or failed download: keep wget's output for inspection
        file_put_contents("$dest.log", $res);
        echo $res;
    }
}
/**
 * Map a MIME type to a file extension.
 *
 * Known types come from a fixed table; any other "text/..." type maps to
 * "txt". Unknown types yield null.
 *
 * @param string $mime
 * @return string|null
 */
function mime_to_ext($mime) {
    static $ext = array(
        'image/png' => 'png',
        'image/jpeg' => 'jpg',
        'image/gif' => 'gif',
        'text/html' => 'html',
        'application/xhtml+xml' => 'html',
        'application/xml' => 'xml',
        'application/zip' => 'zip',
        'application/pdf' => 'pdf',
        'application/msword' => 'doc',
        'application/json' => 'json',
        'audio/mpeg' => 'mp3',
        'video/quicktime' => 'qt',
        # Flash movies are .swf; .flv is the Flash *video* container
        'application/x-shockwave-flash' => 'swf',
        'video/x-flv' => 'flv',
    );

    $mime = (string) $mime;

    # isset() avoids an "undefined index" warning for unknown types
    if (isset($ext[$mime])) {
        return $ext[$mime];
    }

    if (preg_match(',^text/,S', $mime)) {
        return 'txt';
    }

    return null;
}
/**
 * Turn a URL into a short "host/last-segment" path fragment.
 *
 * The scheme and a leading "www." are dropped, the extension of the last path
 * segment is removed, everything is lower-cased and each run of characters
 * outside [a-z0-9/_] becomes a single "-". A URL with no path uses "index" as
 * its last segment.
 *
 * @param string $url
 * @return string
 */
function arbo($url) {
    $u = preg_replace('@^https?://@iS', '', $url);

    # array_filter() keeps the original keys; re-index so that $u[$p-1] really
    # is the last segment even when the URL holds empty segments ("a//b")
    $u = array_values(array_filter(explode('/', $u)));
    if (!$u) {
        $u = array('');
    }

    $u[0] = preg_replace(',^www\.,iS', '', $u[0]);

    $p = count($u);
    if ($p == 1) {
        $u[] = "index";
    } else {
        # drop a trailing ".ext" (up to 8 alphanumerics) from the last segment
        $u[$p-1] = preg_replace('@\.[a-z0-9]{0,8}$@iS', '', $u[$p-1]);
    }

    $u = strtolower($u[0].'/'.array_pop($u));
    $u = preg_replace('@[^a-z0-9/_]+@iS', '-', $u);

    return $u;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
body { | |
background-color: #eee; | |
color: #333; | |
padding: 20px; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
ini_set("memory_limit","1G");
/*
This script splits a #seenthis backup into small files on disk, the idea being
to find one's seens locally, straight from the disk's own indexing; paired with
git this should work rather well.
*/
# config
# username
$USER='USER';
# password
$PASSWD='•••••••';
# it is preferable to keep USER/PASSWD in an external file
include 'seen-local-config.php';
# export directory
define('_DIR_SEEN', 'seen/');
# cache directory for the downloaded backup
define('_DIR_CACHE', 'tmp/');
# address of the seenthis export
# NOTE(review): the "password@host" part of this URL was mangled by e-mail
# obfuscation in the published copy; it is rebuilt here from $PASSWD and the
# seenthis.net host -- confirm the host against the original file.
# The credentials are URL-encoded so that special characters in them cannot
# break the URL.
define('_BACKUP', 'https://'.rawurlencode($USER).':'.rawurlencode($PASSWD).'@seenthis.net/?page=xml_export');
# create the tag pages?
define('_DO_TAGS', true);
# /config
define ('_REG_CHARS', "a-z0-9\-–\_àáäâāăåąæçćĉċčĎďđéèëêēĕėęěĝğġģĥħíìïîĩīĭįıijĵķĸĺļľŀłðñńņňʼnŋóòöôõōŏőœøŕŗřśŝşšţťŧúùüûũūŭůűųŵÿýŷźżžþß" | |
."\'’°\&\+" | |
."אבגדהוזחטיךכלםמןנסעףפץצְֱֲֳִֵֶַָֹֺֻּֽ֑֖֛֢֣֥֦֧֪֚֭֮֒֓֔֕֗֘֙֜֝֞֟֠֡֨֩֫֬֯־ֿ׀ׁׂ׃ׄקרשתءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّْٕٖٝٓٔٗ٘ٙٚٛ" | |
."ՙ՚՛՜՝՞՟աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև" | |
."ͰͱͲͳʹ͵Ͷͷͺͻͼͽ΄΅Ά·ΈΉΊΌΎΏΐάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϐϑϒϓϔϕϖϗϙϛϝϟϡϰϱϲϳϴϵ϶ϸϹϺϻϼϽϾϿ" | |
."абвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁ҂҃҄҅҆҈҉ҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҷҹһҽҿӀӂӄӆӈӌӎӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣ" | |
); | |
# Regex fragment (no delimiters) capturing a URL: an http, https, ftp or ftps
# scheme, "://", one or more characters from the wide class, then one or more
# characters from the narrower class of allowed trailing characters.
define(
    '_REG_URL',
    "((http|ftp)s?:\/\/"
    ."["._REG_CHARS."\"#~!«»;:\|\.’\?=&%@!\-\/\_\,\(\)]+"
    ."["._REG_CHARS."#\/\=\(\)\$\*]+)"
);
# Regex fragment (no delimiters) matching a hashtag: "#" followed by one or more
# characters of _REG_CHARS.
define(
    '_REG_TAG',
    "\#["._REG_CHARS."]+"
);
date_default_timezone_set('Europe/Paris');
mb_internal_encoding("UTF-8");
@mkdir(_DIR_SEEN, 0777, true);

$a = inc_xml_to_array_dist(get_file(_BACKUP));

# process the threads
# $mem maps each thread's href to the summary returned by traiter_thread();
# it is initialised so that an empty backup does not break the loops below
$mem = array();
foreach ($a as $c) {
    $cle = $c['href'];
    $thread = $c[0];
    $mem[$cle] = traiter_thread($cle, $thread);
}

# process the tags
if (_DO_TAGS) {
    # build the list of tags: lower-cased tag => list of links to its threads
    $tags = array();
    $tags_url = array();
    foreach ($mem as $cle => $v) {
        foreach ((array) $v['tags'] as $href => $tag) {
            $tag = mb_strtolower($tag);
            $tags[$tag][] = '<a href="../../'.$v['file'].'">'.htmlspecialchars((string) $v['title']).'</a>';
            $tags_url[$tag] = $href;
        }
    }

    # create the tag pages
    @mkdir(_DIR_SEEN.'tag');
    foreach ($tags as $tag => $liste) {
        # purely numeric tags come back from the array as integer keys
        $tag = (string) $tag;

        # escape everything that goes into the markup; truncate BEFORE escaping
        # so that an entity can never be cut in half
        $content = "<html>
<head>
<meta http-equiv='content-type' content='text/html; charset=utf-8' />
<title>".htmlspecialchars($tag)."</title>
<link rel='stylesheet' media='all' href='../../seen-local.css' type='text/css' />
</head>
<body class='tag'>
<h1><a href='".htmlspecialchars((string) $tags_url[$tag], ENT_QUOTES)."'>".htmlspecialchars(mb_substr($tag,0,100))."</a></h1>
"
            . "<ul><li>" . join('</li><li>', $liste). "</li></ul>"
            . "</body>\n"
            . "</html>\n";

        $tag_file = _DIR_SEEN.'tag/'.urlencode($tag).'.html';
        if (file_put_contents($tag_file, $content) === false) {
            echo "tag $tag_file: write failed\n";
        } else {
            echo "tag $tag_file\n";
        }
    }
}

# write the flat list of all URLs to a file
$urls = array();
foreach ($mem as $cle => $v) {
    foreach ((array) $v['urls'] as $href => $url) {
        if (is_array($url))
            $url = $url['url'][0];
        $urls[] = $url;
    }
}
$urls = array_unique($urls);
sort($urls);
if (file_put_contents(_DIR_SEEN.'urls.txt', join("\n", $urls)) === false) {
    echo "cannot write "._DIR_SEEN."urls.txt\n";
}

#do_git();
exit;
/**
 * Export one thread to its HTML file (when it changed) and summarise it.
 *
 * The file lives in _DIR_SEEN under a directory named after the thread id in
 * base 36 minus its last two characters; it is rewritten only when its mtime
 * differs from the date of the most recent message.
 *
 * @param string $cle    thread href; its trailing digits are the numeric id
 * @param array  $thread messages of the thread, as produced by ObjectToArray()
 * @return array 'title', 'file', 'tags' (href => label) and 'urls' (href => value)
 */
function traiter_thread($cle, $thread) {
    # directories already touched during this run
    static $touched = array();

    $num = preg_match('/\d+$/', $cle, $m) ? $m[0] : '0';
    $id = base_convert($num, 10, 36);

    $date = 0;
    $tags = $urls = array();
    foreach ($thread as $msg) {
        $date = max($date, (int) strtotime($msg[0]['date'][0]));

        if (isset($msg[0]['tags'][0]) && is_array($msg[0]['tags'][0])) {
            foreach ($msg[0]['tags'][0] as $t) {
                $tags[$t['href']] = $t[0];
            }
        }

        # every child whose name starts with "url" is a link of the message
        foreach ($msg[0] as $k => $v) {
            if (substr($k, 0, 3) == 'url' && isset($v['href'])) {
                $urls[$v['href']] = $v[0];
            }
        }
    }

    # title = first non-empty line of the first message
    $msg = $thread['message'];
    # array_shift() takes its argument by reference, hence the variable
    $lines = array_filter(explode("\n", (string) $msg[0]['text'][0]));
    $title = (string) array_shift($lines);

    @mkdir($dir = _DIR_SEEN.substr($id, 0, -2));
    if (!isset($touched[$dir])) {
        $touched[$dir] = true;
        touch($dir, $date);
    }

    $file = $dir.'/seen-'.string_to_filename($title).'.'.$id.'.html';
    if (@filemtime($file) !== $date) {
        seensave($id, $thread, $date, $cle, $file);
    }

    return array('title' => $title, 'file' => $file, 'tags' => $tags, 'urls' => $urls);
}
/**
 * (Re)write the HTML file of a thread and stamp it with the thread's date.
 *
 * Any other file carrying the same id (left behind when the title, and so the
 * file name, changed) is reported through do_git_mv() and deleted.
 *
 * @param string $id     thread id in base 36
 * @param array  $thread thread data
 * @param int    $date   timestamp to set as the file's mtime
 * @param string $cle    thread href
 * @param string $file   destination file
 */
function seensave($id, $thread, $date, $cle, $file) {
    // look for another file (in case the title changed)
    @unlink($file);

    // the directory name is the id minus its last two characters, so it can be
    // empty or several characters long: search every sub-directory as well as
    // _DIR_SEEN itself, not only one-character directories
    $patterns = array(
        _DIR_SEEN.'*/*.'.$id.'.html',
        _DIR_SEEN.'*.'.$id.'.html',
    );
    foreach ($patterns as $pattern) {
        $found = glob($pattern);
        if (!$found) {
            continue;
        }
        foreach ($found as $f) {
            do_git_mv($f, $file);
            unlink($f);
        }
    }

    // write the new file
    echo "writing $file...";
    $content = make_html_content($thread, $cle, $file);
    $r = (file_put_contents($file, $content) !== false)
        && touch($file, $date);

    if ($r)
        echo " OK\n";
    else
        echo " oups\n";
}
/**
 * Render a thread as a standalone HTML page.
 *
 * The page title is the first non-empty line of the first message. Each
 * message is HTML-escaped, then URLs and hashtags are turned into links and
 * text between the quotation marks ❝ ❞ is wrapped in a blockquote. The raw
 * thread is also embedded as JSON in a script element.
 *
 * @param array  $thread thread data, as produced by ObjectToArray()
 * @param string $cle    thread href, linked from the date
 * @param string $file   path of the file being generated
 * @return string
 */
function make_html_content($thread, $cle, $file) {
    $msg = $thread['message'];
    # array_shift() takes its argument by reference, hence the variable
    $lines = array_filter(explode("\n", (string) $msg[0]['text'][0]));
    $title = (string) array_shift($lines);

    # the title is user text: escape it, and truncate before escaping so that
    # an entity is never cut in half
    $content = "<html>
<head>
<meta http-equiv='content-type' content='text/html; charset=utf-8' />
<title>".htmlspecialchars($title)."</title>
<link rel='stylesheet' media='all' href='../../seen-local.css' type='text/css' />
</head>
<body>
<h1>".htmlspecialchars(mb_substr($title,0,100))."</h1>
";

    $content .= '<a href="'.htmlspecialchars((string) $cle).'" class="calcul_date">'
        .date('Y-m-d H:i:s', strtotime($thread['message'][0]['date'][0]))."</a>";

    foreach ($thread as $msg) {
        $text = nl2br(htmlspecialchars($msg[0]['text'][0]));
        $text = creer_liens($text);

        # tags
        $text = creer_tags($text, $file, $title);

        # quotations
        $text = preg_replace(',❝.*?❞,ms',
            "<blockquote>\\0</blockquote>", $text);
        $text = preg_replace(',(?:<br />|\n)*(</?blockquote>)(?:<br />|\n)*,',
            '\1', $text);

        $content .= "<hr />
<span class='auteur'><a href='".htmlspecialchars((string) $msg[0]['author']['href'], ENT_QUOTES)."'>"
            .htmlspecialchars((string) $msg[0]['author'][0])."</a></span>"
            ."<div>".$text."</div>\n";
    }

    $content .= '<script type="text/javascript">
thread = '.json_encode($thread).';
</script>
';

    $content .= "</body>\n</html>\n";

    return $content;
}
/**
 * Commit the whole export directory with git ("git add ." then
 * "git commit -a -m'autosave'"), then go back to the previous directory.
 */
function do_git() {
    $p = getcwd();

    # never run git from the wrong directory: if the export directory cannot be
    # entered, the commands below would add and commit the current one instead
    if (!@chdir(_DIR_SEEN)) {
        echo "cannot chdir to "._DIR_SEEN."\n";
        return;
    }

    `git add .`;
    `git commit -a -m'autosave'`;

    if ($p !== false) {
        chdir($p);
    }
}
/**
 * Print the "git mv" command that would move $src to $dest.
 *
 * Nothing is executed: the command is only echoed, wrapped in literal
 * backticks and followed by a newline.
 *
 * @param string $src
 * @param string $dest
 */
function do_git_mv($src, $dest) {
    $command = 'git mv ' . $src . ' ' . $dest;

    echo '`' . $command . '`' . "\n";
}
/**
 * Convert the children of an XML iterator into nested arrays.
 *
 * Each child is stored under its element name; when that name is already
 * taken, "-" and a uniqid() are appended to it. The entry receives the
 * element's attributes as string keys, followed by one numeric item: the
 * recursively converted children when the element has any, its string value
 * otherwise.
 *
 * @param Object $object iterator positioned on the parent element
 *                       (a SimpleXMLIterator in this file)
 * @return array
 */
function ObjectToArray($object){
    $result = array();

    $object->rewind();
    while ($object->valid()) {
        $name = $object->key();
        if (array_key_exists($name, $result)) {
            $name .= '-'.uniqid();
        }

        $node = $object->current();

        // attributes first, as string keys
        $props = get_object_vars($node);
        if (isset($props['@attributes'])) {
            foreach ($props['@attributes'] as $attr => $value) {
                $result[$name][$attr] = $value;
            }
        }

        // then the content: sub-tree or text
        if ($object->hasChildren()) {
            $result[$name][] = ObjectToArray($node);
        } else {
            $result[$name][] = strval($node);
        }

        $object->next();
    }

    return $result;
}
/**
 * Parse an XML document and convert it to nested arrays with ObjectToArray().
 *
 * Warnings raised while parsing and converting are silenced with @.
 *
 * @param string $u the XML source
 * @return array
 */
function inc_xml_to_array_dist($u) {
    $iterator = @new SimpleXmlIterator($u);
    $tree = @ObjectToArray($iterator);

    return $tree;
}
/**
 * Return the content of $url, going through a gzip cache in _DIR_CACHE.
 *
 * The URL is fetched again when the cached copy is missing or more than an
 * hour old. When fetching fails, the stale cached copy (if any) is used.
 *
 * @param string $url
 * @return string the content, or '' when it is neither downloadable nor cached
 */
function get_file($url) {
    $f = _DIR_CACHE.'seen-'.md5($url).'.gz';

    if (@filemtime($f) < time()-3600
    AND $content = file_get_contents($url)) {
        # the cache directory is not created anywhere else
        if (!is_dir(_DIR_CACHE)) {
            @mkdir(_DIR_CACHE, 0777, true);
        }

        $stored = false;
        if ($fp = gzopen($f.'.tmp', 'w')) {
            $written = gzwrite($fp, $content);
            # always close the handle, even when the write failed
            gzclose($fp);
            $stored = $written && rename($f.'.tmp', $f);
        }

        if (!$stored) {
            # caching failed: do not lose what was just downloaded
            @unlink($f.'.tmp');
            return $content;
        }
    }

    # gzfile() returns false when the cache file is missing or unreadable
    $lines = @gzfile($f);

    return $lines ? join('', $lines) : '';
}
/**
 * Wrap every URL found in $text in an <a> element.
 *
 * All URLs are replaced in one pass. The previous version ran str_replace()
 * once per match, so a text holding the same link twice (or a link that is a
 * prefix of another one) had its links wrapped again inside the anchors that
 * were already there; this was the bug seen on
 * local/seen/3/seen-3mw-http-ue-eu-int-ueDocs-cms_Data-docs.html
 *
 * @param string $text
 * @return string
 */
function creer_liens($text) {
    $linked = preg_replace('/'._REG_URL.'/uiS', "<a href='\\0'>\\0</a>", $text);

    # null means a PCRE error (e.g. invalid UTF-8): leave the text untouched
    return ($linked === null) ? $text : $linked;
}
/**
 * Turn every hashtag of $text into a link to its local tag page.
 *
 * Existing <a>...</a> elements are left untouched. Hashtags are replaced in a
 * single pass, so a tag that occurs twice, or that is a prefix of another tag
 * (#foo / #foobar), is linked exactly once. A "#" that directly follows "&" is
 * skipped: it belongs to a numeric HTML entity such as &#039;.
 *
 * @param string      $text  HTML-escaped text, possibly already holding links
 * @param string|null $file  unused
 * @param string|null $title unused
 * @return string
 */
function creer_tags($text, $file=null, $title=null) {
    // protect the links: odd-numbered parts are the <a>...</a> elements
    $parts = preg_split(',(<a\b.*?</a>),msS', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        return $text;
    }

    foreach ($parts as $k => $part) {
        if ($k % 2 != 0) {
            continue;
        }
        $linked = preg_replace_callback('/(?<!&)'._REG_TAG.'/uiS', 'creer_tags_lien', $part);
        // null means a PCRE error: keep that part as it was
        if ($linked !== null) {
            $parts[$k] = $linked;
        }
    }

    return join('', $parts);
}

/**
 * preg_replace_callback() helper of creer_tags(): build the link of one tag.
 *
 * @param array $m match; $m[0] is the hashtag including its leading "#"
 * @return string
 */
function creer_tags_lien($m) {
    $tag = substr(mb_strtolower($m[0]), 1);

    // the tag page is stored on disk under its urlencode()d name, so the "%"
    // of that name must itself be encoded in the href, otherwise the browser
    // decodes it and looks for a file that does not exist
    $href = str_replace('%', '%25', urlencode($tag));

    return "<a href='../tag/".$href.".html'>".$m[0]."</a>";
}
/**
 * Build a short, file-system friendly name from a title.
 *
 * Accents are removed, each run of non-word characters becomes one space, the
 * result is limited to 40 bytes, trimmed, and its spaces become dashes.
 *
 * @param string $t
 * @return string
 */
function string_to_filename($t) {
    $plain = remove_accents((string) $t);

    $words = preg_replace('/[^\w]+/u', ' ', $plain);
    if ($words === null) {
        // invalid UTF-8: fall back to a byte-wise match rather than losing the name
        $words = (string) preg_replace('/[^\w]+/', ' ', $plain);
    }

    // mb_strcut() keeps the 40-byte limit but never cuts a multibyte
    // character in half, which substr() did with non-Latin titles
    return str_replace(' ', '-', trim(mb_strcut($words, 0, 40, 'UTF-8')));
}
/* borrowed from wordpress */
/**
 * Tell whether a string looks like UTF-8.
 *
 * Every lead byte must be followed by the number of 10bbbbbb continuation
 * bytes that it announces. Lead bytes announcing 4 or 5 continuation bytes are
 * accepted too, although UTF-8 itself never uses sequences longer than 4 bytes.
 *
 * @author bmorel at ssi dot fr (modified)
 * @since 1.2.1
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function seems_utf8($str) {
    $size = strlen($str);
    $pos = 0;

    while ($pos < $size) {
        $lead = ord($str[$pos]);

        // number of continuation bytes announced by the lead byte
        if ($lead < 0x80) {
            $extra = 0; // 0bbbbbbb
        } elseif (($lead & 0xE0) == 0xC0) {
            $extra = 1; // 110bbbbb
        } elseif (($lead & 0xF0) == 0xE0) {
            $extra = 2; // 1110bbbb
        } elseif (($lead & 0xF8) == 0xF0) {
            $extra = 3; // 11110bbb
        } elseif (($lead & 0xFC) == 0xF8) {
            $extra = 4; // 111110bb
        } elseif (($lead & 0xFE) == 0xFC) {
            $extra = 5; // 1111110b
        } else {
            return false; // matches no model
        }

        // each announced byte must exist and look like 10bbbbbb
        while ($extra > 0) {
            $pos++;
            if ($pos == $size) {
                return false;
            }
            if ((ord($str[$pos]) & 0xC0) != 0x80) {
                return false;
            }
            $extra--;
        }

        $pos++;
    }

    return true;
}
/**
 * Replace accented characters with plain ASCII equivalents.
 *
 * A string holding no byte above 0x7F is returned as is. A string that
 * seems_utf8() accepts is transliterated with a table of UTF-8 sequences;
 * any other string is treated as ISO-8859-1.
 *
 * @since 1.2.1
 *
 * @param string $string Text that might have accent characters
 * @return string Filtered string with replaced "nice" characters.
 */
function remove_accents($string) {
    // nothing above ASCII: nothing to do
    if (!preg_match('/[\x80-\xff]/', $string)) {
        return $string;
    }

    if (!seems_utf8($string)) {
        // Assume ISO-8859-1 if not UTF-8
        // single bytes that map to a single character
        $from = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
            .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
            .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
            .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
            .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
            .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
            .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
            .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
            .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
            .chr(252).chr(253).chr(255);
        $to = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
        $string = strtr($string, $from, $to);

        // single bytes that map to two characters
        $double_from = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
        $double_to = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');

        return str_replace($double_from, $double_to, $string);
    }

    $map = array(
        // Decompositions for Latin-1 Supplement
        chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
        chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
        chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
        chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C',
        chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
        chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
        chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
        chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
        chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
        chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
        chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
        chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
        chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
        chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
        chr(195).chr(158) => 'TH',chr(195).chr(159) => 's',
        chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
        chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
        chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
        chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c',
        chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
        chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
        chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
        chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
        chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
        chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
        chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
        chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
        chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
        chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
        chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
        chr(195).chr(191) => 'y',
        // Decompositions for Latin Extended-A
        chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
        chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
        chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
        chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
        chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
        chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
        chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
        chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
        chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
        chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
        chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
        chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
        chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
        chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
        chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
        chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
        chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
        chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
        chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
        chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
        chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
        chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
        chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
        chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
        chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
        chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
        chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
        chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
        chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
        chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
        chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
        chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
        chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
        chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
        chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
        chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
        chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
        chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
        chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
        chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
        chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
        chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
        chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
        chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
        chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
        chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
        chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
        chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
        chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
        chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
        chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
        chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
        chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
        chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
        chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
        chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
        chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
        chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
        chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
        chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
        chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
        chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
        chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
        chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
        // Decompositions for Latin Extended-B
        chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
        chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
        // Euro Sign
        chr(226).chr(130).chr(172) => 'E',
        // GBP (Pound) Sign
        chr(194).chr(163) => '');

    return strtr($string, $map);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment