Skip to content

Instantly share code, notes, and snippets.

@pospi
Created September 24, 2013 01:03
Show Gist options
  • Save pospi/6679070 to your computer and use it in GitHub Desktop.
Save pospi/6679070 to your computer and use it in GitHub Desktop.
Class for working with data to be used in latin-1 encoded HTML documents, or for normalising user input.
<?php
class CharsetConverter
{
    /**
     * Replacement table used by stripMSCharacters().
     *
     * Keys are the raw UTF-8 byte sequences of typographic characters
     * (curly quotes, dashes, ellipsis, vulgar fractions); values are the
     * plain-ASCII text or HTML entity substituted for each of them.
     */
    private static $WORD_CHARS_MAP = array(
        "\xE2\x80\x9A" => "&sbquo;",    // U+201A single low-9 quotation mark
        "\xE2\x80\x9E" => "&bdquo;",    // U+201E double low-9 quotation mark
        "\xE2\x80\x98" => "'",          // U+2018 left single quotation mark
        "\xE2\x80\x99" => "'",          // U+2019 right single quotation mark
        "\xE2\x80\x9C" => "\"",         // U+201C left double quotation mark
        "\xE2\x80\x9D" => "\"",         // U+201D right double quotation mark
        "\xE2\x80\xA2" => "-",          // U+2022 bullet
        "\xE2\x80\x93" => "-",          // U+2013 en dash
        "\xE2\x80\x94" => "-",          // U+2014 em dash
        "\xE2\x80\xA6" => "...",        // U+2026 horizontal ellipsis
        "\xC2\xBC" => "&frac14;",       // U+00BC one quarter
        "\xC2\xBD" => "&frac12;",       // U+00BD one half
        "\xC2\xBE" => "&frac34;",       // U+00BE three quarters
    );

    //==========================================================================

    /**
     * Replaces Microsoft "smart quote" style characters (UTF-8 encoded) with
     * the plain-ASCII or HTML-entity equivalents listed in $WORD_CHARS_MAP.
     *
     * Note that the characters are substituted, not deleted, and that some
     * replacements are HTML entities (e.g. "&sbquo;", "&frac12;").
     *
     * @see https://github.com/pospi/cp1252fixer
     *
     * @param  string $str the text to normalise
     * @return string the text with every mapped sequence replaced
     */
    public static function stripMSCharacters($str)
    {
        return str_replace(
            array_keys(self::$WORD_CHARS_MAP),
            array_values(self::$WORD_CHARS_MAP),
            $str
        );
    }

    /**
     * Replaces every byte above 0x9F with an HTML numeric entity ("&#NNN;")
     * carrying that byte's decimal value.
     *
     * Bytes from 0x00 to 0x9F - including control characters below 32 - are
     * copied through unchanged. The string is processed one byte at a time,
     * so each byte of a multi-byte UTF-8 sequence would become its own entity.
     *
     * @param  string $strin the text to convert
     * @return string the text with high bytes replaced by numeric entities
     */
    public static function convertHighASCII($strin)
    {
        $strout = '';
        // Length is loop-invariant, so compute it once up front.
        $length = strlen($strin);
        for ($i = 0; $i < $length; $i++) {
            $chr = $strin[$i];
            $ord = ord($chr);
            if ($ord > 0x9F) {
                $strout .= "&#{$ord};";
            } else {
                $strout .= $chr;
            }
        }
        return $strout;
    }

    /**
     * Removes every byte above 0x9F from a string.
     *
     * Bytes from 0x00 to 0x9F - including control characters below 32 - are
     * kept. The string is processed one byte at a time, so all bytes of a
     * multi-byte UTF-8 sequence that exceed 0x9F are dropped individually.
     *
     * @param  string $strin the text to filter
     * @return string the text with high bytes removed
     */
    public static function stripHighASCII($strin)
    {
        $strout = '';
        // Length is loop-invariant, so compute it once up front.
        $length = strlen($strin);
        for ($i = 0; $i < $length; $i++) {
            $chr = $strin[$i];
            if (ord($chr) <= 0x9F) {
                $strout .= $chr;
            }
        }
        return $strout;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment