Skip to content

Instantly share code, notes, and snippets.

@lynsei
Last active February 15, 2022 20:18
Show Gist options
  • Save lynsei/0cf5bbe8919ef3503897 to your computer and use it in GitHub Desktop.
Save lynsei/0cf5bbe8919ef3503897 to your computer and use it in GitHub Desktop.
OSL that I've written for data sifting and cleansing in various formats. I use this with my crypto algorithms to make sure nothing weird messes up the character conversions.
<?php
/*
_____ ______ _________________________________ _____ ______ _______ (R)
| ||_____] | |______| | | | || \|______
|_____||_____]__| |______|_____ | |_____ |_____||_____/|______
/****
* @desc: sift and filter through data like a mothafuka
* @vers: reliable v3.3 this is ported from old old code that is quite stable
*
**``~x&*/
class Sift {
    //-----------------------------------------------------------
    // Cleanse "special" characters that get generated from
    // random occurrences (text pulled from the PHP DOM object,
    // database round trips, ...).
    //
    //  * U+2019 (right single quote) becomes a plain apostrophe.
    //  * The typographic marks listed in the pattern below, the
    //    back-tick and everything in U+00A1..U+00FF are removed.
    //
    // Matching is done in UTF-8 mode (/u) so only whole
    // characters are removed. Without /u the character classes
    // are applied byte-by-byte, which tears unrelated multi-byte
    // characters (CJK, Cyrillic, ...) apart and leaves invalid
    // UTF-8 behind.
    //
    // @param  string|array $string  text (or array of texts)
    // @return string|array          the cleansed text
    //-----------------------------------------------------------
    public static function clean($string) {
        if (is_array($string)) {
            // Element-wise, so one malformed entry cannot affect the others.
            return array_map(array(__CLASS__, 'clean'), $string);
        }

        $replacements = array("'", '');

        $cleaned = preg_replace(
            array(
                '/\x{2019}/u',
                '/[`\x{2018}\x{201A}\x{201C}-\x{201E}\x{2020}\x{2021}\x{2030}'
                    . '\x{2039}\x{203A}\x{20AC}\x{2122}\x{00A1}-\x{00FF}]/u'
            ),
            $replacements,
            $string
        );
        if ($cleaned !== null) {
            return $cleaned;
        }

        // preg_replace() returns null in /u mode when the subject is not
        // valid UTF-8. Fall back to the byte-wise behaviour this method
        // always had for such input: these are exactly the bytes the old
        // (non-/u) patterns matched.
        return preg_replace(
            array('/\xE2\x80\x99/', '/[\x60\x80-\xBF\xC2\xC3\xE2]/'),
            $replacements,
            $string
        );
    }

    //-----------------------------------------------------------
    // Cleanse content for use in sessions
    //
    // With no $type the text is clean()ed, HTML-entity encoded,
    // uuencoded, base64 encoded and finally URL encoded; use
    // uncleanse() to reverse it.
    //
    // Supported $type values:
    //   'auth'                  hex dump of the bytes (see uncleanse 'auth')
    //   'cleanse'               strip chars < 32 and > 127
    //   'encode'                encode chars < 32, > 127 and '&' as &#NN;
    //   'cleanse-magic-quotes'  'cleanse' followed by addslashes()
    //   'encode-magic-quotes'   'encode' followed by addslashes()
    //   'encode-magic-quotes-remove-all-slashes-first'
    //                           drop every '\' and '/', then as above
    //   'strip-low' / 'strip-high' / 'strip-encode-low' / 'strip-encode-high'
    //                           FILTER_SANITIZE_STRING with the matching flags
    //   'file-path-secret-encoding'
    //                           sanitise, then turn $FSLASH$ / $DOT$ markers
    //                           back into '/' and '.'
    //   'make-file-path'        turn '/' and '.' into $FSLASH$ / $DOT$
    //   anything else           plain clean()
    //
    // NOTE: FILTER_SANITIZE_STRING is deprecated as of PHP 8.1. It is
    // still used here because an exact drop-in replacement is not
    // trivial; those branches emit a deprecation notice on 8.1+.
    //
    // @param  string      $string  text to cleanse
    // @param  string|null $type    one of the modes above
    // @return mixed                cleansed string (filter_var() may
    //                              return false when filtering fails)
    //-----------------------------------------------------------
    public static function cleanse($string,$type=null) {
        if (is_null($type)) {
            return urlencode(base64_encode(convert_uuencode(
                htmlentities(self::clean($string), ENT_QUOTES)
            )));
        }

        switch ($type) {
            case 'auth':
                // Two lowercase hex digits per byte; '' for empty input.
                return bin2hex((string) $string);

            case 'cleanse':
                return self::strip_raw($string);

            case 'encode':
                return self::encode_raw($string);

            case 'cleanse-magic-quotes':
                return self::add_slashes(self::strip_raw($string));

            case 'encode-magic-quotes':
                return self::add_slashes(self::encode_raw($string));

            case 'encode-magic-quotes-remove-all-slashes-first':
                return self::add_slashes(
                    self::encode_raw(self::remove_slashes($string))
                );

            case 'strip-low':
                return filter_var($string,
                    FILTER_SANITIZE_STRING,
                    FILTER_FLAG_STRIP_LOW
                );

            case 'strip-high':
                return filter_var($string,
                    FILTER_SANITIZE_STRING,
                    FILTER_FLAG_STRIP_HIGH
                );

            case 'strip-encode-low':
                return filter_var($string,
                    FILTER_SANITIZE_STRING,
                    FILTER_FLAG_ENCODE_LOW | FILTER_FLAG_ENCODE_AMP
                );

            case 'strip-encode-high':
                return filter_var($string,
                    FILTER_SANITIZE_STRING,
                    FILTER_FLAG_ENCODE_HIGH | FILTER_FLAG_ENCODE_AMP
                );

            case 'file-path-secret-encoding':
                // Real slashes are removed first, so the only path
                // separators in the result come from the markers.
                $escaped = self::add_slashes(
                    self::encode_raw(self::remove_slashes($string))
                );
                $sanitised = filter_var($escaped,
                    FILTER_SANITIZE_STRING,
                    FILTER_FLAG_ENCODE_HIGH | FILTER_FLAG_ENCODE_AMP
                );
                return preg_replace(
                    array('/\$FSLASH\$/i', '/\$DOT\$/i'),
                    array('/', '.'),
                    $sanitised
                );

            case 'make-file-path':
                return str_replace(
                    array('/', '.'),
                    array('$FSLASH$', '$DOT$'),
                    $string
                );

            default:
                return self::clean($string);
        }
    }

    //-----------------------------------------------------------
    // Cleanse content for use in databases
    //
    // Shorthand for cleanse($string, 'encode-magic-quotes'):
    // chars < 32, > 127 and '&' become numeric entities, then
    // quotes/backslashes/NUL are escaped with addslashes().
    //-----------------------------------------------------------
    public static function db_cleanse($string) {
        return self::cleanse($string,'encode-magic-quotes');
    }

    //-----------------------------------------------------------
    // Restore session content to its original glory
    //
    //   $type null    reverse of cleanse() with no type
    //   $type 'auth'  reverse of cleanse($s, 'auth'): hex -> bytes
    //   otherwise     decode HTML entities (quotes included)
    //
    // @param  string      $string  previously cleansed text
    // @param  string|null $type    how it was cleansed
    // @return string
    //-----------------------------------------------------------
    public static function uncleanse($string,$type=null) {
        if (is_null($type)) {
            $decoded = convert_uudecode(base64_decode(urldecode($string)));
            return self::clean(html_entity_decode($decoded, ENT_QUOTES));
        }

        if ($type=='auth') {
            // Consume two hex digits per output byte; always returns a
            // string, '' for empty input.
            $data = '';
            $len = strlen($string);
            for ($i = 0; $i < $len; $i += 2) {
                $data .= chr(hexdec(substr($string, $i, 2)));
            }
            return $data;
        }

        // ENT_QUOTES is passed explicitly so &#39; / &#039; are decoded on
        // every PHP version (the default flags only include it since 8.1),
        // matching the default branch above.
        return html_entity_decode($string, ENT_QUOTES);
    }

    //-----------------------------------------------------------
    // Uncleanse content that's pulled from a database
    //
    // Normalises the entity encoding (decode, then re-encode
    // with quotes), runs clean() and removes every backslash.
    // The result is still HTML-entity encoded.
    //-----------------------------------------------------------
    public static function db_uncleanse($string) {
        $normalised = htmlentities(
            html_entity_decode($string, ENT_QUOTES),
            ENT_QUOTES
        );
        return str_replace('\\', '', self::clean($normalised));
    }

    // Strip control characters (< 32) and high characters (> 127).
    private static function strip_raw($string) {
        return filter_var($string,
            FILTER_UNSAFE_RAW,
            FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH
        );
    }

    // Encode control characters, high characters and '&' as &#NN; entities.
    private static function encode_raw($string) {
        return filter_var($string,
            FILTER_UNSAFE_RAW,
            FILTER_FLAG_ENCODE_LOW | FILTER_FLAG_ENCODE_HIGH | FILTER_FLAG_ENCODE_AMP
        );
    }

    // Remove every backslash and forward slash.
    private static function remove_slashes($string) {
        return str_replace(array('\\', '/'), '', $string);
    }

    // Stand-in for FILTER_SANITIZE_MAGIC_QUOTES, which merely applied
    // addslashes() and no longer exists as of PHP 8.0.
    private static function add_slashes($value) {
        return addslashes((string) $value);
    }
}
/*+--------------------[objectcode: 12/20/2015 3:48:47 AM] [/end.automation]+--------------------+*/
@lynsei
Copy link
Author

lynsei commented Dec 20, 2015

This was adapted from old code, so I'm sprucing it up at the moment.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment