Created
July 17, 2024 14:41
-
-
Save JoryHogeveen/9251836cf09f3cc5bffba50d4be0b92e to your computer and use it in GitHub Desktop.
PHP CLI to fix charset collation with serialized data support
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/php | |
<?php | |
/** | |
* File edited by Jory Hogeveen <KERAWEB> to also convert table charsets and collations.
* IMPORTANT: SQL file needs to be UTF8 format! | |
* | |
* Example usage: | |
* | |
* - php -d memory_limit=4G cli-fix-serialization-charset.php path_to_source.sql path_to_target.sql latin1_swedish_ci utf8mb4_unicode_520_ci | |
* | |
* You can also convert multiple source collations by providing a comma-separated list as the third argument:
* | |
* - php -d memory_limit=4G cli-fix-serialization-charset.php path_to_source.sql path_to_target.sql latin1_swedish_ci,utf8mb3_unicode_ci utf8mb4_unicode_520_ci | |
* | |
* | |
* ********************** | |
* | |
* | |
* Serialization fixer for files by BracketSpace | |
* | |
* @author: Wojtek Szałkiewicz - [email protected] | |
* @license: GPL version 3 or later - http://www.gnu.org/licenses/gpl.txt | |
* | |
* Based on Pau Iglesias http://blogestudio.com package | |
* | |
* Usage: | |
* `/usr/bin/php fix-serialization.php my-sql-file.sql` | |
* | |
* Known errors: | |
* | |
* - Memory size exhausted | |
* Allowed memory size of 67108864 bytes exhausted (tried to allocate 35266489 bytes) | |
* How to fix: update php.ini memory_limit to 512M or more, and restart cgi service or web server | |
* | |
* - Function preg_replace returns null or 0 length string | |
* If preg_last_error = PREG_BACKTRACK_LIMIT_ERROR (value 2), increase pcre.backtrack_limit in php.ini (the default is 100k; change it to 2M, for example)
* The same applies to other preg_last_error codes: http://www.php.net/manual/en/function.preg-last-error.php
* | |
*/ | |
/**
 * Decode the backslash escape sequences found inside SQL dump string literals.
 *
 * The script uses the decoded text to measure the real byte length of a
 * serialized string value.
 *
 * The translation is done by strtr() in one left-to-right pass, so text
 * produced by a replacement is never scanned again. This matters for an
 * escaped backslash followed by a letter: backslash-backslash-n decodes to
 * backslash + "n" (2 bytes) and not to a newline (1 byte), which chained
 * replacements would produce.
 *
 * @param string $value Escaped text taken from the SQL dump.
 * @return string The text with the escapes for backslash, NUL, newline,
 *                carriage return, Ctrl-Z, single quote and double quote decoded.
 */
function unescape_mysql( $value ) {
	return strtr(
		$value,
		[
			'\\\\' => '\\',
			'\\0'  => "\0",
			'\\n'  => "\n",
			'\\r'  => "\r",
			'\\Z'  => "\x1a",
			"\\'"  => "'",
			'\\"'  => '"',
		]
	);
}
/**
 * Replace every backslash-escaped double quote in the given text with a plain
 * double quote.
 *
 * Works around odd results when the replacement text contains escaped quotes.
 *
 * @param string $value Text that may contain escaped double quotes.
 * @return string The text with each escaped double quote unescaped.
 */
function unescape_quotes( $value ) {
	$escaped_quote = '\\"';
	$plain_quote   = '"';

	return str_replace( $escaped_quote, $plain_quote, $value );
}
// Validate the command line and load the dump.
// Every failure below ends the script with exit status 1 so that shell
// pipelines (`php fix.php in.sql out.sql && mysql < out.sql`) stop on error.

// The first argument (path of the SQL dump to process) is mandatory.
if ( ! isset( $argv, $argv[1] ) ) {
	echo "Error: no input file specified\n\n";
	exit( 1 );
}

$input = $argv[1];

if ( ! file_exists( $input ) ) {
	echo "Error: input file does not exists\n";
	echo "{$input}\n\n";
	exit( 1 );
}

// The whole dump is held in memory as a single string.
$data = file_get_contents( $input );

// file_get_contents() returns false on a read failure. Compare strictly so an
// empty file, or a file containing only "0", is not reported as unreadable.
if ( false === $data ) {
	echo "Error: can`t read data from input file\n";
	echo "{$input}\n\n";
	exit( 1 );
}

// Nothing to process in an empty dump.
if ( '' === $data ) {
	echo "Warning: the file is empty or can't read contents\n";
	echo "{$input}\n\n";
	exit( 1 );
}
// Data ok
/**
 * Rewrite collation and character set names inside an SQL dump.
 *
 * For every source collation the full collation name is replaced by the target
 * collation. Its character set (the part before the first underscore) is
 * replaced by the target character set wherever it directly follows one of
 * "SET=", "SET = ", "SET " or "NAMES ". A source entry without an underscore is
 * treated as a bare character set name and only takes part in that second step.
 *
 * @param string   $data              Contents of the SQL dump.
 * @param string[] $source_collations Collation (or bare charset) names to replace.
 * @param string   $target_collation  Collation name to use instead.
 * @return string The converted dump.
 * @throws RuntimeException When PCRE fails while replacing a character set name.
 */
function convert_dump_collations( $data, array $source_collations, $target_collation ) {
	$target_charset = explode( '_', $target_collation )[0];

	foreach ( $source_collations as $source_collation ) {
		$source_collation = trim( $source_collation );
		$source_charset   = explode( '_', $source_collation )[0];

		// An empty entry (e.g. a trailing comma in the list) would otherwise
		// match every bare "SET=" in the dump.
		if ( '' === $source_collation || '' === $source_charset ) {
			continue;
		}

		// Convert collations: only entries of the form charset_suffix.
		if ( $source_charset !== $source_collation ) {
			$data = str_replace( $source_collation, $target_collation, $data );
		}

		if ( $source_charset === $target_charset ) {
			continue;
		}

		// Convert charsets. The negative lookahead stops a source charset that
		// is a prefix of another name (utf8 vs. utf8mb4) from mangling names
		// that are already correct or were converted by an earlier entry.
		// No "u" modifier: the pattern is pure ASCII and the dump is not
		// required to be valid UTF-8 for this step.
		$pattern   = '/(SET=|SET = |SET |NAMES )' . preg_quote( $source_charset, '/' ) . '(?![0-9A-Za-z_])/';
		$converted = preg_replace_callback(
			$pattern,
			function ( $match ) use ( $target_charset ) {
				return $match[1] . $target_charset;
			},
			$data
		);

		// preg_replace_callback() returns null on a PCRE error; never let that
		// silently wipe the dump.
		if ( null === $converted ) {
			throw new RuntimeException( 'charset conversion failed, preg_last_error() = ' . preg_last_error() );
		}

		$data = $converted;
	}

	return $data;
}

// KERAWEB: Convert charsets.
// Runs only when both the source collation list (3rd argument, comma
// separated) and the target collation (4th argument) are given.
if ( isset( $argv[3], $argv[4] ) ) {
	try {
		$data = convert_dump_collations( $data, explode( ',', $argv[3] ), $argv[4] );
	} catch ( RuntimeException $e ) {
		echo 'Error: ' . $e->getMessage() . "\n\n";
		exit( 1 );
	}
}
// Counters reported at the end of the run: serialized strings seen, and how
// many of them had to be rewritten.
$processed = 0;
$fixed     = 0;

// Matches a serialized string token s:<digits>:"<content>"; where each of the
// two double quotes may be preceded by a backslash. The first alternative
// covers the empty string; the second captures the content in group 3.
$serialized_string_pattern = '!s:(\d+):([\\\\]?"[\\\\]?"|[\\\\]?"((.*?)[^\\\\])[\\\\]?");!';

// Rebuilds one matched token with a recomputed length prefix.
$rewrite_serialized_string = function ( $match ) use ( &$processed, &$fixed ) {
	$processed++;

	// No content group: the empty-string alternative matched, keep the token as is.
	if ( ! isset( $match[3] ) ) {
		return $match[0];
	}

	// The length is measured on the fully unescaped content, while the
	// emitted content only has its escaped double quotes unescaped.
	$byte_length = strlen( unescape_mysql( $match[3] ) );
	$rebuilt     = 's:' . $byte_length . ':"' . unescape_quotes( $match[3] ) . '";';

	if ( $rebuilt !== $match[0] ) {
		$fixed++;
	}

	return $rebuilt;
};

// Replace serialized string values
$data = preg_replace_callback( $serialized_string_pattern, $rewrite_serialized_string, $data );
// Check data: preg_replace_callback() yields null on a PCRE error, so an unset
// or empty result means the replacement pass failed. Failures exit with
// status 1 so callers can detect that no output was produced.
if ( ! isset( $data ) || 0 === strlen( $data ) ) {
	echo "Error: preg_replace returns nothing\n";
	if ( function_exists( 'preg_last_error' ) ) {
		$last_error = preg_last_error();
		echo "preg_last_error() = {$last_error}\n";
	}
	echo "{$input}\n\n";
	exit( 1 );
}

// Write to the second argument when given; otherwise overwrite the input file.
$output = isset( $argv[2] ) ? $argv[2] : $input;

// Write file data
$result = file_put_contents( $output, $data );
if ( false === $result ) {
	echo "Error: can't write fixed content\n";
	echo "{$output}\n\n";
	exit( 1 );
}

// Summary of the serialized strings that were matched and rewritten.
echo "Found strings: {$processed}\n";
echo "Fixed: {$fixed}\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment