Last active
June 6, 2018 16:08
-
-
Save Xowap/3708be0a6b3aec2f423a214613ca437e to your computer and use it in GitHub Desktop.
Search & Replace in PHP serialized values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from argparse import ArgumentParser | |
from sys import stdin, stdout | |
import re | |
# Matches one PHP-serialized string token of the form s:<length>:"<value>";
#   group 1 - the declared length (digits only)
#   group 2 - the opening double quote, with any backslashes preceding it
#   group 3 - the value itself, matched non-greedily
#   group 4 - the closing double quote, with any backslashes preceding it
SER_STR = re.compile(r"s:(\d+):(\\*\")(.*?)(\\*\");")
def parse_args():
    """
    Build the command-line parser and run it on the process arguments.

    Two positional arguments are declared, in this order: ``search`` and
    ``replace``. Returns the namespace produced by argparse.
    """

    cli = ArgumentParser(
        description=(
            'Replaces a string by another safely even if it lies '
            'within a PHP serialized value'
        ),
    )

    for positional in ('search', 'replace'):
        cli.add_argument(positional)

    return cli.parse_args()
def quoted_char(ref: str, char: str):
    """
    Build the escaped form of ``char`` by mirroring ``ref``.

    ``ref`` is a double quote as it was observed in the input (possibly
    preceded by backslashes). Every double quote found in ``ref`` is swapped
    for ``char``; the rest of ``ref`` is kept untouched.
    """

    return char.join(ref.split('"'))
def unquote(ref: str, s: str):
    r"""
    Strip the quote escaping from ``s`` to recover the real string.

    ``ref`` is the double quote exactly as it appeared around the value (with
    its leading backslashes, if any); it tells how deep the quoting is. Double
    and single quotes escaped that way are turned back into bare quotes.

    If, after that, the text contains an ``n`` or an ``r`` escaped the same
    way as ``ref``, the two-character sequences \n and \r are assumed to come
    from a higher level (PHP does not transform the strings) and are replaced
    by the actual line feed / carriage return characters.

    Returns a ``(text, replace_eol)`` tuple, where ``replace_eol`` tells
    whether the end-of-line substitution above was performed.
    """

    single_quote = ref.replace('"', "'")
    text = s.replace(ref, '"').replace(single_quote, "'")

    # Same construction as for the single quote, applied to 'n' and 'r'.
    eol_markers = (ref.replace('"', 'n'), ref.replace('"', 'r'))
    has_escaped_eol = any(marker in text for marker in eol_markers)

    if has_escaped_eol:
        text = text.replace('\\n', '\n').replace('\\r', '\r')

    return text, has_escaped_eol
def quote(ref: str, s: str):
    """
    Escape double and single quotes in ``s`` again.

    Each double quote becomes ``ref``; each single quote becomes ``ref`` with
    its double quote swapped for a single quote.
    """

    single_ref = ref.replace('"', "'")
    return s.replace('"', ref).replace("'", single_ref)
def quote_eol(s: str):
    r"""
    Turn line feeds and carriage returns into the two-character sequences
    \n and \r.

    This is kept separate from the quote escaping: it has to run after the
    string length has been measured from PHP's perspective, because this
    escaping is done by mysqldump and not by PHP's serialize().
    """

    eol_escapes = str.maketrans({'\n': '\\n', '\r': '\\r'})
    return s.translate(eol_escapes)
def replace_line(line, search, replace):
    """
    Do the search and replace inside the line

    1. Detect items that look like PHP serialized strings and, when they are
       consistent, replace inside the value and rewrite the declared length
    2. Do a plain replacement on everything else

    This allows to replace both strings that are serialized and those that
    are not.

    Text belonging to a validated serialized string is never passed through
    the plain replacement afterwards. Otherwise, whenever ``replace`` contains
    ``search`` (e.g. "site.com" -> "www.site.com"), the value would be
    replaced a second time and would no longer match its declared length.

    :param line: the line of text to process
    :param search: the string to look for
    :param replace: the string to put in place of ``search``
    :return: the line with all replacements applied
    """

    def replace_ser(_m):
        """
        Handle one candidate serialized string. The value is unquoted and its
        UTF-8 byte length is compared with the declared length. If they agree
        (and the opening and closing quotes are escaped the same way), the
        replacement is done on the unquoted value, the length is recomputed
        and the value is quoted back the way it was. Otherwise the candidate
        is treated as ordinary text.
        """

        length = int(_m.group(1))
        quotes: str = _m.group(2)
        val: str = _m.group(3)

        unq_val, replace_eol = unquote(quotes, val)
        val_len = len(unq_val.encode('utf-8'))

        if val_len != length or quotes != _m.group(4):
            # Not a consistent serialized string: plain replacement only.
            return _m.group(0).replace(search, replace)

        new_val = unq_val.replace(search, replace)
        # The length is measured before re-quoting, i.e. as PHP sees it.
        new_len = len(new_val.encode('utf-8'))

        new_val = quote(quotes, new_val)

        if replace_eol:
            new_val = quote_eol(new_val)

        return f's:{new_len}:{quotes}{new_val}{quotes};'

    pieces = []
    cursor = 0

    for match in SER_STR.finditer(line):
        # Text between two candidates gets the plain replacement.
        pieces.append(line[cursor:match.start()].replace(search, replace))
        pieces.append(replace_ser(match))
        cursor = match.end()

    pieces.append(line[cursor:].replace(search, replace))

    return ''.join(pieces)
def main():
    """
    Read standard input line by line, apply the search and replace given on
    the command line to each line and write the result to standard output.
    """

    args = parse_args()

    # Iterate lazily instead of calling readlines(): the whole input does not
    # need to be held in memory before the first line is written out.
    for line in stdin:
        stdout.write(replace_line(line, args.search, args.replace))
# Run the filter only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment