regilero · September 1, 2016 13:01 · regilero · Jun 28, 2013 · regilero · Oct 7, 2014
diff --git a/serialization_sql_dump_cleaner.sh b/serialization_sql_dump_cleaner.sh
 #!/bin/bash
 ##################################################################
 # Licensed under GNU GPL v3                                      #
 # regis.leroy@gmail.com                                          #
 #                                                                #
 # DNS replacement script for SQL dumps that (also) contain PHP   #
 # serialized strings.                                            #
 # perl rewrites the serialized strings and adjusts their length  #
 # prefix by the size difference between the two names; sed then  #
 # replaces the occurrences left outside serialized data.         #
 ##################################################################

 ###### CONFIG ####################################################
 OLD_DNS="to.be.replaced.com"
 NEW_DNS="foobar.titi.example.com"
 # source file to cleanup (it is edited in place)
 DUMP_FILE="exemple.txt"
 ###### END CONFIG ################################################

 # Escape the dots so that perl and sed match them literally instead
 # of treating them as "any character".
 ESCAPED_OLD_DNS=${OLD_DNS//\./\\.}

 # The perl replacement below loops until nothing matches anymore: if
 # the new name contains the old one, the result would match forever.
 if [[ "${NEW_DNS}" == *"${OLD_DNS}"* ]]; then
   echo "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!" >&2
   exit 1
 fi

 SED=$(command -v sed)
 if [[ -z "${SED}" ]]; then
   echo "Error: 'sed' command not found." >&2
   exit 1
 fi

 PERL=$(command -v perl)
 if [[ -z "${PERL}" ]]; then
   echo "Error: 'perl' command not found." >&2
   exit 1
 fi

 # Stop early instead of letting grep, perl and sed each fail on a
 # missing file while the script still reports success.
 if [[ ! -f "${DUMP_FILE}" ]]; then
   echo "Error: dump file '${DUMP_FILE}' not found." >&2
   exit 1
 fi

 # Print the number of lines of the dump containing the old name.
 # -F makes grep take the name as a fixed string, not as a regex.
 # grep -c exits with status 1 when the count is 0 but still prints it.
 count_old_dns_lines() {
   grep -cF -- "${OLD_DNS}" "${DUMP_FILE}"
 }

 # Byte difference to apply to every serialized string length, once per
 # replaced occurrence (negative when the new name is shorter).
 COUNT=$(( ${#NEW_DNS} - ${#OLD_DNS} ))
 if [[ ${COUNT} -eq 0 ]]; then
   echo "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement"
 else
   NB=$(count_old_dns_lines)
   if [[ "0" != "${NB}" ]]; then
     echo "Found ${NB} lines matching at least once ${OLD_DNS} in this file"
     echo "Starting serialized content inline replacement in dump with string lenght increment..."
     # Anatomy of the regex:
     # $1 : «([;|{]?s:)» : start of a serialized string, «;s:» or «{s:» or «s:»
     # $2 : «([0-9]+)» : the declared length of the serialized string
     # «:\\"» : opening of the string content, an escaped quote «\"»
     # $3 and $4 : «(((?!\\";).)*?)» : the shortest run of characters that
     #   does not contain «\";» (the end of the serialized string).
     #   $4 only holds the last character matched by the inner group; unused.
     # $5 : «(to\.be\.replaced\.com)» : the escaped old name
     # $6 : «(.*?)» : shortest match up to the next «\";», i.e. the rest
     #   of this serialized string
     # «\\";» : end of the serialized string
     # $6 may still contain other occurrences of the old name, and each
     # pass replaces only the first one of a serialized string, so the
     # substitution is repeated with «1 while» until s/// reports that
     # nothing was replaced. Each pass adds COUNT to the length once.
     # To try the expression without touching the file, drop -i and feed
     # the dump on stdin.
     "${PERL}" -p -i -e '$rgx=qr/([;|{]?s:)([0-9]+):\\"(((?!\\";).)*?)('"${ESCAPED_OLD_DNS}"')(.*?)\\";/; 1 while s#$rgx#"$1".($2+'"${COUNT}"').":\\\"$3'"${NEW_DNS}"'$6\\\";"#ge;' "${DUMP_FILE}" || {
       echo "Error: perl replacement failed on ${DUMP_FILE}." >&2
       exit 1
     }
     echo "Done with serialized strings"
   else
     echo "${OLD_DNS} not found in file, quite certainly nothing to be done."
   fi
 fi

 # Whatever still contains the old name lives outside serialized data
 # (or both names have the same size): a plain replacement is enough.
 NB=$(count_old_dns_lines)
 if [[ "0" != "${NB}" ]]; then
   echo "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: "
   # Run sed directly: wrapping it in a command substitution would try
   # to execute whatever sed prints.
   "${SED}" -i "s#${ESCAPED_OLD_DNS}#${NEW_DNS}#g" "${DUMP_FILE}" || {
     echo "Error: sed replacement failed on ${DUMP_FILE}." >&2
     exit 1
   }
 fi
 echo "Everything Done"
 exit 0
	#!/bin/bash
	##################################################################
	# Licensed under GNU GPL v3 #
	# regis.leroy@gmail.com #
	# #
	# DNS replacement script for SQL dumps that (also) contain PHP #
	# serialized strings. #
	# perl rewrites the serialized strings and adjusts their length #
	# prefix by the size difference between the two names; sed then #
	# replaces the occurrences left outside serialized data. #
	##################################################################

	###### CONFIG ####################################################
	OLD_DNS="to.be.replaced.com"
	NEW_DNS="foobar.titi.example.com"
	# source file to cleanup (it is edited in place)
	DUMP_FILE="exemple.txt"
	###### END CONFIG ################################################

	# Escape the dots so that perl and sed match them literally instead
	# of treating them as "any character".
	ESCAPED_OLD_DNS=${OLD_DNS//\./\\.}

	# The perl replacement below loops until nothing matches anymore: if
	# the new name contains the old one, the result would match forever.
	# The surrounding wildcards are required: without them only identical
	# names would be rejected.
	if [[ "${NEW_DNS}" == *"${OLD_DNS}"* ]]; then
	  echo "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!" >&2
	  exit 1
	fi

	SED=$(command -v sed)
	if [[ -z "${SED}" ]]; then
	  echo "Error: 'sed' command not found." >&2
	  exit 1
	fi

	PERL=$(command -v perl)
	if [[ -z "${PERL}" ]]; then
	  echo "Error: 'perl' command not found." >&2
	  exit 1
	fi

	# Stop early instead of letting grep, perl and sed each fail on a
	# missing file while the script still reports success.
	if [[ ! -f "${DUMP_FILE}" ]]; then
	  echo "Error: dump file '${DUMP_FILE}' not found." >&2
	  exit 1
	fi

	# Print the number of lines of the dump containing the old name.
	# -F makes grep take the name as a fixed string, not as a regex.
	# grep -c exits with status 1 when the count is 0 but still prints it.
	count_old_dns_lines() {
	  grep -cF -- "${OLD_DNS}" "${DUMP_FILE}"
	}

	# Byte difference to apply to every serialized string length, once per
	# replaced occurrence (negative when the new name is shorter).
	COUNT=$(( ${#NEW_DNS} - ${#OLD_DNS} ))
	if [[ ${COUNT} -eq 0 ]]; then
	  echo "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement"
	else
	  NB=$(count_old_dns_lines)
	  if [[ "0" != "${NB}" ]]; then
	    echo "Found ${NB} lines matching at least once ${OLD_DNS} in this file"
	    echo "Starting serialized content inline replacement in dump with string lenght increment..."
	    # Anatomy of the regex:
	    # $1 : «([;|{]?s:)» : start of a serialized string, «;s:» or «{s:» or «s:»
	    # $2 : «([0-9]+)» : the declared length of the serialized string
	    # «:\\"» : opening of the string content, an escaped quote «\"»
	    # $3 and $4 : «(((?!\\";).)*?)» : the shortest run of characters that
	    #   does not contain «\";» (the end of the serialized string).
	    #   $4 only holds the last character matched by the inner group; unused.
	    # $5 : «(to\.be\.replaced\.com)» : the escaped old name
	    # $6 : «(.*?)» : shortest match up to the next «\";», i.e. the rest
	    #   of this serialized string
	    # «\\";» : end of the serialized string
	    # $6 may still contain other occurrences of the old name, and each
	    # pass replaces only the first one of a serialized string, so the
	    # substitution is repeated with «1 while» until s/// reports that
	    # nothing was replaced. Each pass adds COUNT to the length once.
	    # To try the expression without touching the file, drop -i and feed
	    # the dump on stdin.
	    "${PERL}" -p -i -e '$rgx=qr/([;|{]?s:)([0-9]+):\\"(((?!\\";).)*?)('"${ESCAPED_OLD_DNS}"')(.*?)\\";/; 1 while s#$rgx#"$1".($2+'"${COUNT}"').":\\\"$3'"${NEW_DNS}"'$6\\\";"#ge;' "${DUMP_FILE}" || {
	      echo "Error: perl replacement failed on ${DUMP_FILE}." >&2
	      exit 1
	    }
	    echo "Done with serialized strings"
	  else
	    echo "${OLD_DNS} not found in file, quite certainly nothing to be done."
	  fi
	fi

	# Whatever still contains the old name lives outside serialized data
	# (or both names have the same size): a plain replacement is enough.
	NB=$(count_old_dns_lines)
	if [[ "0" != "${NB}" ]]; then
	  echo "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: "
	  # Run sed directly: wrapping it in a command substitution would try
	  # to execute whatever sed prints.
	  "${SED}" -i "s#${ESCAPED_OLD_DNS}#${NEW_DNS}#g" "${DUMP_FILE}" || {
	    echo "Error: sed replacement failed on ${DUMP_FILE}." >&2
	    exit 1
	  }
	fi
	echo "Everything Done"
	exit 0
No results found