seeschloss · December 19, 2015 12:49
diff --git a/serialization_sql_dump_cleaner.sh b/serialization_sql_dump_cleaner.sh
 #!/bin/bash
 ##################################################################
 # Licensed under GNU GPL v3                                      #
 # [email protected]                                          #
 #                                                                #
 # DNS replacement script in SQL dumps containing (also) PHP      #
 # serialized strings.                                            #
 # This script uses bash and perl; perl is used to increment      #
 # serialized string length while performing DNS replacement      #
 # It also uses sed for other basic DNS replacements              #
 #                                                                #
 # This is a modified version from                                #
 # https://gist.github.com/regilero/5885056 which takes old and   #
 # new DNS names from command-line arguments, and more            #
 # importantly takes the SQL dump to modify from stdin, in order  #
 # to allow chaining to workaround the subdomain replacement      #
 # problem. For example, going from name.tld to new.name.tld:     #
 #                                                                #
 # cat dump.sql | \                                               #
 #  serialization_sql_dump_cleaner.sh "name.tld" "fake.tld" | \   #
 #  serialization_sql_dump_cleaner.sh "fake.tld" "new.name.tld" \ #
 #  > fixed-dump.sql                                              #
 #                                                                #
 # Keep in mind that your dump will be copied to a temporary file #
 # so if it is more than 100 MB or so, this will make a lot of    #
 # writes and might even fill up your /tmp folder if you have a   #
 # lot of chaining going on.                                      #
 ##################################################################

 # Print all arguments, joined by single spaces, as one line on stderr.
 # Diagnostics are kept off stdout, which this script uses for the dump.
 #
 # ">&2" duplicates the already-open stderr descriptor. "> /dev/stderr"
 # would instead re-open the target with truncation, so when stderr is
 # redirected to a regular file every call would erase the previous
 # messages. printf is used rather than echo so that a message starting
 # with "-n" or "-e" is printed literally.
 function log() {
   local IFS=' '
   printf '%s\n' "$*" >&2
 }

 # Both positional parameters are mandatory: when either one is missing
 # or empty, print the usage text on stderr and abort with status 1.
 if [[ -z "$1" || -z "$2" ]]; then
   log "Usage: $0 <old domain> <new domain>"
   log "  Replaces all occurences of <old domain> in Drupal SQL dump"
   log "  to <new domain> taking into account PHP serialized strings."
   exit 1
 fi

 # Old and new domain names, exactly as given on the command line.
 OLD_DNS="$1"
 NEW_DNS="$2"

 # Backslash-escape every dot so the names can be embedded in a regular
 # expression without "." matching an arbitrary character.
 ESCAPED_NEW_DNS=${NEW_DNS//\./\\.}
 ESCAPED_OLD_DNS=${OLD_DNS//\./\\.}

 # Refuse a new name that contains the old one (e.g. name.tld ->
 # new.name.tld): the perl pass repeats its substitution until nothing
 # matches, so the freshly inserted name would match again forever.
 # The old name is quoted inside the pattern so it is compared as a
 # literal substring; unquoted, characters such as "*", "?" or "[" in it
 # would be interpreted as glob operators.
 if [[ "${NEW_DNS}" == *"${OLD_DNS}"* ]]; then
   log "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!"
   exit 1
 fi

 # Resolve the external tools used further down and abort early when one
 # is missing. "command -v" is the shell builtin specified by POSIX for
 # this lookup, so it does not depend on the external "which" program
 # being installed. The results are tested with a quoted -z so that an
 # empty value or a path containing whitespace is handled correctly.
 SED=$(command -v sed)
 if [[ -z "${SED}" ]]; then
   log "Error: 'sed' command not found."
   exit 1
 fi

 PERL=$(command -v perl)
 if [[ -z "${PERL}" ]]; then
   log "Error: 'perl' command not found."
   exit 1
 fi

 # The dump to convert is read from standard input.
 DUMP_FILE="/dev/stdin"

 # Working copy of the dump (perl edits it in place later on). Stop right
 # away if it cannot be created instead of carrying on with an empty path.
 TEMP_FILE="$(mktemp)" || {
   log "Error: could not create a temporary file."
   exit 1
 }
 # Remove the working copy on every exit path; -f keeps the cleanup quiet
 # if the file is already gone, "--" protects against odd file names.
 trap 'rm -f -- "${TEMP_FILE}"' EXIT

 # Copy the whole dump; a failure here (for instance a full /tmp) would
 # otherwise lead to a silently truncated result.
 if ! cat < "${DUMP_FILE}" > "${TEMP_FILE}"; then
   log "Error: could not copy the dump to ${TEMP_FILE}."
   exit 1
 fi

 # Length difference between the two names. Every PHP serialized string
 # (s:N:\"...\";) containing the old name has its declared length N
 # shifted by COUNT for each occurrence that is replaced.
 LEN1=${#OLD_DNS}
 LEN2=${#NEW_DNS}
 # 1 when the new name is longer, 0 otherwise; nothing in the visible
 # script reads it, it is kept for compatibility.
 DIRECTION=$((LEN2>LEN1))
 COUNT=$((LEN2-LEN1))
 if [[ ${COUNT} -eq 0 ]]; then
   log "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement"
 else
   NB=$(grep -F -c -- "${OLD_DNS}" < "${TEMP_FILE}")
   if [[ "0" != "${NB}" ]]; then
     log "Found ${NB} lines matching at least once ${OLD_DNS} in this file"
     log "Starting serialized content inline replacement in dump with string lenght increment..."
     # Anatomy of the regular expression (in the dump the double quotes of
     # serialized strings are backslash-escaped):
     #   $1  ([;|{]s:)         start of a serialized string: «;s:», «|s:» or «{s:»
     #   $2  ([0-9]+)          the declared string length
     #       :\\"              the opening «:\"»
     #   $3  (((?!\\";).)*?)   shortest run of characters not containing the
     #                         closing «\";»; $4 is the inner group and only
     #                         holds the last matched character (unused)
     #   $5  (old domain)      the old name with its dots escaped
     #   $6  (.*?)             shortest remainder up to the closing sequence
     #       \\";              the closing «\";» of the serialized string
     # The replacement rebuilds the string with the new name and $2 + COUNT
     # as length. $6 may still contain further occurrences of the old name,
     # and s/// returns the number of substitutions made, so "1 while s///"
     # repeats the pass until nothing matches any more.
     # Manual check: run the same s#...#...#ge as «$C+=s#...#...#ge;
     # END{print "$C\n"}» on a sample file to print the replacement count.
     "${PERL}" -n -p -i -e '$rgx=qr/([;|{]s:)([0-9]+):\\"(((?!\\";).)*?)('"${ESCAPED_OLD_DNS}"')(.*?)\\";/; 1 while s#$rgx#"$1".($2+'"${COUNT}"').":\\\"$3'"${NEW_DNS}"'$6\\\";"#ge;' "${TEMP_FILE}"
     log "Done with serialized strings"
   else
     log "${OLD_DNS} not found in file, quite certainly nothing to be done."
   fi
 fi
 NB=$(grep -F -c -- "${OLD_DNS}" "${TEMP_FILE}")
 if [[ "0" != "${NB}" ]]; then
   log "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: "
 fi
 # sed runs unconditionally: it is the only command that writes the dump
 # to stdout, so skipping it when nothing is left to replace would hand an
 # empty dump to the next stage of the pipeline. The dot-escaped name is
 # used so that "." only matches a literal dot.
 "${SED}" "s#${ESCAPED_OLD_DNS}#${NEW_DNS}#g" < "${TEMP_FILE}"
 log "Everything Done"
 exit 0
	#!/bin/bash
	##################################################################
	# Licensed under GNU GPL v3 #
	# [email protected] #
	# #
	# DNS replacement script in SQL dumps containing (also) PHP #
	# serialized strings. #
	# This script uses bash and perl; perl is used to increment #
	# serialized string length while performing DNS replacement #
	# It also uses sed for other basic DNS replacements #
	# #
	# This is a modified version from #
	# https://gist.github.com/regilero/5885056 which takes old and #
	# new DNS names from command-line arguments, and more #
	# importantly takes the SQL dump to modify from stdin, in order #
	# to allow chaining to workaround the subdomain replacement #
	# problem. For example, going from name.tld to new.name.tld: #
	# #
	# cat dump.sql | \ #
	# serialization_sql_dump_cleaner.sh "name.tld" "fake.tld" | \ #
	# serialization_sql_dump_cleaner.sh "fake.tld" "new.name.tld" \ #
	# > fixed-dump.sql #
	# #
	# Keep in mind that your dump will be copied to a temporary file #
	# so if it is more than 100 MB or so, this will make a lot of #
	# writes and might even fill up your /tmp folder if you have a #
	# lot of chaining going on. #
	##################################################################

	# Print all arguments, joined by single spaces, as one line on stderr.
	# Diagnostics are kept off stdout, which this script uses for the dump.
	#
	# ">&2" duplicates the already-open stderr descriptor. "> /dev/stderr"
	# would instead re-open the target with truncation, so when stderr is
	# redirected to a regular file every call would erase the previous
	# messages. printf is used rather than echo so that a message starting
	# with "-n" or "-e" is printed literally.
	function log() {
	  local IFS=' '
	  printf '%s\n' "$*" >&2
	}

	# Both positional parameters are mandatory: when either one is missing
	# or empty, print the usage text on stderr and abort with status 1.
	if [[ -z "$1" || -z "$2" ]]; then
	  log "Usage: $0 <old domain> <new domain>"
	  log " Replaces all occurences of <old domain> in Drupal SQL dump"
	  log " to <new domain> taking into account PHP serialized strings."
	  exit 1
	fi

	# Old and new domain names, exactly as given on the command line.
	OLD_DNS="$1"
	NEW_DNS="$2"

	# Backslash-escape every dot so the names can be embedded in a regular
	# expression without "." matching an arbitrary character.
	ESCAPED_NEW_DNS=${NEW_DNS//\./\\.}
	ESCAPED_OLD_DNS=${OLD_DNS//\./\\.}

	# Refuse a new name that contains the old one (e.g. name.tld ->
	# new.name.tld): the perl pass repeats its substitution until nothing
	# matches, so the freshly inserted name would match again forever.
	# The surrounding "*" wildcards make this a substring test (without them
	# only identical names are caught), and the old name is quoted so it is
	# compared literally rather than as a glob.
	if [[ "${NEW_DNS}" == *"${OLD_DNS}"* ]]; then
	  log "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!"
	  exit 1
	fi

	# Resolve the external tools used further down and abort early when one
	# is missing. "command -v" is the shell builtin specified by POSIX for
	# this lookup, so it does not depend on the external "which" program
	# being installed. The results are tested with a quoted -z so that an
	# empty value or a path containing whitespace is handled correctly.
	SED=$(command -v sed)
	if [[ -z "${SED}" ]]; then
	  log "Error: 'sed' command not found."
	  exit 1
	fi

	PERL=$(command -v perl)
	if [[ -z "${PERL}" ]]; then
	  log "Error: 'perl' command not found."
	  exit 1
	fi

	# The dump to convert is read from standard input.
	DUMP_FILE="/dev/stdin"

	# Working copy of the dump (perl edits it in place later on). Stop right
	# away if it cannot be created instead of carrying on with an empty path.
	TEMP_FILE="$(mktemp)" || {
	  log "Error: could not create a temporary file."
	  exit 1
	}
	# Remove the working copy on every exit path; -f keeps the cleanup quiet
	# if the file is already gone, "--" protects against odd file names.
	trap 'rm -f -- "${TEMP_FILE}"' EXIT

	# Copy the whole dump; a failure here (for instance a full /tmp) would
	# otherwise lead to a silently truncated result.
	if ! cat < "${DUMP_FILE}" > "${TEMP_FILE}"; then
	  log "Error: could not copy the dump to ${TEMP_FILE}."
	  exit 1
	fi

	# Length difference between the two names. Every PHP serialized string
	# (s:N:\"...\";) containing the old name has its declared length N
	# shifted by COUNT for each occurrence that is replaced.
	LEN1=${#OLD_DNS}
	LEN2=${#NEW_DNS}
	# 1 when the new name is longer, 0 otherwise; nothing in the visible
	# script reads it, it is kept for compatibility.
	DIRECTION=$((LEN2>LEN1))
	COUNT=$((LEN2-LEN1))
	if [[ ${COUNT} -eq 0 ]]; then
	  log "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement"
	else
	  NB=$(grep -F -c -- "${OLD_DNS}" < "${TEMP_FILE}")
	  if [[ "0" != "${NB}" ]]; then
	    log "Found ${NB} lines matching at least once ${OLD_DNS} in this file"
	    log "Starting serialized content inline replacement in dump with string lenght increment..."
	    # Anatomy of the regular expression (in the dump the double quotes of
	    # serialized strings are backslash-escaped):
	    #   $1  ([;|{]s:)         start of a serialized string: «;s:», «|s:» or «{s:»
	    #   $2  ([0-9]+)          the declared string length
	    #       :\\"              the opening «:\"»
	    #   $3  (((?!\\";).)*?)   shortest run of characters not containing the
	    #                         closing «\";»; $4 is the inner group and only
	    #                         holds the last matched character (unused)
	    #   $5  (old domain)      the old name with its dots escaped
	    #   $6  (.*?)             shortest remainder up to the closing sequence
	    #       \\";              the closing «\";» of the serialized string
	    # The "*" quantifiers are essential: without them $3 and $6 match at
	    # most one character and almost no serialized string is rewritten.
	    # The replacement rebuilds the string with the new name and $2 + COUNT
	    # as length. $6 may still contain further occurrences of the old name,
	    # and s/// returns the number of substitutions made, so "1 while s///"
	    # repeats the pass until nothing matches any more.
	    # Manual check: run the same s#...#...#ge as «$C+=s#...#...#ge;
	    # END{print "$C\n"}» on a sample file to print the replacement count.
	    "${PERL}" -n -p -i -e '$rgx=qr/([;|{]s:)([0-9]+):\\"(((?!\\";).)*?)('"${ESCAPED_OLD_DNS}"')(.*?)\\";/; 1 while s#$rgx#"$1".($2+'"${COUNT}"').":\\\"$3'"${NEW_DNS}"'$6\\\";"#ge;' "${TEMP_FILE}"
	    log "Done with serialized strings"
	  else
	    log "${OLD_DNS} not found in file, quite certainly nothing to be done."
	  fi
	fi
	NB=$(grep -F -c -- "${OLD_DNS}" "${TEMP_FILE}")
	if [[ "0" != "${NB}" ]]; then
	  log "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: "
	fi
	# sed runs unconditionally: it is the only command that writes the dump
	# to stdout, so skipping it when nothing is left to replace would hand an
	# empty dump to the next stage of the pipeline. The dot-escaped name is
	# used so that "." only matches a literal dot.
	"${SED}" "s#${ESCAPED_OLD_DNS}#${NEW_DNS}#g" < "${TEMP_FILE}"
	log "Everything Done"
	exit 0