Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save seeschloss/5958033 to your computer and use it in GitHub Desktop.
Save seeschloss/5958033 to your computer and use it in GitHub Desktop.
#!/bin/bash
##################################################################
# Licensed under GNU GPL v3 #
# [email protected] #
# #
# DNS replacement script for SQL dumps that (also) contain PHP #
# serialized strings. #
# This script uses bash and perl; perl is used to adjust the #
# serialized string lengths while performing the DNS replacement. #
# It also uses sed for the other, plain DNS replacements. #
# #
# This is a modified version from #
# https://gist.github.com/regilero/5885056 which takes old and #
# new DNS names from command-line arguments, and more #
# importantly takes the SQL dump to modify from stdin, in order #
# to allow chaining to workaround the subdomain replacement #
# problem. For example, going from name.tld to new.name.tld: #
# #
# cat dump.sql | \ #
# serialization_sql_dump_cleaner.sh "name.tld" "fake.tld" | \ #
# serialization_sql_dump_cleaner.sh "fake.tld" "new.name.tld" \ #
# > fixed-dump.sql #
# #
# Keep in mind that your dump will be copied to a temporary file #
# so if it is more than 100 MB or so, this will make a lot of #
# writes and might even fill up your /tmp folder if you have a #
# lot of chaining going on. #
##################################################################
#######################################
# Print a diagnostic message on stderr, keeping stdout free for the SQL dump.
# Arguments: the words of the message; they are joined with the first
#            character of IFS (a space by default).
# Outputs:   the message followed by a newline, on stderr.
#######################################
function log() {
  # Duplicate the already-open stderr descriptor (>&2) instead of re-opening
  # /dev/stderr by name: re-opening truncates the target when stderr has been
  # redirected to a regular file, so every message would erase the previous
  # ones. printf is used so that a message such as "-n" is printed verbatim
  # rather than being taken as an echo option.
  printf '%s\n' "$*" >&2
}
# Validate the command line: both the old and the new domain are mandatory.
# [[ ... || ... ]] is used instead of the deprecated "[ ... -o ... ]" form,
# which is ambiguous when an argument looks like a test operator.
if [[ -z "$1" || -z "$2" ]]; then
  log "Usage: $0 <old domain> <new domain>"
  log " Replaces all occurences of <old domain> in Drupal SQL dump"
  log " to <new domain> taking into account PHP serialized strings."
  exit 1
fi

OLD_DNS="$1"
NEW_DNS="$2"

# Replace . by \. so perl will not interpret the dots as "any character".
ESCAPED_NEW_DNS=${NEW_DNS//\./\\.}
ESCAPED_OLD_DNS=${OLD_DNS//\./\\.}

# Refuse a new domain that contains the old one (e.g. name.tld -> new.name.tld):
# the perl replacement loops until the old domain no longer matches, so it
# would never terminate. The old domain is quoted inside the pattern so that
# it is compared as a literal substring, not as a glob.
if [[ "${NEW_DNS}" == *"${OLD_DNS}"* ]]; then
  log "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!"
  exit 1
fi
# Locate the external tools used by the rest of the script and abort early
# with a clear message when one of them is missing. "command -v" is a shell
# builtin, so unlike "which" it needs no extra program and prints nothing
# when the tool is not found; the results are tested quoted so that an empty
# value or a path containing spaces is handled correctly.
SED="$(command -v sed)"
if [[ -z "${SED}" ]]; then
  log "Error: 'sed' command not found."
  exit 1
fi

PERL="$(command -v perl)"
if [[ -z "${PERL}" ]]; then
  log "Error: 'perl' command not found."
  exit 1
fi
# Buffer the whole dump (read from stdin) in a temporary file: perl edits it
# in place, and grep and sed each need their own pass over it. The file is
# removed on every exit path.
TEMP_FILE="$(mktemp)" || { log "Error: could not create a temporary file."; exit 1; }
trap 'rm -f -- "${TEMP_FILE}"' EXIT
if ! cat > "${TEMP_FILE}"; then
  log "Error: could not copy the dump to ${TEMP_FILE}."
  exit 1
fi

# PHP serialized strings record their length in bytes, so the two domains are
# measured in the C locale (inside a subshell, leaving the script's locale
# untouched). COUNT is the amount to add to each affected length.
LEN1=$(LC_ALL=C; printf '%s' "${#OLD_DNS}")
LEN2=$(LC_ALL=C; printf '%s' "${#NEW_DNS}")
COUNT=$((LEN2 - LEN1))

if [[ ${COUNT} -eq 0 ]]; then
  log "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement"
else
  NB=$(grep -F -c -e "${OLD_DNS}" < "${TEMP_FILE}")
  if [[ "0" != "${NB:-0}" ]]; then
    log "Found ${NB} lines matching at least once ${OLD_DNS} in this file"
    log "Starting serialized content inline replacement in dump with string lenght increment..."
    # The regular expression matches one escaped PHP serialized string that
    # contains the old domain:
    #   $1  ([;|{]s:)         start of a serialized string: ";s:", "|s:" or "{s:"
    #   $2  ([0-9]+)          the recorded string length
    #       :\\"              the opening escaped quote (\")
    #   $3  (((?!\\";).)*?)   shortest text before the old domain that does not
    #                         contain the terminator \"; ($4, the inner group,
    #                         only holds the last matched character; unused)
    #   $5  ($old)            the old domain, matched literally
    #   $6  (.*?)             shortest text up to the terminator
    #       \\";              end of the serialized string
    # Each substitution adds COUNT to the length and swaps in the new domain.
    # $6 may still contain further occurrences of the old domain, so the
    # substitution is repeated ("1 while ...") until it no longer matches.
    #
    # The domains and the delta are handed to perl through the environment
    # instead of being pasted into the program text: the old domain is
    # escaped with quotemeta, and the new domain can no longer be misread as
    # perl code (a domain starting with a digit used to be glued onto "$3",
    # forming the name of another, empty, capture variable).
    if ! SQLFIX_OLD="${OLD_DNS}" SQLFIX_NEW="${NEW_DNS}" SQLFIX_DELTA="${COUNT}" \
      "${PERL}" -p -i -e '
        BEGIN {
          $old = quotemeta $ENV{SQLFIX_OLD};
          $rgx = qr/([;|{]s:)([0-9]+):\\"(((?!\\";).)*?)($old)(.*?)\\";/;
        }
        1 while s#$rgx#$1 . ($2 + $ENV{SQLFIX_DELTA}) . ":\\\"" . $3 . $ENV{SQLFIX_NEW} . $6 . "\\\";"#ge;
      ' "${TEMP_FILE}"; then
      log "Error: perl failed while rewriting serialized strings."
      exit 1
    fi
    log "Done with serialized strings"
  else
    log "${OLD_DNS} not found in file, quite certainly nothing to be done."
  fi
fi

NB=$(grep -F -c -e "${OLD_DNS}" "${TEMP_FILE}")
if [[ "0" != "${NB:-0}" ]]; then
  log "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: "
fi

# Escape both domains for use in "s#old#new#g": in the pattern every basic
# regular expression metacharacter and the "#" delimiter, in the replacement
# the backslash, "&" and the delimiter. The old domain is thus replaced as a
# literal string, consistently with the grep -F counts above.
SED_OLD=$(printf '%s\n' "${OLD_DNS}" | "${SED}" -e 's/[][\.*^$#]/\\&/g')
SED_NEW=$(printf '%s\n' "${NEW_DNS}" | "${SED}" -e 's/[\&#]/\\&/g')

# The dump is always written to stdout, even when nothing is left to replace:
# this script is a filter, and printing nothing would silently drop the whole
# dump from the pipeline.
if ! "${SED}" -e "s#${SED_OLD}#${SED_NEW}#g" < "${TEMP_FILE}"; then
  log "Error: sed failed while writing the dump."
  exit 1
fi

log "Everything Done"
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment