Forked from regilero/serialization_sql_dump_cleaner.sh
Last active
December 19, 2015 12:49
-
-
Save seeschloss/5958033 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
##################################################################
# Licensed under GNU GPL v3                                      #
# [email protected]                                              #
#                                                                #
# DNS replacement script in SQL dumps containing (also) PHP      #
# serialized strings.                                            #
# This script uses bash and perl; perl is used to increment      #
# serialized string length while performing DNS replacement.     #
# It also uses sed for other basic DNS replacements.             #
#                                                                #
# This is a modified version from                                #
# https://gist.github.com/regilero/5885056 which takes old and   #
# new DNS names from command-line arguments, and more            #
# importantly takes the SQL dump to modify from stdin, in order  #
# to allow chaining to work around the subdomain replacement     #
# problem. For example, going from name.tld to new.name.tld:     #
#                                                                #
# cat dump.sql | \                                               #
# serialization_sql_dump_cleaner.sh "name.tld" "fake.tld" | \    #
# serialization_sql_dump_cleaner.sh "fake.tld" "new.name.tld" \  #
# > fixed-dump.sql                                               #
#                                                                #
# Keep in mind that your dump will be copied to a temporary file #
# so if it is more than 100 MB or so, this will make a lot of    #
# writes and might even fill up your /tmp folder if you have a   #
# lot of chaining going on.                                      #
##################################################################
#######################################
# Print a diagnostic message on stderr, keeping stdout free for the dump.
# Arguments: the message words; they are joined with the first character
#            of IFS (a space by default) into a single line.
# Outputs:   one line on stderr.
#######################################
log() {
  # Write to the already-open file descriptor 2 instead of re-opening
  # /dev/stderr: re-opening truncates the target when stderr is redirected
  # to a regular file, so every message would erase the previous ones.
  # printf (unlike echo) also prints messages such as "-n" literally.
  printf '%s\n' "$*" >&2
}
# Test arguments: both the old and the new domain name are mandatory.
if [[ -z "${1:-}" || -z "${2:-}" ]]; then
  log "Usage: $0 <old domain> <new domain>"
  log " Replaces all occurences of <old domain> in Drupal SQL dump"
  log " to <new domain> taking into account PHP serialized strings."
  exit 1
fi
OLD_DNS="$1"
NEW_DNS="$2"
# Replace . by \. so perl will not interpret dots as "any character".
ESCAPED_NEW_DNS=${NEW_DNS//\./\\.}
ESCAPED_OLD_DNS=${OLD_DNS//\./\\.}
# Refuse a new name that contains the old one: the perl pass loops until no
# occurrence of the old name is left, which would never happen in that case.
# The old name is quoted so that it is compared literally; unquoted, any
# glob character in it would be treated as a pattern by [[ == ]].
if [[ "${NEW_DNS}" == *"${OLD_DNS}"* ]]; then
  log "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!"
  exit 1
fi
SED=`which sed`; | |
if [ ! ${SED} ]; then | |
log "Error: 'sed' command not found." | |
exit 1; | |
fi | |
PERL=`which perl`; | |
if [ ! ${PERL} ]; then | |
log "Error: 'perl' command not found." | |
exit 1; | |
fi | |
# Buffer the whole dump from stdin into a private temporary file, so that it
# can be scanned several times and edited in place by the steps below.
TEMP_FILE="$(mktemp)" || {
  log "Error: could not create a temporary file."
  exit 1
}
# Remove the temporary copy on every exit path; -f keeps the cleanup quiet
# if the file is already gone.
trap 'rm -f -- "${TEMP_FILE}"' EXIT
# Read the inherited stdin directly rather than re-opening /dev/stdin, and
# stop if the copy fails (for example when the temporary directory is full)
# instead of going on with a truncated dump.
cat > "${TEMP_FILE}" || {
  log "Error: could not copy the dump from stdin to ${TEMP_FILE}."
  exit 1
}
LEN1=${#OLD_DNS} | |
LEN2=${#NEW_DNS} | |
DIRECTION=$((LEN2>LEN1)) | |
COUNT=$((LEN2-LEN1)) | |
if [[ $COUNT -eq 0 ]]; then | |
log "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement" | |
else | |
NB=`grep -F -c "${OLD_DNS}" < ${TEMP_FILE}`; | |
if [ "0" != "${NB}" ]; then | |
log "Found ${NB} lines matching at least once ${OLD_DNS} in this file" | |
log "Starting serialized content inline replacement in dump with string lenght increment..." | |
# $1 : «([;|{]s:)» : detect start of serialized string with «;s:» or «{s:» | |
# $2 : «([0-9]+)» : the serialized string length numbers | |
# «:\\"» : start of the string with «\";» | |
# $3 and $4 : «(((?!\\";).)*?)» : (?!\\";) means not the substring «\";», ((XX.)*?) so here we match everything (.*) which does not contain this substring and the last ? means | |
# here a small bug $4 contains the last matched char. unused. | |
# non greedy, se we take the shortest match | |
# $5 : «('${OLD_DNS2}')» : finally it is (foo\.example\.com)' matching the DNS to replace | |
# $6: «(.*?)» : match anything until the next pattern, the ? makes it a non-greedy match (shortest) | |
# it is OK as next pattern is closing the serialized string. | |
# non greedy: i.e. regular mode is match as much as you can contain in backward mode, | |
# in non greedy is match the smallest way still working | |
# it will make the match as small as possible, and next pattern will match the 1st end of serialized string available | |
# «\\";» last pattern is end of serialized string | |
# problem is that $6 contains the rest of string after 1st old DNS match. | |
# This string may contain other occurrences of old DNS | |
# and replacement should be done several times until nothing more happens, nb of replacement is | |
# returned by the s// pattern, so we loop until nothing more happens with the «l while» | |
# TEST with: perl -n -pe'$C+=s#([;|{]s:)([0-9]+):\\"(((?!\\";).)*?)('${OLD_DNS2}')(.*?)\\";#"$1".($2+'${COUNT}').":\\\"$3'${NEW_DNS}'$6\\\";"#ge; END{print"$C\n"}' < exemple.txt | |
${PERL} -n -p -i -e '$rgx=qr/([;|{]s:)([0-9]+):\\"(((?!\\";).)*?)('${ESCAPED_OLD_DNS}')(.*?)\\";/; 1 while s#$rgx#"$1".($2+'${COUNT}').":\\\"$3'${NEW_DNS}'$6\\\";"#ge;' ${TEMP_FILE} | |
log "Done with serialized strings" | |
else | |
log "${OLD_DNS} not found in file, quite certainly nothing to be done." | |
fi | |
fi | |
# Final pass: replace the occurrences of the old name that are left outside
# serialized strings and write the resulting dump to stdout.
NB=$(grep -F -c -- "${OLD_DNS}" "${TEMP_FILE}")
if [[ "0" != "${NB}" ]]; then
  log "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: "
  # Escape the old name for use as a basic regular expression (and the '#'
  # delimiter), so that it matches literally, like the grep -F count above;
  # an unescaped dot would also match any other character.
  sed_old=$(printf '%s' "${OLD_DNS}" | "${SED}" -e 's/[][\.*^$#]/\\&/g')
  # In the replacement only backslash, '&' and the delimiter are special.
  sed_new=$(printf '%s' "${NEW_DNS}" | "${SED}" -e 's/[\&#]/\\&/g')
  "${SED}" -e "s#${sed_old}#${sed_new}#g" < "${TEMP_FILE}"
else
  # Nothing left to replace: the dump must still reach stdout, otherwise the
  # caller's redirection or the next stage of a chain gets an empty file.
  cat -- "${TEMP_FILE}"
fi
log "Everything Done"
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment