Skip to content

Instantly share code, notes, and snippets.

@fapestniegd
Created August 4, 2009 20:39
Show Gist options
  • Save fapestniegd/161530 to your computer and use it in GitHub Desktop.
Save fapestniegd/161530 to your computer and use it in GitHub Desktop.
#!/bin/bash
################################################################################
# De-duplicate all data under the ${BACKUPS} directory by hard-linking every
# regular file to ${CAS}/<file's sha1sum>.
#
# Expected layout: ${BACKUPS}/<fqdn>/<time>/...
# A <time> directory is skipped once it contains a .deduplication_complete
# marker; the marker is only written when every file in it was processed
# without error.
#
# Environment:
#   DEDUPE_BASE  optional root of the tree (default: /dedupe)
#
# Exit status: 0 on success, 1 if any directory or file could not be processed.
#
# NOTE: hard links share a single inode, so owner and mode are shared by every
# name of a file, including its ${CAS} entry -- the last chown/chmod applied to
# any of the names wins. ${CAS} must live on the same filesystem as ${BACKUPS}.
################################################################################
BASE="${DEDUPE_BASE:-/dedupe}"
BACKUPS="${BASE}/backups"
CAS="${BASE}/backupcas"

#######################################
# Print the SHA-1 hex digest of a file.
# Arguments: $1 - path of the file to hash
# Outputs:   the 40-character digest on stdout
# Returns:   0 on success, 1 if no valid digest could be produced
#######################################
file_sha1() {
  local digest
  # Hash stdin instead of a named file: given a file argument, sha1sum
  # prefixes the digest with a backslash when the name contains a backslash
  # or a newline, which would corrupt the store key.
  digest=$(/usr/bin/sha1sum < "$1" | /usr/bin/awk '{print $1}') || return 1
  # An unreadable file yields an empty digest; never use that as a store key.
  [[ "${digest}" =~ ^[0-9a-f]{40}$ ]] || return 1
  printf '%s\n' "${digest}"
}

#######################################
# Hard-link one file into the content-addressed store, or replace it with a
# link to the existing store entry when its content is already present.
# Globals:   CAS (read)
# Arguments: $1 - path of the regular file to de-duplicate
# Outputs:   one "Create:", "Linked:" or "Exists:" line on stdout;
#            diagnostics on stderr
# Returns:   0 on success, 1 on any failure (the file is left in place)
#######################################
dedupe_file() {
  local file=$1
  local sha uid gid perms store tmp

  if ! sha=$(file_sha1 "${file}"); then
    printf 'Error: cannot checksum %s\n' "${file}" >&2
    return 1
  fi
  read -r uid gid perms < <(/usr/bin/stat -c "%u %g %a" "${file}")
  if [[ -z "${perms}" ]]; then
    printf 'Error: cannot stat %s\n' "${file}" >&2
    return 1
  fi
  store="${CAS}/${sha}"

  if [[ ! -f "${store}" ]]; then
    # Create the SHA if there isn't one
    echo "Create: ${file}"
    /bin/ln -- "${file}" "${store}" || return 1
    /bin/chown root:root "${store}" || return 1
    /bin/chmod 0600 "${store}" || return 1
  elif [[ ! "${file}" -ef "${store}" ]]; then
    # Duplicate content under a different inode: swap the file for a link to
    # the store entry. Link under a temporary name and rename over the
    # original, so the backup copy is never missing if linking fails.
    echo "Linked: ${file} "
    tmp="${file}.dedupe.$$"
    /bin/ln -- "${store}" "${tmp}" || return 1
    if ! /bin/mv -f -- "${tmp}" "${file}"; then
      /bin/rm -f -- "${tmp}"
      return 1
    fi
    # Re-apply the owner and mode the file had before it was replaced.
    /bin/chown "${uid}:${gid}" "${file}" || return 1
    /bin/chmod "${perms}" "${file}" || return 1
  else
    # Otherwise tell us it is already deduped
    echo "Exists: ${file} "
  fi
}

#######################################
# De-duplicate every regular file below one snapshot directory and mark the
# directory as complete.
# Arguments: $1 - snapshot directory (${BACKUPS}/<fqdn>/<time>)
# Returns:   0 if already complete or fully processed, 1 otherwise
#######################################
dedupe_snapshot() {
  local snapshot=$1
  local marker="${snapshot}/.deduplication_complete"
  local file failed=0

  if [[ -f "${marker}" ]]; then
    return 0
  fi
  # NUL-delimited names survive whitespace, backslashes and newlines; reading
  # from a process substitution keeps ${failed} in the current shell.
  while IFS= read -r -d '' file; do
    dedupe_file "${file}" || failed=1
  done < <(find "${snapshot}" -type f -print0)

  if (( failed )); then
    printf 'Error: %s not fully de-duplicated; marker not written\n' \
      "${snapshot}" >&2
    return 1
  fi
  touch "${marker}"
}

#######################################
# Walk ${BACKUPS}/<fqdn>/<time> and de-duplicate each snapshot directory.
# Globals:   BACKUPS, CAS (read)
# Returns:   0 on success, 1 if anything failed
#######################################
main() {
  local host_dir snapshot_dir status=0

  if [[ ! -d "${BACKUPS}" ]]; then
    printf 'Error: %s is not a directory\n' "${BACKUPS}" >&2
    return 1
  fi
  if [[ ! -d "${CAS}" ]]; then
    mkdir -p "${CAS}" || return 1
  fi

  # Glob instead of parsing ls; nullglob makes an empty directory yield no
  # iterations rather than the literal pattern.
  shopt -s nullglob
  for host_dir in "${BACKUPS}"/*/; do
    host_dir=${host_dir%/}
    for snapshot_dir in "${host_dir}"/*/; do
      snapshot_dir=${snapshot_dir%/}
      echo "${host_dir##*/} ${snapshot_dir##*/}"
      dedupe_snapshot "${snapshot_dir}" || status=1
    done
  done
  return "${status}"
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment