Last active
February 6, 2024 10:45
-
-
Save papachristoumarios/8fb2ab391b54605a650f91662dfaa99f to your computer and use it in GitHub Desktop.
Anonymize source code for blind review paper submission with sed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Anonymization of source code for blind review paper submission
# using sed to replace words with XXX. It also removes the .git
# directory in the anonymized project to avoid exposing any
# sensitive information about the author(s).
#
# Usage: anonymize.sh /path/to/project words_to_anonymize.txt
#   words_to_anonymize.txt contains one word per line. Each line is used
#   as a sed extended regular expression (case-sensitive); blank lines and
#   surrounding whitespace are ignored.
#
# Output: <32 hex chars>.tar.gz in the current directory, containing an
# anonymized copy of the project. The original project is never modified.
#
# Requires GNU sed (for `-i` without a backup suffix).
#
# Errors are checked explicitly after each step (no `set -e`), so a failed
# copy or a failed substitution never yields a half-anonymized archive.
#
# Author: Marios Papachristou

# Print a diagnostic message to stderr.
err() {
  printf '%s\n' "$*" >&2
}

# Print 32 random lowercase hexadecimal characters (no trailing newline).
random_hex_name() {
  # 16 random bytes -> 32 hex digits; reading a fixed byte count avoids
  # leaving a writer blocked on /dev/urandom behind a closed pipe.
  head -c 16 /dev/urandom | od -An -tx1 | tr -d ' \n'
}

#######################################
# Create an anonymized tarball of a project in the current directory.
# Arguments:
#   $1 - path to the project to anonymize
#   $2 - path to the file listing the words to replace, one per line
# Outputs:
#   Progress message and the tar file listing on stdout; errors on stderr.
# Returns:
#   0 on success, non-zero if any step fails (no archive is left behind).
#######################################
anonymize_project() {
  local project=$1 words_file=$2
  local anon_name word
  # Use a control character as the s/// delimiter so that words containing
  # '/' (URLs, paths) do not terminate the sed expression early.
  local -r delim=$'\001'
  local -a sed_exprs=()

  if [[ ! -e "$project" ]]; then
    err "anonymize: project not found: $project"
    return 1
  fi
  if [[ ! -f "$words_file" || ! -r "$words_file" ]]; then
    err "anonymize: cannot read word list: $words_file"
    return 1
  fi

  # Read the word list line by line: no glob expansion of the words, and
  # the last line is processed even without a trailing newline.
  while read -r word || [[ -n "$word" ]]; do
    word=${word%$'\r'}              # tolerate CRLF word lists
    [[ -n "$word" ]] || continue    # an empty pattern is a sed error
    sed_exprs+=(-e "s${delim}${word}${delim}XXX${delim}g")
  done < "$words_file"

  anon_name=$(random_hex_name)
  if [[ ${#anon_name} -ne 32 ]]; then
    err "anonymize: could not generate a random name"
    return 1
  fi

  printf '%s\n' "Copying $project to $anon_name"
  if ! cp -r -- "$project" "$anon_name"; then
    err "anonymize: copy failed"
    rm -rf -- "${anon_name:?}"
    return 1
  fi

  # Drop the git history before substituting: it must not be shipped, and
  # there is no point rewriting files that are about to be deleted.
  rm -rf -- "${anon_name:?}/.git"

  # Apply all substitutions in a single pass over the tree. `-exec ... +`
  # does not run sed at all when the tree contains no regular files.
  if (( ${#sed_exprs[@]} > 0 )); then
    if ! find "$anon_name" -type f \
      -exec sed -E -i "${sed_exprs[@]}" -- {} +; then
      # A failed substitution could leave identifying text in the archive.
      err "anonymize: sed failed; not creating an archive"
      rm -rf -- "${anon_name:?}"
      return 1
    fi
  fi

  if ! tar -zcvf "${anon_name}.tar.gz" "$anon_name"; then
    err "anonymize: tar failed"
    rm -rf -- "${anon_name:?}" "${anon_name}.tar.gz"
    return 1
  fi

  rm -rf -- "${anon_name:?}"
}

# Entry point: validate the command line and run the anonymization.
main() {
  if (( $# < 2 )); then
    err "Usage: ${0##*/} /path/to/project words_to_anonymize.txt"
    return 2
  fi
  anonymize_project "$1" "$2"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment