Created
October 25, 2013 12:37
-
-
Save Makistos/7154021 to your computer and use it in GitHub Desktop.
This script calculates how many lines of code a company or organization has submitted to a Git repo for a single revision. This is done using git blame with emails. The email address is used to filter the lines to produce source code files that only contain lines by the selected company. Blame output is then removed from these files and the resulting…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Global configuration for the script. The option-parsing code further down
# overrides COMPANY, EXCLUDE_FILE, TOOL and FILTER from the command line.

# Scratch directory that receives the file lists and the filtered sources.
# Abort if it cannot be created: later steps run find/rm against this path,
# and an empty value would make them operate on the current directory.
TEMP_DIR=$(mktemp -d) || {
  echo "Could not create temporary directory, exiting." >&2
  exit 1
}
# Remove the scratch directory on every exit path (including -h and errors).
trap 'rm -rf -- "$TEMP_DIR"' EXIT

# Show counter if number of files is greater than this
COUNTER_VISIBLE=1
# Company name to look for in author email addresses (required option).
COMPANY=""
# Program name used in usage/error messages; quoted so paths with spaces work.
SCRIPT=$(basename -- "$0")
EXCLUDE_FILE=""          # Name of exclude list file
TOOL=1                   # Default "cloc"
FILTER='\.[ch]p{0,2}$'   # Default file filter
# Print the usage text to stdout.
# Globals: SCRIPT (read) - program name shown in the usage line.
# Note: inside this script the function takes precedence over bash's
# builtin `help`.
help() {
  echo
  echo "Usage: $SCRIPT [options]"
  # Single-quoted so the regex in the -f line is printed literally.
  printf '%s\n' \
    ' Options:' \
    ' --all Analyze ALL files, no filtering is done.' \
    ' --exclude-list= File containing exclude list (see comment below).' \
    ' -c, --company= Company name as shown in email address (REQUIRED).' \
    ' -f, --file-filter= File filter. Default \.[ch]p{0,2}$ which includes C and C++ files.' \
    ' -t, --tool= LOC count tool to use. 1 = cloc (default), 2 = sloccount.' \
    ' -h, --help This help text.'
  echo
  printf '%s\n' \
    ' Exclude list can be used to exclude files or directories from' \
    'the analysis. Simply list them relative to the root of the repository' \
    'directory.'
  echo
  echo " Script requires a relatively new version of Git that supports --show-email."
  echo
}
# Parse the command-line options into the globals FILTER, COMPANY,
# EXCLUDE_FILE and TOOL. Parsing stops at the first unrecognised argument.
# Arguments: the script's positional parameters.
# Exits with status 1 after printing help (-h/--help) or when the exclude
# list option carries no file name.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h | --help)
        help
        exit 1
        ;;
      --all)
        FILTER=""
        shift
        ;;
      -f)
        shift
        if [[ $# -gt 0 ]]; then
          FILTER=$1
          shift
        fi
        ;;
      --file-filter=*)
        # Everything after the first '='. The option must be shifted away,
        # otherwise the loop would keep re-reading it forever.
        FILTER=${1#*=}
        shift
        ;;
      -c)
        shift
        if [[ $# -gt 0 ]]; then
          COMPANY=$1
          shift
        fi
        ;;
      --company=*)
        COMPANY=${1#*=}
        shift
        ;;
      --exclude-list)
        echo "Exclude file missing!"
        exit 1
        ;;
      --exclude-list=*)
        EXCLUDE_FILE=${1#*=}
        if [[ -z "$EXCLUDE_FILE" ]]; then
          echo "Exclude file missing!"
          exit 1
        fi
        shift
        ;;
      -t)
        shift
        if [[ $# -gt 0 ]]; then
          TOOL=$1
          shift
        fi
        ;;
      --tool=*)
        TOOL=${1#*=}
        shift
        ;;
      *)
        break
        ;;
    esac
  done
}
parse_args "$@"
# The company name is mandatory: without it there is nothing to filter on,
# so point the user at the help text and stop.
if [[ -z "$COMPANY" ]]; then
  printf '%s\n' \
    "Company name missing, exiting." \
    "Try $SCRIPT -h"
  exit 1
fi
echo "Copying directory structure to $TEMP_DIR."
# Stop here if TEMP_DIR is empty or unset instead of touching the wrong place.
: "${TEMP_DIR:?TEMP_DIR is not set}"
# Recreate only the directory skeleton of the working tree inside TEMP_DIR.
# This avoids copying every file just to delete it again, and it also covers
# dot-directories, which the `*` glob does not match. Git metadata is skipped.
find . -name .git -prune -o -type d -print0 \
  | (cd "$TEMP_DIR" && xargs -0 mkdir -p --)
# Get list of authors from company (emails only).
# The match is case-insensitive so that it selects the same people as the
# per-file `grep -i` check on the commit log performed later in the script.
echo "Creating list of authors into authors.txt..."
git log --format="%ae" | grep -i -- "$COMPANY" | sort -u > authors.txt

# Build the list of tracked files that will be analysed.
echo "Creating list of files..."
git ls-files --full-name > "$TEMP_DIR/file-list.txt"
TOTAL_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")
if [[ -n "$FILTER" ]]; then
  # Only keep files whose path matches the filter regex (by default C and
  # C++ sources/headers). Quoted so the shell never globs the pattern.
  grep -E -e "$FILTER" < "$TEMP_DIR/file-list.txt" > "$TEMP_DIR/file-list.tmp"
  mv -- "$TEMP_DIR/file-list.tmp" "$TEMP_DIR/file-list.txt"
fi
INCLUDED_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")
echo "Total number of files: $TOTAL_FILES"
echo "Total number of files to analyze: $INCLUDED_FILES"
echo
echo "Removing files with no edits by $COMPANY..."
counter=1
# Start from an empty result list so that "no file matched" yields an empty
# list instead of silently keeping the unfiltered one.
: > "$TEMP_DIR/file-list.tmp"
while IFS= read -r file; do
  # Progress indicator, rewritten in place on one terminal line.
  if ((INCLUDED_FILES > COUNTER_VISIBLE)); then
    printf '\r%s / %s (%s) ' "$counter" "$INCLUDED_FILES" "$file"
  fi
  # Only include files that have edits by company: keep the file when any
  # commit touching it has an author email matching COMPANY.
  if git log --format="%ae" -- "$file" | grep -qi -- "$COMPANY"; then
    printf '%s\n' "$file" >> "$TEMP_DIR/file-list.tmp"
  fi
  counter=$((counter + 1))
done < "$TEMP_DIR/file-list.txt"
echo
mv -- "$TEMP_DIR/file-list.tmp" "$TEMP_DIR/file-list.txt"
# Remember the starting directory; the script changes into TEMP_DIR later
# and returns here at the end.
CURR_DIR=$(pwd)
if [[ -n "$EXCLUDE_FILE" ]]; then
  echo "Handling exclude list..."
  # An unreadable exclude file would make grep print nothing and thereby
  # empty the whole file list, so fail loudly instead.
  if [[ ! -r "$EXCLUDE_FILE" ]]; then
    echo "Exclude file '$EXCLUDE_FILE' cannot be read, exiting." >&2
    exit 1
  fi
  # Drop every path that contains one of the exclude entries as a fixed string.
  grep -vFf "$EXCLUDE_FILE" -- "$TEMP_DIR/file-list.txt" > "$TEMP_DIR/file-filtered.txt"
  mv -- "$TEMP_DIR/file-filtered.txt" "$TEMP_DIR/file-list.txt"
fi
COMPANY_FILES=$(wc -l < "$TEMP_DIR/file-list.txt")
echo "Files removed: $((INCLUDED_FILES - COMPANY_FILES))"
echo
# Strip the annotation that `git blame --show-email` puts in front of every
# source line: "<commit> [<path>] (<email> <date> <time> <tz> <lineno>) ".
# Reads blame output on stdin and writes the bare source lines to stdout.
# Both "+hhmm" and "-hhmm" timezones are accepted, and the match is anchored
# at the start of the line so an optional boundary marker or file-name
# column is removed as well.
strip_blame_prefix() {
  sed -E 's/^[^(]*\(<[^>]*>[[:space:]]+[0-9]{4}-[0-9]{2}-[0-9]{2}[[:space:]]+[0-9]{2}:[0-9]{2}:[0-9]{2}[[:space:]]+[-+][0-9]{4}[[:space:]]+[0-9]+\) ?//'
}

# For each file, find lines of code added or edited by company employee and
# only copy those lines to the temporary directory.
echo "Removing code not written by $COMPANY.."
counter=1
while IFS= read -r file; do
  if ((COMPANY_FILES > COUNTER_VISIBLE)); then
    printf '\r%s / %s (%s) ' "$counter" "$COMPANY_FILES" "$file"
  fi
  # Make sure the target directory exists inside the scratch tree.
  mkdir -p -- "$(dirname -- "$TEMP_DIR/$file")"
  # Annotate every line with its author email, keep the lines whose
  # annotation contains one of the company emails from authors.txt, then
  # drop the annotation. --date=iso pins the date layout the sed expects.
  git blame -w --show-email --date=iso -- "$file" \
    | grep -Ff authors.txt \
    | strip_blame_prefix > "$TEMP_DIR/$file"
  counter=$((counter + 1))
done < "$TEMP_DIR/file-list.txt"
echo
echo
# Keep a copy of the exclude list next to the filtered sources; it is read
# from there once the working directory has changed.
if [[ -n "$EXCLUDE_FILE" ]]; then
  cp -- "$EXCLUDE_FILE" "$TEMP_DIR"
fi
# Everything below deletes files relative to the working directory, so never
# continue if the scratch directory cannot be entered.
cd "${TEMP_DIR:?TEMP_DIR is not set}" || exit 1
# Remove the intermediate blame files. Match the two exact suffixes only, so
# real sources with ".back" inside their name (e.g. foo.backup.c) survive.
find "$TEMP_DIR" -type f \( -name '*.back' -o -name '*.back2' \) -exec rm -f -- {} +
echo "Running analysis"
echo "----------------"
# Count lines & print results
if [[ "$TOOL" == 2 ]]; then
  if [[ -n "$EXCLUDE_FILE" ]]; then
    echo "Handling exclude list..."
    # Read the copy made above: the original path may be relative to the
    # directory the script was started from and is no longer valid here.
    while IFS= read -r file || [[ -n "$file" ]]; do
      # Entries are relative to the repository root. Skip blank lines and
      # anything that could point outside the scratch directory.
      case "$file" in
        "" | /* | .. | ../* | */.. | */../*) continue ;;
      esac
      if [[ -d "$file" ]]; then
        echo "Removing dir $file"
        rm -rf -- "./$file"
      else
        echo "Removing $file"
        rm -f -- "./$file"
      fi
    done < "${EXCLUDE_FILE##*/}"
  fi
  sloccount .
else
  # Paths in the list are relative and resolve against the scratch directory.
  cloc --list-file "$TEMP_DIR/file-list.txt"
fi
cd "$CURR_DIR"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment