#!/bin/sh
#
# authors.sh - list the authors of the text files tracked in HEAD together
# with a rough measure of how much each of them contributed.
#
# Syntax: authors.sh [ PATH ]
#
#   PATH  Optional path limiting both the files that are inspected and the
#         history that is counted. When omitted, no path limit is passed to
#         git at all (an empty string is not a valid pathspec).
#
# Output (stdout): one line per author, tab-separated,
#   NAME <TAB> E-MAIL <TAB> COUNT
# sorted by COUNT, highest first.
#
# Known limitations:
#   - `git blame -C` only follows lines moved or copied from files that were
#     modified in the same commit, so an author whose code was moved to
#     another file in a later commit may not be listed.
#   - File names containing newlines are not supported.
#   - The script is deliberately best-effort (no `set -e`): a failure on one
#     file is reported by the failing tool on stderr and the run continues.

# Force the C locale for every tool we spawn so that grep/sed/sort work on
# bytes and produce reproducible results. LC_ALL must be exported, and it
# takes precedence over LC_CTYPE/LANG.
LC_ALL=C
export LC_ALL

# Print the non-empty text files tracked in HEAD, one per line, skipping the
# bootloaders and firmwares directories.
#   $1 - optional path limit
list_text_files() {
  # core.quotePath=false keeps non-ASCII file names unescaped so that they
  # can be opened as printed.
  git -c core.quotePath=false ls-tree -r --name-only HEAD ${1:+"$1"} |
    grep -v -e '^bootloaders' -e '^firmwares' |
    while IFS= read -r file; do
      # Skip submodule entries and files missing from the working tree.
      [ -f "$file" ] || continue
      # The empty pattern matches any line; -I makes binary files count as
      # non-matching, so only non-empty text files get through.
      if grep -Iq '' -- "$file"; then
        printf '%s\n' "$file"
      fi
    done
}

# Filter: convert `git blame --porcelain` output (stdin) to NAME,E-MAIL
# lines. Commas inside names are replaced by spaces because the comma is
# used as the field separator in the rest of the script.
blame_to_authors() {
  sed -n \
    -e '/^author /{ s/^author //; s/,/ /g; h; }' \
    -e '/^author-mail /{ s/^author-mail //; H; x; s/[<>]//g; s/\n/,/p; }'
}

# Filter: extract "Co-authored-by: NAME <E-MAIL>" trailers from commit
# messages (stdin) as NAME,E-MAIL lines. Trailers without an address in
# angle brackets are dropped instead of being passed through unparsed.
log_to_coauthors() {
  sed -n -E \
    -e '/^[[:space:]]*Co-authored-by: /{ s/^[[:space:]]*Co-authored-by: //; s/,/ /g; s/^(.+) <([^<>]+)>.*$/\1,\2/p; }'
}

# Filter: read file names (stdin) and print NAME,E-MAIL for everybody who
# authored a line of, or was credited as co-author on, one of those files.
collect_authors() {
  while IFS= read -r file; do
    # stdin of git is detached so it can never consume the file list.
    git blame -w -C --porcelain -- "$file" </dev/null | blame_to_authors
    git log --format=%B -- "$file" </dev/null | log_to_coauthors
  done
}

# Filter: keep a single NAME,E-MAIL line per e-mail address.
dedupe_by_email() {
  sort -u -t, -k2,2
}

# Filter: append the count of relevant contributions to each NAME,E-MAIL
# line (stdin), giving NAME,E-MAIL,COUNT.
#   $1 - optional path limit
#
# For every commit authored with that e-mail address and every file it
# touched, the number of added lines is summed up when it is at least 3.
# Please note that even a one-line contribution might be copyrightable, so
# this threshold is arbitrary. YMMV.
# Also, this may raise false positives because it also considers
# contributions that are not visible in HEAD anymore.
count_contributions() {
  while IFS=, read -r name email; do
    [ -n "$email" ] || continue
    # -F plus the angle brackets match the address literally and completely
    # instead of treating it as an unanchored regular expression.
    lines=$(git log -F --author="<$email>" --pretty=tformat: --numstat \
      -- ${1:+"$1"} </dev/null |
      awk 'BEGIN { s = 0 } $1 > 2 { s += $1 } END { print s }')
    printf '%s,%s,%s\n' "$name" "$email" "$lines"
  done
}

# Entry point: run the whole pipeline and convert the commas to tabs.
# (It is much easier to use commas in the intermediate steps.)
#   $1 - optional path limit
main() {
  list_text_files "${1-}" |
    collect_authors |
    dedupe_by_email |
    count_contributions "${1-}" |
    sort -t, -k3,3 -nr |
    tr ',' '\t'
}

main "$@"
The script does not consider that code (i.e. a contributor's intellectual property, "IP") may later be moved to a different file by another author:
Moving the IP from one file to another doesn't change the fact that "Foo Contributor <foo@example.com>" is still a copyright holder, yet the script does not recognize them as such.