Skip to content

Instantly share code, notes, and snippets.

@danieltomasz
Last active September 5, 2024 19:03
Show Gist options
  • Save danieltomasz/e3be94d7f03ade2eb030edf63920e95a to your computer and use it in GitHub Desktop.
Save danieltomasz/e3be94d7f03ade2eb030edf63920e95a to your computer and use it in GitHub Desktop.
A simple script that counts net word changes per day in a git repository for the configured file extensions (useful for tracking daily goals when writing a thesis or articles)
#!/bin/bash
# Git Word Count Analyzer
# Copyright (c) 2024 Daniel Borek
# MIT License
# Inspired by https://www.gijsvandam.nl/post/measuring-your-writing-progress-with-a-git-word-count/
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# FILE_EXTENSIONS: extensions (without the leading dot) of the files whose
# word changes are counted. Add one entry per extension.
FILE_EXTENSIONS=(
  typ
  md
)
# DAYS_TO_SEARCH: how many days the report covers, counting back from today.
# The report ends early once it reaches the date of the earliest commit.
# The default of 7 corresponds to one week.
DAYS_TO_SEARCH=7
# Function to calculate word changes for a single commit
#
# The commit is compared against its first parent, or against git's empty
# tree when it has no parent, and only files matching FILE_EXTENSIONS are
# considered.
#
# Globals:   FILE_EXTENSIONS (read)
# Arguments: $1 - commit SHA (any revision git can resolve)
# Outputs:   one line on stdout: "<added> <deleted> <duplicated>"
#              added      - words introduced by the commit (word diff)
#              deleted    - words removed by the commit (word diff)
#              duplicated - words on changed lines whose text occurs more than
#                           once among the added/removed lines (for example a
#                           line that was moved rather than rewritten)
#            All three are 0 when git produces no diff.
calculate_word_changes() {
  local sha=$1
  local parent_sha ext added deleted duplicated
  local file_patterns=()

  for ext in "${FILE_EXTENSIONS[@]}"; do
    file_patterns+=("*.$ext")
  done

  # "$sha^@" succeeds with empty output for a parentless commit and expands to
  # several SHAs for a merge, so resolve the first parent explicitly and fall
  # back to the empty tree only when that really fails.
  if ! parent_sha=$(git rev-parse --verify --quiet "${sha}^" 2>/dev/null); then
    parent_sha=$(git hash-object -t tree /dev/null)
  fi

  # Two explicit endpoints are used instead of "A..B" because the empty tree
  # is not a commit. Only lines inside a hunk are counted, so the "---"/"+++"
  # file headers are skipped while real content that starts with "-" or "+"
  # (e.g. a Markdown bullet) is kept. The leading diff marker is stripped
  # before the words are counted.
  read -r added deleted < <(
    git diff --no-color --no-ext-diff --word-diff=porcelain \
      "$parent_sha" "$sha" -- "${file_patterns[@]}" |
      awk '
        /^diff --git / { in_hunk = 0; next }
        /^@@ /         { in_hunk = 1; next }
        !in_hunk       { next }
        /^[+]/         { added   += split(substr($0, 2), words) }
        /^-/           { deleted += split(substr($0, 2), words) }
        END            { printf "%d %d\n", added, deleted }
      '
  )

  # A changed line whose text shows up more than once among the added and
  # removed lines is counted once, by its number of words.
  duplicated=$(
    git diff --no-color --no-ext-diff \
      "$parent_sha" "$sha" -- "${file_patterns[@]}" |
      awk '
        /^diff --git /     { in_hunk = 0; next }
        /^@@ /             { in_hunk = 1; next }
        in_hunk && /^[-+]/ { seen[substr($0, 2)]++ }
        END {
          for (line in seen) {
            if (seen[line] > 1) {
              total += split(line, words)
            }
          }
          printf "%d\n", total
        }
      '
  )

  printf '%s %s %s\n' "${added:-0}" "${deleted:-0}" "${duplicated:-0}"
}
# Function to format numbers with leading spaces
# Arguments: $1 - integer to format (treated as 0 when omitted or empty)
# Outputs:   the number right-aligned in a field at least 6 characters wide,
#            written to stdout without a trailing newline
format_number() {
  # Quoted so the value is passed as one argument regardless of IFS or glob
  # characters; the explicit default keeps the "no argument prints 0" result.
  printf '%6d' "${1:-0}"
}
# ---------------------------------------------------------------------------
# Report: one row per calendar day, newest first, for the last DAYS_TO_SEARCH
# days (stopping early at the date of the earliest commit).
# ---------------------------------------------------------------------------

# Print the calendar date (YYYY-MM-DD) N days before today.
# Tries the BSD/macOS form (date -v) first and falls back to GNU date (-d).
# Arguments: $1 - number of days to go back (0 = today)
date_days_ago() {
  local n=$1
  date -v-"${n}"d +%Y-%m-%d 2>/dev/null || date -d "${n} days ago" +%Y-%m-%d
}

# Get the date of the earliest commit
earliest_date=$(git log --reverse --format=%ad --date=short | head -1)

# Branch whose history is analysed. Fall back to the current HEAD when the
# repository has no "main" branch (e.g. it still uses "master").
branch=main
if ! git rev-parse --verify --quiet "${branch}^{commit}" >/dev/null 2>&1; then
  branch=HEAD
fi

# Regex matching file names that end in one of the configured extensions,
# e.g. \.(typ|md)$ -- built once instead of once per commit.
ext_regex="\\.($(IFS='|'; printf '%s' "${FILE_EXTENSIONS[*]}"))\$"

# Print header
printf "%-10s %8s %8s %11s %11s\n" "Date" "Added" "Deleted" "Duplicated" "Net Change"
printf "%-10s %8s %8s %11s %11s\n" "----------" "--------" "--------" "-----------" "-----------"

# A C-style loop is used instead of seq, which counts downwards on BSD when
# the upper bound is below the lower one (DAYS_TO_SEARCH=0).
for ((i = 0; i < DAYS_TO_SEARCH; i++)); do
  # Calculate the date we're checking
  check_date=$(date_days_ago "$i")

  # If we've gone past the earliest commit date, break the loop
  if [[ "$check_date" < "$earliest_date" ]]; then
    break
  fi

  total_added=0
  total_deleted=0
  total_duplicated=0

  # git resolves a bare YYYY-MM-DD relative to the current time of day, so the
  # window is pinned to explicit midnight..end-of-day to cover exactly the
  # calendar day shown in the row. The commit list is read on fd 3 so that
  # commands inside the loop cannot consume it from stdin.
  while IFS= read -r sha <&3; do
    [[ -n "$sha" ]] || continue
    # Only commits touching at least one tracked extension are analysed.
    # NOTE: without --root, diff-tree lists no files for a parentless commit,
    # so the repository's first commit is not counted by this filter.
    if git diff-tree --no-commit-id --name-only -r "$sha" | grep -qE "$ext_regex"; then
      read -r added deleted duplicated <<< "$(calculate_word_changes "$sha")"
      total_added=$((total_added + ${added:-0}))
      total_deleted=$((total_deleted + ${deleted:-0}))
      total_duplicated=$((total_duplicated + ${duplicated:-0}))
    fi
  done 3< <(git rev-list --since="${check_date} 00:00:00" \
    --until="${check_date} 23:59:59" "$branch")

  net_change=$((total_added - total_deleted))

  # Output the results for this day
  printf "%-10s %8s %8s %11s " "$check_date" \
    "$(format_number "$total_added")" \
    "$(format_number "$total_deleted")" \
    "$(format_number "$total_duplicated")"
  # The sign is printed in front of the padded magnitude so signs line up in
  # their own column.
  if ((net_change > 0)); then
    printf "%11s\n" "+$(format_number "$net_change")"
  elif ((net_change < 0)); then
    printf "%11s\n" "-$(format_number "${net_change#-}")"
  else
    printf "%11s\n" "$(format_number 0)"
  fi
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment