Last active
September 5, 2024 19:03
-
-
Save danieltomasz/e3be94d7f03ade2eb030edf63920e95a to your computer and use it in GitHub Desktop.
A simple script to count net changes in the git repository for the defined file extensions (useful for daily goals when writing thesis/articles)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Git Word Count Analyzer
# Copyright (c) 2024 Daniel Borek
# MIT License
# Inspired by https://www.gijsvandam.nl/post/measuring-your-writing-progress-with-a-git-word-count/
# Configuration

# FILE_EXTENSIONS: Array of file extensions to analyze (e.g., "typ" "md").
# Write them without the leading dot and separate multiple extensions
# with spaces.
FILE_EXTENSIONS=("typ" "md")

# DAYS_TO_SEARCH: Number of days to analyze, starting from today.
# The script will stop at the repository's creation date if reached.
# Defaults to 1 week (7 days); a non-empty DAYS_TO_SEARCH already present in
# the environment takes precedence, e.g. `DAYS_TO_SEARCH=30 ./script.sh`.
DAYS_TO_SEARCH=${DAYS_TO_SEARCH:-7}
# Function to calculate word changes for a single commit
#
# Globals:
#   FILE_EXTENSIONS (read) - extensions (without dot) of the files to analyse
# Arguments:
#   $1 - the commit to analyse (anything git can resolve to a commit)
# Outputs:
#   One line on stdout: "<added> <deleted> <duplicated>"
#     added      - words on "+" lines of the word diff against the parent
#     deleted    - words on "-" lines of the word diff against the parent
#     duplicated - words on changed lines whose text occurs more than once
#                  among the added/removed lines of the regular diff
#                  (presumably a hint for text that was moved, not written)
#
# The commit is compared with its FIRST parent. A root commit has no parent,
# so it is compared with git's empty tree, which makes every word in the
# matching files count as added.
calculate_word_changes() {
  local sha=$1

  # Turn the configured extensions into git pathspecs ("*.md", "*.typ", ...).
  local ext
  local -a file_patterns=()
  for ext in "${FILE_EXTENSIONS[@]}"; do
    file_patterns+=("*.$ext")
  done

  # "$sha^" fails for a root commit; fall back to the empty tree in that case.
  local parent_sha
  local is_root=0
  if ! parent_sha=$(git rev-parse --verify --quiet "${sha}^" 2>/dev/null); then
    parent_sha=$(git hash-object -t tree /dev/null)
    is_root=1
  fi

  # One word-level diff serves both the "added" and the "deleted" count.
  local word_diff
  word_diff=$(git diff --word-diff=porcelain "$parent_sha" "$sha" \
    -- "${file_patterns[@]}")

  # "^+[^+]" / "^-[^-]" keep changed text but skip the "+++" / "---" headers.
  local added deleted
  added=$(printf '%s\n' "$word_diff" | grep -e '^+[^+]' | wc -w | tr -d '[:space:]')
  deleted=$(printf '%s\n' "$word_diff" | grep -e '^-[^-]' | wc -w | tr -d '[:space:]')

  # A root commit is reported with no duplicated words.
  local duplicated=0
  if [[ "$is_root" -eq 0 ]]; then
    duplicated=$(git diff "$parent_sha" "$sha" -- "${file_patterns[@]}" \
      | grep -e '^+[^+]' -e '^-[^-]' \
      | sed -e 's/.//' \
      | sort \
      | uniq -d \
      | wc -w \
      | tr -d '[:space:]')
  fi

  printf '%s %s %s\n' "$added" "$deleted" "$duplicated"
}
# Function to format numbers with leading spaces
#
# Arguments:
#   $1 - integer to format; a missing or empty argument is printed as 0
# Outputs:
#   The number right-aligned in a field at least 6 characters wide, without
#   a trailing newline.
format_number() {
  # Quoted so the argument is never word-split or glob-expanded.
  printf '%6d' "${1:-0}"
}
# Report: print one row per calendar day, newest first, covering at most
# DAYS_TO_SEARCH days and stopping once the day of the earliest commit has
# been passed. Each row sums the word changes of that day's commits that
# touch files with one of the configured FILE_EXTENSIONS.

# Print the date (YYYY-MM-DD) that lies $1 days before today.
# BSD/macOS date understands -v, GNU date understands -d; try both.
date_days_ago() {
  local days=$1
  date -v-"${days}"d +%Y-%m-%d 2>/dev/null \
    || date -d "${days} days ago" +%Y-%m-%d
}

# Without a repository that has at least one commit there is nothing to report.
if ! git rev-parse --verify --quiet HEAD >/dev/null 2>&1; then
  printf 'error: not inside a git repository with at least one commit\n' >&2
  exit 1
fi

# Commits are listed from "main"; fall back to HEAD when no such ref exists.
target_ref=main
if ! git rev-parse --verify --quiet "main^{commit}" >/dev/null 2>&1; then
  target_ref=HEAD
fi

# Regex matching any path that ends in one of the configured extensions,
# e.g. \.(typ|md)$ -- built once instead of once per commit.
extension_regex=$(IFS='|'; printf '\\.(%s)$' "${FILE_EXTENSIONS[*]}")

# Get the date of the earliest commit
earliest_date=$(git log --reverse --format=%ad --date=short | head -1)

# Print header
printf "%-10s %8s %8s %11s %11s\n" "Date" "Added" "Deleted" "Duplicated" "Net Change"
printf "%-10s %8s %8s %11s %11s\n" "----------" "--------" "--------" "-----------" "-----------"

for ((i = 0; i < DAYS_TO_SEARCH; i++)); do
  # Calculate the date we're checking
  check_date=$(date_days_ago "$i") || exit 1

  # If we've gone past the earliest commit date, break the loop.
  # ISO dates (YYYY-MM-DD) sort correctly as plain strings.
  if [[ "$check_date" < "$earliest_date" ]]; then
    break
  fi

  # Get the list of commits for this day. The bounds carry an explicit time
  # so the window is exactly the calendar day named in the row; a date-only
  # bound would not start at midnight of that day.
  commits=$(git rev-list \
    --since="$check_date 00:00:00" \
    --until="$check_date 23:59:59" \
    "$target_ref")

  total_added=0
  total_deleted=0
  total_duplicated=0

  # Intentionally unquoted: rev-list prints one hash per line.
  for sha in $commits; do
    # Only look at commits that touch at least one file of interest.
    # NOTE: git diff-tree prints nothing for a root commit unless --root is
    # given (and nothing for merges by default), so those are skipped here.
    if git diff-tree --no-commit-id --name-only -r "$sha" \
      | grep -qE "$extension_regex"; then
      read -r added deleted duplicated <<<"$(calculate_word_changes "$sha")"
      total_added=$((total_added + ${added:-0}))
      total_deleted=$((total_deleted + ${deleted:-0}))
      total_duplicated=$((total_duplicated + ${duplicated:-0}))
    fi
  done

  net_change=$((total_added - total_deleted))

  # Output the results for this day
  printf "%-10s %8s %8s %11s " \
    "$check_date" \
    "$(format_number "$total_added")" \
    "$(format_number "$total_deleted")" \
    "$(format_number "$total_duplicated")"

  # The sign is printed in front of the padded absolute value.
  if ((net_change > 0)); then
    printf "%11s\n" "+$(format_number "$net_change")"
  elif ((net_change < 0)); then
    printf "%11s\n" "-$(format_number "${net_change#-}")"
  else
    printf "%11s\n" "$(format_number 0)"
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment