Skip to content

Instantly share code, notes, and snippets.

@ewindisch
Created July 19, 2013 14:53
Show Gist options
  • Save ewindisch/6039682 to your computer and use it in GitHub Desktop.
Save ewindisch/6039682 to your computer and use it in GitHub Desktop.
Get the total number of results for a string match over time from a git repository. The example here uses 'def test_' to fetch time series data for the addition of unit tests to a code base.
#!/usr/bin/env python2.7
import itertools
import subprocess
import time
def soi(s):
    """Return the integer found after the last ':' in ``s``.

    Written for ``git grep -c`` output lines such as ``path/file.py:12``.
    Only the text after the final colon is parsed, so additional colons
    earlier in the line (in a path or a ``<commit>:`` prefix) are ignored.
    A line without any colon is parsed as a whole.

    Args:
        s: a text or byte string; surrounding whitespace (including the
            trailing newline of a line read from a pipe) is ignored.

    Returns:
        The trailing field converted with ``int()``.

    Raises:
        ValueError: if the trailing field is not an integer literal.
    """
    # Pick a separator of the same string type as the input so that lines
    # read from a binary pipe work as well as text lines.
    sep = b':' if isinstance(s, bytes) else ':'
    # Split once from the right: only the last field is of interest.
    return int(s.rsplit(sep, 1)[-1].strip())
def count_pattern(pattern):
    """Print a per-day CSV time series of match counts for ``pattern``.

    Reads the commits listed by ``git log -S <pattern>`` in the order git
    emits them. For the first commit seen on each calendar day it prints one
    line ``year/month/day,total`` to stdout, where ``total`` is the sum of
    the per-file counts reported by ``git grep -c`` for that commit. Later
    commits that fall on an already-reported day are skipped.

    The commit is passed to ``git grep`` as a tree argument, so the working
    tree and HEAD are left untouched.

    Args:
        pattern: the search string handed to both ``git log -S`` and
            ``git grep``.

    Raises:
        Exception: if no ``Date:`` line is found within the five lines that
            follow a ``commit`` header line.
    """
    seen_days = set()
    log = subprocess.Popen(['git', 'log', '-S', pattern],
                           stdout=subprocess.PIPE,
                           universal_newlines=True)
    try:
        # One shared iterator: the header scan below has to consume lines
        # from the same stream the outer loop is walking.
        lines = iter(log.stdout)
        for line in lines:
            if not line.startswith('commit'):
                continue
            commit = line.split(' ')[-1].rstrip()

            # Get the date -- usually only 2-3 lines following.
            for _ in range(5):
                header = next(lines, '')
                if header.startswith('Date:'):
                    date = header.split(':', 1)[-1].strip()
                    break
            else:
                raise Exception("Date not found for commit.")

            # This should really apply the timezone, but that level of
            # precision is not needed here: the trailing UTC offset is
            # dropped and the date is used as printed by git.
            ts = time.strptime(date.rsplit(' ', 1)[0])
            # year / mon / day formatting
            local_date = "%s/%s/%s" % (ts.tm_year, ts.tm_mon, ts.tm_mday)

            # Only one data point per day is reported, so skip the grep
            # entirely for days that already have one.
            if local_date in seen_days:
                continue
            seen_days.add(local_date)

            # Count matches directly in the commit's tree. Each output line
            # ends in ":<count>", which is all soi() looks at.
            grep = subprocess.Popen(
                ['git', 'grep', '-c', '-e', pattern, commit, '--'],
                stdout=subprocess.PIPE,
                universal_newlines=True)
            # communicate() drains the pipe and waits for git to exit.
            # git grep exits non-zero when nothing matches; that simply
            # yields no lines and a total of 0.
            output, _ = grep.communicate()
            matches = sum(soi(entry) for entry in output.splitlines())

            # Single parenthesised argument: prints the same text whether
            # print is a statement or a function.
            print("%s,%s" % (local_date, matches))
    finally:
        # Always release the pipe and reap the git log process.
        log.stdout.close()
        log.wait()
if __name__ == '__main__':
    # Run the scan only when executed as a script, so importing this module
    # does not start spawning git processes.
    import sys

    # An optional first command-line argument overrides the example pattern,
    # which counts unit-test definitions.
    count_pattern(sys.argv[1] if len(sys.argv) > 1 else 'def test_')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment