Created
July 19, 2013 14:53
-
-
Save ewindisch/6039682 to your computer and use it in GitHub Desktop.
Get the total number of results for a string match over time from a git repository. The example here uses 'def test_' to fetch time series data for the addition of unit tests to a code base.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
import itertools | |
import subprocess | |
import time | |
def soi(s):
    """Return the integer that follows the last ':' in *s*.

    Parses one line of ``git grep -c`` output, which has the form
    ``<path>:<count>``. Only the last colon is treated as the separator,
    so a path that itself contains colons does not affect the result.

    Args:
        s: Text or byte string ending in ``:<integer>``. Surrounding
            whitespace around the number (e.g. the trailing newline of a
            line read from a pipe) is ignored. A string without any
            colon is parsed as a whole.

    Returns:
        The trailing count as an ``int``.

    Raises:
        ValueError: If the text after the last colon is not an integer.
    """
    # Lines read from an undecoded pipe are bytes on Python 3; pick a
    # separator of the matching type so both kinds of input work.
    separator = b':' if isinstance(s, bytes) else ':'
    return int(s.rsplit(separator, 1)[-1].strip())
def _read_commit_date(log_lines, max_lines=5):
    """Return the value of the next ``Date:`` header found in *log_lines*.

    Looks at no more than *max_lines* lines (the header usually appears
    2-3 lines after the ``commit`` line) and raises ``Exception`` if the
    header is missing or the log output ends first.
    """
    for _ in range(max_lines):
        line = next(log_lines, None)
        if line is None:
            break
        if line.startswith('Date:'):
            return line.split(':', 1)[-1].strip()
    raise Exception("Date not found for commit.")


def _checkout(commit):
    """Force-checkout *commit* and block until git has finished."""
    command = ['git', 'checkout', '-f', commit]
    proc = subprocess.Popen(command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the process, so the
    # working tree is fully updated before anything greps it.
    proc.communicate()
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, command)


def _count_matches(pattern):
    """Return the total of the per-file counts from ``git grep -c``."""
    proc = subprocess.Popen(['git', 'grep', '-c', pattern],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    output, _ = proc.communicate()
    # No matching file means no output lines, which sums to 0.
    return sum(soi(line) for line in output.splitlines())


def count_pattern(pattern):
    """Print a time series of how often *pattern* occurs in a git repo.

    Walks the commits reported by ``git log -S <pattern>`` (commits that
    changed the number of occurrences of the pattern), checks each one
    out, and counts the matches with ``git grep -c``. One CSV line
    ``year/month/day,count`` is printed per calendar day; only the first
    commit that ``git log`` emits for a given day is measured.

    Must be run from inside a git working tree. WARNING: commits are
    checked out with ``git checkout -f``, which discards uncommitted
    changes, and the repository is left on the last commit visited.

    Args:
        pattern: The string to search for, e.g. ``'def test_'``.

    Raises:
        Exception: If no ``Date:`` header follows a ``commit`` line.
        subprocess.CalledProcessError: If a ``git checkout`` fails.
    """
    seen_dates = set()
    log = subprocess.Popen(['git', 'log', '-S', pattern],
                           stdout=subprocess.PIPE,
                           universal_newlines=True)
    try:
        for line in log.stdout:
            if not line.startswith('commit '):
                continue
            # The hash is the second field; taking it by position stays
            # correct even if ref decorations follow it on the line.
            commit = line.split()[1]

            date = _read_commit_date(log.stdout)

            # This should really apply the timezone, but we don't
            # really need that level of precision here... Dropping the
            # trailing UTC offset leaves text that matches strptime's
            # default format, "%a %b %d %H:%M:%S %Y".
            ts = time.strptime(date.rsplit(' ', 1)[0])

            # year / mon / day formatting (easier to sort)
            local_date = "%s/%s/%s" % (ts.tm_year, ts.tm_mon, ts.tm_mday)

            # Skip already-reported days before doing the expensive
            # checkout and grep.
            if local_date in seen_dates:
                continue
            seen_dates.add(local_date)

            # The working tree is grepped, so check out the commit first.
            _checkout(commit)
            matches = _count_matches(pattern)

            print("%s,%s" % (local_date, matches))
    finally:
        # Close the pipe and reap git log even when bailing out early.
        log.stdout.close()
        log.wait()
if __name__ == '__main__':
    # Example run: report how the number of unit tests ('def test_'
    # definitions) in the current repository changed over time.
    count_pattern('def test_')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment