Created
          September 8, 2011 03:02 
        
      - 
      
- 
        Save wesm/1202507 to your computer and use it in GitHub Desktop. 
    Little script to get a time series of code churn (insertions, deletions) on a git repo
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from dateutil import parser | |
| import subprocess | |
| import os | |
| import re | |
| import sys | |
| import numpy as np | |
| from pandas import * | |
# Location of the git repository whose history is analysed.
repo_path = '/home/wesm/code/pandas'

# Shell command that writes one line per commit to ``githist.txt``.  Each
# line starts with the abbreviated sha (%h) followed by the short-format
# date (%ad), which are the only two fields the rest of the script reads.
githist = ' '.join([
    'git log',
    '--pretty=format:"%h %ad | %s%d [%an]"',
    '--date=short',
    repo_path,
    '> githist.txt',
])
def rungithist():
    """Execute the module-level ``githist`` shell command.

    The command redirects ``git log`` output into ``githist.txt`` in the
    current working directory.  The exit status returned by ``os.system``
    is discarded.
    """
    shell_command = githist
    os.system(shell_command)
def get_commit_history():
    """Return the commit dates of the repository as a Series.

    Runs ``rungithist()`` to dump the log into ``githist.txt``, reads the
    file, deletes it, and parses the first two whitespace-separated fields
    of every line (abbreviated sha, short date).

    Returns
    -------
    Series
        Parsed commit dates indexed by abbreviated sha, in the order the
        lines appear in the log.  Empty when the log has no usable lines.
    """
    rungithist()

    # Use a context manager so the handle is closed before the file is
    # removed (the original left it open for the garbage collector).
    with open('githist.txt') as handle:
        log_text = handle.read()
    os.remove('githist.txt')

    shas = []
    dates = []
    for line in log_text.split('\n'):
        fields = line.split()
        # Skip blank or malformed lines; a single short row would otherwise
        # silently truncate every column when the rows are transposed.
        if len(fields) < 2:
            continue
        shas.append(fields[0])
        dates.append(parser.parse(fields[1]))

    return Series(dates, index=shas)
def get_commit_churn(sha, prev_sha):
    """Return ``(insertions, deletions)`` reported by ``git diff --stat``.

    Runs ``git diff <sha> <prev_sha> --stat`` in the current working
    directory and parses the summary (last non-blank) line of its output.

    Parameters
    ----------
    sha, prev_sha : str
        Revisions passed to ``git diff`` in that order, so the counts
        describe the change going from ``sha`` to ``prev_sha``.

    Returns
    -------
    tuple of (int, int)
        Inserted and deleted line counts.  A count that the summary does
        not mention is reported as 0; an empty diff yields ``(0, 0)``.

    Raises
    ------
    subprocess.CalledProcessError
        If git exits with a non-zero status.
    """
    command = ['git', 'diff', sha, prev_sha, '--stat']
    # universal_newlines=True makes the pipe yield text on Python 3 as well
    # as Python 2, so the string handling below works on both.
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            universal_newlines=True)
    # communicate() drains the pipe and waits for the child to exit.
    output, _ = proc.communicate()
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, command)

    nonblank = [line for line in output.split('\n') if line.strip()]
    if not nonblank:
        # Identical trees: git prints nothing at all.
        return 0, 0
    summary = nonblank[-1]

    # Newer git versions drop a zero count from the summary and use the
    # singular ("1 insertion(+)"), so look each count up independently.
    counts = []
    for noun in ('insertion', 'deletion'):
        found = re.search(r'(\d+)\s+' + noun, summary)
        counts.append(int(found.group(1)) if found else 0)
    insertions, deletions = counts
    return insertions, deletions
def get_code_churn(commits):
    """Compute per-commit insertions and deletions.

    Parameters
    ----------
    commits : Series
        Commit history whose index holds the shas, ordered newest first
        (the order in which ``git log`` lists them).

    Returns
    -------
    DataFrame
        Columns ``'insertions'`` and ``'deletions'``, indexed like
        ``commits``.  Each row holds the diff between that commit and the
        next (older) entry in the index; the last (oldest) row is NaN
        because it has nothing to be compared against.
    """
    shas = list(commits.index)
    insertions = []
    deletions = []

    # Walk consecutive (newer, older) pairs.  The diff from ``older`` to
    # ``newer`` is the change introduced by ``newer``, so it is stored at
    # ``newer``'s position.  The original stored it at ``older``'s position,
    # shifting every value by one commit.
    for newer, older in zip(shas, shas[1:]):
        added, removed = get_commit_churn(older, newer)
        insertions.append(added)
        deletions.append(removed)

    if shas:
        # The oldest listed commit has no predecessor in the listing.
        insertions.append(np.nan)
        deletions.append(np.nan)

    return DataFrame({'insertions': insertions,
                      'deletions': deletions}, index=commits.index)
if __name__ == '__main__':
    # Build the sha -> date mapping, then the per-commit churn table.
    commits = get_commit_history()
    churn = get_code_churn(commits)

    # Total the churn of all commits sharing a date.
    by_date = churn.groupby(commits).sum()

    # Keep only days with fewer than 5000 changed lines in total
    # (original note: clean out days where I touched Cython).
    small_days = by_date.sum(axis=1) < 5000
    by_date = by_date[small_days]
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment