Created
December 11, 2015 23:53
-
-
Save michael-erasmus/209ec081d5bd2831c149 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import glob\n", | |
"import pandas as pd\n", | |
"from StringIO import StringIO" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_file_blame_stats(fname):\n", | |
" \n", | |
" raw = os.popen('git ls-tree -r -z --name-only HEAD -- %s | xargs -0 -n1 git blame \\\n", | |
" --line-porcelain HEAD |grep \"^author \"|sort|uniq -c|sort -nr' % fname).read()\n", | |
"\n", | |
" data = pd.read_csv(StringIO(raw), names=['raw'])\n", | |
" data['lines'] = data.raw.map(lambda r: int(r.partition('author')[0]))\n", | |
" data['author'] = data.raw.map(lambda r: r.partition('author')[2])\n", | |
" return data.drop('raw',1)\n", | |
"\n", | |
"def get_file_owner(fname):\n", | |
" #\"Getting a file 'owner' is done by getting the user who've changed the most lines in the file :)\"\n", | |
" stats = get_file_blame_stats(fname)\n", | |
" return stats['author'][0]\n", | |
"\n", | |
"def get_file_last_updated(fname):\n", | |
" return os.popen('git log -1 --format=\"%ad\" -- ' + fname).read().rstrip()\n", | |
"\n", | |
"def get_number_of_lines_in_file(fname):\n", | |
" return sum(1 for line in open(fname))\n", | |
"\n", | |
"def get_number_of_commits_to_file(fname):\n", | |
" raw = os.popen('git log --name-only --pretty=format: %s | sort | uniq -c | sort -nr | tail -n 1' % fname).read()\n", | |
" return [int(s) for s in raw.split() if s.isdigit()][0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_data_for_files(path):\n", | |
" data = []\n", | |
" for fname in glob.glob(path):\n", | |
" owner = get_file_owner(fname)\n", | |
" number_of_lines = get_number_of_lines_in_file(fname)\n", | |
" number_of_commits = get_number_of_commits_to_file(fname)\n", | |
" last_updated = get_file_last_updated(fname)\n", | |
" data.append({\n", | |
" 'file' : fname, \n", | |
" 'owner' : owner, \n", | |
" 'last_updated': last_updated,\n", | |
" 'number_of_lines': number_of_lines,\n", | |
" 'number_of_commits': number_of_commits\n", | |
" })\n", | |
"\n", | |
" return pd.DataFrame(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file</th>\n", | |
" <th>last_updated</th>\n", | |
" <th>number_of_commits</th>\n", | |
" <th>number_of_lines</th>\n", | |
" <th>owner</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>actions_taken.view.lookml</td>\n", | |
" <td>Wed Nov 25 11:24:06 2015 +0000</td>\n", | |
" <td>79</td>\n", | |
" <td>876</td>\n", | |
" <td>Maxime Berthelot</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>actions_taken_by_customer.view.lookml</td>\n", | |
" <td>Fri Oct 24 22:22:50 2014 +0000</td>\n", | |
" <td>1</td>\n", | |
" <td>37</td>\n", | |
" <td>Michael Erasmus</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>actions_taken_by_user_by_day.view.lookml</td>\n", | |
" <td>Fri Nov 28 14:36:04 2014 +0000</td>\n", | |
" <td>1</td>\n", | |
" <td>76</td>\n", | |
" <td>Michael Erasmus</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>actions_taken_cohorts.view.lookml</td>\n", | |
" <td>Thu Nov 12 15:12:04 2015 +0000</td>\n", | |
" <td>3</td>\n", | |
" <td>71</td>\n", | |
" <td>Sunil Sadasivan</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>actions_taken_mrr.view.lookml</td>\n", | |
" <td>Mon Oct 27 08:38:48 2014 +0000</td>\n", | |
" <td>1</td>\n", | |
" <td>54</td>\n", | |
" <td>Michael Erasmus</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file last_updated \\\n", | |
"0 actions_taken.view.lookml Wed Nov 25 11:24:06 2015 +0000 \n", | |
"1 actions_taken_by_customer.view.lookml Fri Oct 24 22:22:50 2014 +0000 \n", | |
"2 actions_taken_by_user_by_day.view.lookml Fri Nov 28 14:36:04 2014 +0000 \n", | |
"3 actions_taken_cohorts.view.lookml Thu Nov 12 15:12:04 2015 +0000 \n", | |
"4 actions_taken_mrr.view.lookml Mon Oct 27 08:38:48 2014 +0000 \n", | |
"\n", | |
" number_of_commits number_of_lines owner \n", | |
"0 79 876 Maxime Berthelot \n", | |
"1 1 37 Michael Erasmus \n", | |
"2 1 76 Michael Erasmus \n", | |
"3 3 71 Sunil Sadasivan \n", | |
"4 1 54 Michael Erasmus " | |
] | |
}, | |
"execution_count": 56, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"view_data = get_data_for_files(\"*.view.lookml\")\n", | |
"view_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 170, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file</th>\n", | |
" <th>last_updated</th>\n", | |
" <th>owner</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>awesome.dashboard.lookml</td>\n", | |
" <td>Tue Nov 3 02:40:34 2015 +0000</td>\n", | |
" <td>Sunil Sadasivan</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>churn_dashboard.dashboard.lookml</td>\n", | |
" <td>Wed Nov 26 09:19:36 2014 +0000</td>\n", | |
" <td>Steven Cheng</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>client_dashboard.dashboard.lookml</td>\n", | |
" <td>Fri Jun 12 06:17:23 2015 +0000</td>\n", | |
" <td>Sunil Sadasivan</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>daily_pulse.dashboard.lookml</td>\n", | |
" <td>Mon Mar 23 15:46:43 2015 +0000</td>\n", | |
" <td>Steven Cheng</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>experiment_results.dashboard.lookml</td>\n", | |
" <td>Mon Mar 23 15:46:43 2015 +0000</td>\n", | |
" <td>Sunil Sadasivan</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file last_updated \\\n", | |
"0 awesome.dashboard.lookml Tue Nov 3 02:40:34 2015 +0000 \n", | |
"1 churn_dashboard.dashboard.lookml Wed Nov 26 09:19:36 2014 +0000 \n", | |
"2 client_dashboard.dashboard.lookml Fri Jun 12 06:17:23 2015 +0000 \n", | |
"3 daily_pulse.dashboard.lookml Mon Mar 23 15:46:43 2015 +0000 \n", | |
"4 experiment_results.dashboard.lookml Mon Mar 23 15:46:43 2015 +0000 \n", | |
"\n", | |
" owner \n", | |
"0 Sunil Sadasivan \n", | |
"1 Steven Cheng \n", | |
"2 Sunil Sadasivan \n", | |
"3 Steven Cheng \n", | |
"4 Sunil Sadasivan " | |
] | |
}, | |
"execution_count": 170, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dashboard_data = get_data_for_files(\"*.dashboard.lookml\")\n", | |
"dashboard_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 178, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>file</th>\n", | |
" <th>last_updated</th>\n", | |
" <th>owner</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>admin.model.lookml</td>\n", | |
" <td>Fri May 22 11:48:54 2015 +0000</td>\n", | |
" <td>Michael Erasmus</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>buffer.model.lookml</td>\n", | |
" <td>Thu Dec 3 15:02:50 2015 +0000</td>\n", | |
" <td>Sunil Sadasivan</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>business_usage.model.lookml</td>\n", | |
" <td>Tue Oct 27 19:59:32 2015 +0000</td>\n", | |
" <td>Michael Erasmus</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>customer_success.model.lookml</td>\n", | |
" <td>Wed Nov 25 00:53:49 2015 +0000</td>\n", | |
" <td>Michael Erasmus</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>event_data.model.lookml</td>\n", | |
" <td>Thu Oct 15 12:39:45 2015 +0000</td>\n", | |
" <td>Michael Erasmus</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" file last_updated \\\n", | |
"0 admin.model.lookml Fri May 22 11:48:54 2015 +0000 \n", | |
"1 buffer.model.lookml Thu Dec 3 15:02:50 2015 +0000 \n", | |
"2 business_usage.model.lookml Tue Oct 27 19:59:32 2015 +0000 \n", | |
"3 customer_success.model.lookml Wed Nov 25 00:53:49 2015 +0000 \n", | |
"4 event_data.model.lookml Thu Oct 15 12:39:45 2015 +0000 \n", | |
"\n", | |
" owner \n", | |
"0 Michael Erasmus \n", | |
"1 Sunil Sadasivan \n", | |
"2 Michael Erasmus \n", | |
"3 Michael Erasmus \n", | |
"4 Michael Erasmus " | |
] | |
}, | |
"execution_count": 178, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model_data = get_data_for_files(\"*.model.lookml\")\n", | |
"model_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 179, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"view_data.to_csv(\"view_lookml_files.csv\", index=False)\n", | |
"dashboard_data.to_csv(\"dashboard_lookml_files.csv\", index=False)\n", | |
"model_data.to_csv(\"model_lookml_files.csv\", index=False)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment