Put all files in the HTTPS Everywhere git repository. Run get_count.sh, then plot_count.py.
Last active
February 18, 2016 17:30
-
-
Save fuglede/462596c2384db59cf266 to your computer and use it in GitHub Desktop.
HTTPS Everywhere ruleset count
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# For every commit on master (oldest first), count the ruleset paths in that
# commit's tree and append one "<count> <commit date>" line to
# ruleset_count_data. Run this from inside the HTTPS Everywhere git repository.
# Exits with status 1 if the data file already exists or no git repo is found.

# Does the data already exist? Let's not overwrite stuff.
if [ -f ruleset_count_data ]; then
    echo "Error: Ruleset data file (ruleset_count_data) exists. Remove the file to create from scratch.";
    exit 1
fi;

# Naive check: are we actually in a git repo?
if git rev-parse --git-dir > /dev/null 2>&1; then
    echo "Creating list of rulesets by date. This might take a while.";
    for commit in $(git rev-list --reverse master); do
        # Count the paths in this commit's tree that contain the rules directory.
        number_of_rules=$(git ls-tree --name-only -r "$commit" | grep -c "src/chrome/content/rules");
        # Committer date in ISO-like format, e.g. "2016-02-18 17:30:00 +0100".
        date_of_commit=$(git show -s --format=%ci "$commit");
        echo "$number_of_rules $date_of_commit" >> ruleset_count_data;
    done
    echo "Done fetching data. Execute plot_count.py to plot it.";
else
    echo "Error: Https-e git repository not found. Put these files in the https-e git repo.";
    # Signal the failure to the caller instead of exiting with status 0.
    exit 1
fi;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
from datetime import date, datetime | |
import re | |
import os.path | |
from matplotlib.dates import drange, num2date, date2num | |
from matplotlib.pyplot import * | |
def _read_counts(lines):
    """Parse "<count> <date> <time> <tz>" lines into (date number, count) pairs.

    `lines` is any iterable of text lines (e.g. an open file). Lines that do
    not match the expected layout are skipped. The date number is whatever
    matplotlib's `date2num` returns for the naive commit timestamp.
    """
    points = []
    for line in lines:
        # We ignore timezones since their effect is largely negligible
        # (and because dealing with timezones in Python 2 is painful).
        match = re.match(r"(\d+)\s(.*)\s[\+-]", line.replace('\n', ''))
        if match is None:
            continue
        count = int(match.group(1))
        date_object = datetime.strptime(match.group(2), "%Y-%m-%d %H:%M:%S")
        year = date_object.year
        # Manually remove some counts since someone
        # appears to have messed up their system times.
        if (count > 16000 and year < 2015) or (count > 19000 and year < 2016):
            continue
        points.append((date2num(date_object), count))
    return points


def _running_maxima(points):
    """Return (dates, counts) keeping only points that set a new highest count.

    Points are visited in chronological order; a point is kept only when its
    count is strictly greater than every count seen before it.
    """
    max_count = 0
    date_plot = []
    count_plot = []
    for when, count in sorted(points):
        if count > max_count:
            max_count = count
            date_plot.append(when)
            count_plot.append(count)
    return date_plot, count_plot


def main():
    """Plot the ruleset count over time from ruleset_count_data.

    Writes the figure to curve_plot.png. Prints an error and returns without
    plotting when the output file already exists, when the data file cannot
    be opened, or when the data file contains no usable entries.
    """
    output = 'curve_plot.png'
    if os.path.isfile(output):
        print("Error: Output file (%s) exists. Delete and rerun to produce a new one." % output)
        return
    try:
        f = open('ruleset_count_data', 'r')
    except IOError:
        print("Error: Run get_count.sh to get the data first.")
        return
    # Close the data file as soon as parsing is done.
    with f:
        points = _read_counts(f)
    if not points:
        # Without this guard the unpacking of an empty sequence would crash.
        print("Error: No usable data found in ruleset_count_data.")
        return

    # Let's just focus on the highest ruleset counts we've seen: this
    # makes sense since at no point a (very) large number of rulesets
    # were deleted.
    date_plot, count_plot = _running_maxima(points)

    # Finally it's plotting time!
    rc('font', family='serif')
    title('Number of rulesets in browser plugin HTTPS Everywhere')
    ylabel('Ruleset count')
    xlabel('Time of commit')
    xticks(rotation=30)  # Avoid overlap in tick labels
    plot_date(date_plot, count_plot, '.')
    gcf().subplots_adjust(bottom=0.15)  # Avoid cropping
    try:
        rc('text', usetex=True)  # Might fail depending on setup
        savefig(output)
    except Exception:
        # Fall back to matplotlib's built-in text rendering.
        rc('text', usetex=False)
        savefig(output)
    print("Plotting complete. See %s for the result." % output)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment