Created
September 24, 2018 12:17
-
-
Save gangiman/a91aca1a5cd8b601ad5d94c1f91148e5 to your computer and use it in GitHub Desktop.
Refining uMatrix rules
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Export rule list from uMatrix extension to a file\n", | |
"path_to_file = '/Users/username/Downloads/my-umatrix-rules.txt'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# reading the file with rules\n", | |
"with open(path_to_file, 'r') as fh:\n", | |
" list_of_rules = [\n", | |
" tuple(line.rstrip('\\n').split(' '))\n", | |
" for line in fh\n", | |
" ]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# the first 50 rules are the defaults, so skip them\n", | |
"list_of_rules = list_of_rules[50:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# let's put all custom site rules into a DataFrame\n", | |
"import pandas as pd\n", | |
"df = pd.DataFrame(list_of_rules, columns=('orig', 'source', 'scope', 'rule'))\n", | |
"df = df[df.rule=='allow']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# the same source domains are whitelisted over and over\n", | |
"df.source.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Collapsing all rules irregardless of origin\n", | |
"set_of_uni_rules = {\n", | |
" ('*', source, scope, rule) if rule!='block' else (orig, source, scope, rule)\n", | |
" for orig, source, scope, rule in list_of_rules\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# lets aggregate base domains for some sites\n", | |
"set_of_unique_rules = set()\n", | |
"for full_rule in set_of_uni_rules:\n", | |
" orig,domain,cont,rule = full_rule\n", | |
" dms = domain.split('.')\n", | |
" if len(dms) > 2 and dms[0] != 'www' and dms[-1] in ('com', 'net'):\n", | |
" new_domain = '.'.join(dms[-2:])\n", | |
" print(\"Replacing domain '{}' for '{}'\".format(domain,new_domain))\n", | |
" set_of_unique_rules.add((orig, new_domain, cont, rule))\n", | |
" else:\n", | |
" set_of_unique_rules.add(full_rule)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Double check if rules make sense\n", | |
"set_of_unique_rules" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# write to file and import into extension\n", | |
"with open(\"/tmp/new-umatrix-rules.txt\", 'w+') as wfh:\n", | |
" wfh.write('\\n'.join(' '.join(t) for t in set_of_unique_rules))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment