Created
September 14, 2018 13:20
-
-
Save hellais/c7b041f039e70d40bb0954eb297dad03 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 189, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import base64\n", | |
"import csv\n", | |
"import gzip\n", | |
"import json\n", | |
"import urllib\n", | |
"\n", | |
"import bson\n", | |
"import lz4.frame\n", | |
"import msgpack" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 191, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Interactive help lookup (IPython `?` suffix) for lz4.frame.compress; it assigns nothing that other cells use.\n", | |
"lz4.frame.compress?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 186, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"global_list = []\n", | |
"with open('../../citizenlab/test-lists/lists/global.csv') as in_file:\n", | |
" for line in in_file:\n", | |
" global_list.append(line.split(',')[0])\n", | |
"global_list = global_list[1:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 132, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"62\n", | |
"58\n", | |
"44\n" | |
] | |
} | |
], | |
"source": [ | |
"obj = {'t':\n", | |
" {'u': global_list[0], 't': 'web_connectivity'}\n", | |
" }\n", | |
"print(len(bson.dumps(obj)))\n", | |
"print(len(json.dumps(obj)))\n", | |
"print(len(msgpack.dumps([obj])))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 167, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Substitutes for '+' and '/' in the Base64 alphabet; passed as the `altchars` argument of base64.b64encode below.\n", | |
"altchars = b'-_'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 168, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Interactive help lookup (IPython `?` suffix) for base64.b64encode; it assigns nothing that other cells use.\n", | |
"base64.b64encode?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 199, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def b64_lz4_ooni_run(urls):\n", | |
" obj = [\n", | |
" {'u': urls, 't': 'web_connectivity'}\n", | |
" ]\n", | |
" ta = base64.b64encode(lz4.frame.compress(json.dumps(obj).encode('ascii'), compression_level=16), altchars)\n", | |
" return 'https://run.ooni.io/l/v2#' + ta.decode('ascii')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 169, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def b64_gzip_ooni_run(urls):\n", | |
" obj = [\n", | |
" {'u': urls, 't': 'web_connectivity'}\n", | |
" ]\n", | |
" ta = base64.b64encode(gzip.compress(json.dumps(obj).encode('ascii')), altchars)\n", | |
" return 'https://run.ooni.io/l/v2#' + ta.decode('ascii')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 198, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def b64_msgpack_ooni_run(urls):\n", | |
" obj = [{'u': urls, 't': 'web_connectivity'}]\n", | |
" ta = base64.b64encode(msgpack.dumps(obj), altchars)\n", | |
" return 'https://run.ooni.io/l/v2#' + ta.decode('utf-8')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 171, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def b64_bson_ooni_run(urls):\n", | |
" obj = {'u': urls, 't': 'web_connectivity'}\n", | |
" ta = base64.b64encode(bson.dumps(obj), altchars)\n", | |
" return 'https://run.ooni.io/l/v2#' + ta.decode('utf-8')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 172, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def b85_gzip_ooni_run(urls):\n", | |
" obj = [\n", | |
" {'u': urls, 't': 'web_connectivity'}\n", | |
" ]\n", | |
" ta = base64.b85encode(gzip.compress(json.dumps(obj).encode('utf-8')))\n", | |
" return 'https://run.ooni.io/l/v2#' + ta.decode('utf-8')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 173, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def b64_ooni_run(urls):\n", | |
" obj = [\n", | |
" {'u': urls, 't': 'web_connectivity'}\n", | |
" ]\n", | |
" ta = base64.b64encode(json.dumps(obj).encode('utf-8'), altchars)\n", | |
" return 'https://run.ooni.io/l/v2#' + ta.decode('utf-8')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 161, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def current_ooni_run_link(urls):\n", | |
" ta = urllib.parse.quote(json.dumps({'urls': urls}))\n", | |
" return 'https://run.ooni.io/nettest?tn=web_connectivity&ta=' + ta" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 200, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Test global_list\n", | |
"current: 43397 100.00%\n", | |
"b64: 42693 98.38%\n", | |
"b64_gzip: 11061 25.49%\n", | |
"b85_gzip: 10370 23.90%\n", | |
"b64_bson: 51801 119.37%\n", | |
"b64_msgpack: 38417 88.52%\n", | |
"b64_lz4: 15393 35.47%\n", | |
"\n", | |
"Test 30_urls\n", | |
"current: 1302 100.00%\n", | |
"b64: 1309 100.54%\n", | |
"b64_gzip: 625 48.00%\n", | |
"b85_gzip: 587 45.08%\n", | |
"b64_bson: 1501 115.28%\n", | |
"b64_msgpack: 1189 91.32%\n", | |
"b64_lz4: 849 65.21%\n", | |
"\n", | |
"Test 1_url\n", | |
"current: 100 100.00%\n", | |
"b64: 101 101.00%\n", | |
"b64_gzip: 125 125.00%\n", | |
"b85_gzip: 117 117.00%\n", | |
"b64_bson: 101 101.00%\n", | |
"b64_msgpack: 85 85.00%\n", | |
"b64_lz4: 129 129.00%\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"test_cases = {\n", | |
" 'global_list': global_list,\n", | |
" '30_urls': global_list[:30],\n", | |
" '1_url': global_list[2]\n", | |
"}\n", | |
"for item, urls in test_cases.items():\n", | |
" current_len = len(current_ooni_run_link(urls))\n", | |
" b64_len = len(b64_ooni_run(urls))\n", | |
" b64_gzip_len = len(b64_gzip_ooni_run(urls))\n", | |
" b85_gzip_len = len(b85_gzip_ooni_run(urls))\n", | |
" b64_bson_len = len(b64_bson_ooni_run(urls))\n", | |
" b64_msgpack_len = len(b64_msgpack_ooni_run(urls))\n", | |
" b64_lz4_len = len(b64_lz4_ooni_run(urls))\n", | |
"\n", | |
" print(\"Test %s\" % item)\n", | |
" print(\"current: %d %.2f%%\" % (current_len, current_len/current_len * 100))\n", | |
" print(\"b64: %d %.2f%%\" % (b64_len, b64_len/current_len * 100))\n", | |
" print(\"b64_gzip: %d %.2f%%\" % (b64_gzip_len, b64_gzip_len/current_len*100))\n", | |
" print(\"b85_gzip: %d %.2f%%\" % (b85_gzip_len, b85_gzip_len/current_len*100))\n", | |
" print(\"b64_bson: %d %.2f%%\" % (b64_bson_len, b64_bson_len/current_len*100))\n", | |
" print(\"b64_msgpack: %d %.2f%%\" % (b64_msgpack_len, b64_msgpack_len/current_len*100))\n", | |
" print(\"b64_lz4: %d %.2f%%\" % (b64_lz4_len, b64_lz4_len/current_len*100))\n", | |
" \n", | |
" print(\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 201, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"https://run.ooni.io/l/v2#H4sIALzKmlsC_4uuVipVslJQyigpKbDS1zczM0tPzE0t1stLLVHSUVAqAcmVpybFJ-fn5aUml2SWZZZUKtXGAgC4HfnkNwAAAA==\n" | |
] | |
} | |
], | |
"source": [ | |
"# Show one complete gzip + Base64 link, built from a single entry of global_list.\n", | |
"print(b64_gzip_ooni_run(global_list[2]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 207, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#print(current_ooni_run_link(global_list))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 206, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#print(b64_gzip_ooni_run(global_list))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment