Last active
March 3, 2023 23:52
-
-
Save shanecandoit/7ed9cc88936a9088d98deca278ff2542 to your computer and use it in GitHub Desktop.
python_list_tokens_hashing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import hashlib # use sha256" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
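"`tokenize` turns a source string (for example, the contents of a file) into a list of tokens.\n", | |
"The result is a flat list: parentheses are kept as ordinary tokens, so there is no nesting." | |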
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def tokenize(source: str) -> list:\n", | |
"    \"\"\"Split `source` into a flat list of token strings.\n", | |
"\n", | |
"    Round and square brackets are both emitted as stand-alone '(' / ')'\n", | |
"    tokens; everything else is separated on whitespace.\n", | |
"    \"\"\"\n", | |
"    # Pad every bracket with spaces so that str.split() isolates it.\n", | |
"    padded = {'(': ' ( ', ')': ' ) ', '[': ' ( ', ']': ' ) '}\n", | |
"    spaced = source.translate(str.maketrans(padded))\n", | |
"    return spaced.split()" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"An example: a function definition, which is a nested list in the source but still tokenizes to a flat list.\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['(', 'define', '(', 'square', 'x', ')', '(', '*', 'x', 'x', ')', ')']" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"square_src = '(define (square x) (* x x))'\n", | |
"r = tokenize(square_src)\n", | |
"r" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A simple expression with no nesting; square brackets are tokenized as parentheses.\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['(', '+', '1', '2', '3', ')']" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bracket_src = '[+ 1 2 3]'\n", | |
"t = tokenize(bracket_src)\n", | |
"t" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def sha256(s: str) -> str:\n", | |
"    \"\"\"Return the SHA-256 digest of `s` as a hexadecimal string.\n", | |
"\n", | |
"    The text is encoded as UTF-8 before hashing.\n", | |
"    \"\"\"\n", | |
"    # hashlib is imported once, in the notebook's first cell.\n", | |
"    return hashlib.sha256(s.encode('utf-8')).hexdigest()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['32ebb1abcc1c601ceb9c4e3c4faba0caa5b85bb98c4f1e6612c40faa528a91c9',\n", | |
" 'a318c24216defe206feeb73ef5be00033fa9c4a74d0b967f6532a26ca5906d3b',\n", | |
" '6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b',\n", | |
" 'd4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35',\n", | |
" '4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce',\n", | |
" 'ba5ec51d07a4ac0e951608704431d59a02b21a4e951acc10505a8dc407c501ee']" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# one digest per token, in token order\n", | |
"t_hash = [sha256(token) for token in t]\n", | |
"t_hash" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# make a hash of a list\n", | |
"def hash_of_list(tokens: list, verbose: bool = True) -> str:\n", | |
"    \"\"\"Return one SHA-256 hex digest that summarises a flat token list.\n", | |
"\n", | |
"    One enclosing '(' ... ')' pair is dropped first. The remaining tokens are\n", | |
"    hashed individually and folded together from the last token to the first:\n", | |
"    the running digest and the next token's digest are joined by a single\n", | |
"    space and hashed again, so every token and its position affect the result.\n", | |
"\n", | |
"    Args:\n", | |
"        tokens: token strings, e.g. the output of tokenize().\n", | |
"        verbose: when True (the default) print the intermediate values.\n", | |
"\n", | |
"    Returns:\n", | |
"        The hex digest for the whole list. An empty list yields sha256('')\n", | |
"        and a single token yields that token's own digest.\n", | |
"    \"\"\"\n", | |
"    # Strip the enclosing parentheses once, and only when both are present,\n", | |
"    # so the closing ')' of a trailing nested list is not lost.\n", | |
"    if len(tokens) >= 2 and tokens[0] == '(' and tokens[-1] == ')':\n", | |
"        tokens = tokens[1:-1]\n", | |
"\n", | |
"    hashes = [sha256(token) for token in tokens]\n", | |
"    if verbose:\n", | |
"        hash_map = dict(zip(tokens, hashes))\n", | |
"        print(f'hash_map: {hash_map}')\n", | |
"        print(f'hashes: {hashes}')\n", | |
"        hash_of_map = sha256(str(hash_map))\n", | |
"        print(f'hash_of_map: {hash_of_map}')\n", | |
"\n", | |
"    # Fold from the last token towards the first.\n", | |
"    hash_chain = []\n", | |
"    last_hash = hashes[-1] if hashes else sha256('')\n", | |
"    for next_hash in reversed(hashes[:-1]):\n", | |
"        combine = sha256(last_hash + ' ' + next_hash)\n", | |
"        hash_chain.append((last_hash, next_hash, combine))\n", | |
"        # Carry the combined digest forward; carrying next_hash instead would\n", | |
"        # make the result depend on the first two tokens only.\n", | |
"        last_hash = combine\n", | |
"    if verbose:\n", | |
"        print(f'hash_chain: {hash_chain}')\n", | |
"    return last_hash\n", | |
"\n", | |
"print(hash_of_list(tokenize('(* 2 3)')))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "base", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.7" | |
}, | |
"orig_nbformat": 4, | |
"vscode": { | |
"interpreter": { | |
"hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f" | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment