Created
May 7, 2024 11:58
-
-
Save hellman/09733f57e7d0ebee6a22fa9c834b0954 to your computer and use it in GitHub Desktop.
Extending CRC32
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "589fb75f-e6c1-4e1e-94f5-eb0e03e2c537", | |
"metadata": {}, | |
"source": [ | |
"# Defining CRC32" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "d2d16187-054c-4f5d-a5d9-81df84cff1a7", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:13.705489Z", | |
"iopub.status.busy": "2024-05-07T11:56:13.704415Z", | |
"iopub.status.idle": "2024-05-07T11:56:13.719273Z", | |
"shell.execute_reply": "2024-05-07T11:56:13.716939Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:13.705354Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"POLY = 0xEDB8_8320\n", | |
"# ONE = 0x8000_0000 # 1\n", | |
"# TWO = 0x4000_0000 # x\n", | |
"# THREE = 0xC000_0000 # x + 1\n", | |
"# THREE_INV = 0x492f023f # power(0xc000_0000, 2**32-2)\n", | |
"MAGIC = 0xB6D0_FDC0 # mul(POLY, THREE_INV) = x^32 / (x+1)\n", | |
"\n", | |
"def mul_x(a):\n", | |
" # LSB\n", | |
" if a & 1:\n", | |
" a >>= 1\n", | |
" a ^= POLY\n", | |
" else:\n", | |
" a >>= 1\n", | |
" return a\n", | |
"\n", | |
"\n", | |
"def mul(a, b):\n", | |
" res = 0\n", | |
" while b:\n", | |
" if b & 0x8000_0000:\n", | |
" res ^= a\n", | |
" b ^= 0x8000_0000\n", | |
" a = mul_x(a)\n", | |
" b <<= 1\n", | |
" return res\n", | |
"\n", | |
"\n", | |
"def power(a, e):\n", | |
" # square and multiply\n", | |
" res = 0x8000_0000 # one\n", | |
" while e:\n", | |
" if e & 1:\n", | |
" res = mul(res, a)\n", | |
" e >>= 1\n", | |
" a = mul(a, a)\n", | |
" return res \n", | |
" \n", | |
"\n", | |
"def mycrc32(msg):\n", | |
" s = 0xffff_ffff\n", | |
" for byte in msg:\n", | |
" for bit in f\"{byte:08b}\"[::-1]:\n", | |
" # bit plays the role of x^31 since it's in LSB\n", | |
" s = mul_x(s ^ int(bit))\n", | |
" s ^= 0xffff_ffff\n", | |
" return s\n", | |
"\n", | |
"\n", | |
"def extend_zeros(crc, n): # n in bits\n", | |
" crc ^= 0xffff_ffff\n", | |
" crc = mul(crc, power(0x4000_0000, n))\n", | |
" crc ^= 0xffff_ffff\n", | |
" return crc\n", | |
"\n", | |
"\n", | |
"def extend_ones(crc, n): # n in bits\n", | |
" \"\"\"\n", | |
" We are repeating the step\n", | |
"\n", | |
" > c = (c + x^31) * x = c*x + x^32\n", | |
"\n", | |
" `n` times, getting\n", | |
"\n", | |
" c = c0 * x^n + x^32 * (x^(n-1) + x^(n-2) + ... + x^2 + x + 1)\n", | |
" = c0 * x^n + x^32 * (x^n - 1) / (x - 1)\n", | |
"\n", | |
" Letting\n", | |
" \n", | |
" MAGIC = x^32 / (x-1) = POLY / (x-1) = POLY * (x-1)^(2^32-2)\n", | |
"\n", | |
" We get\n", | |
"\n", | |
" c = c0 * x^n + MAGIC * (x^n-1)\n", | |
" = x^n * (c0 + MAGIC) - MAGIC\n", | |
"\n", | |
" This also means that adding MAGIC before and after digesting some text\n", | |
" corresponds to flipping all the bits in the text.\n", | |
" \"\"\"\n", | |
" crc ^= 0xffff_ffff\n", | |
" crc = mul(crc ^ MAGIC, power(0x4000_0000, n)) ^ MAGIC\n", | |
" crc ^= 0xffff_ffff\n", | |
" return crc" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "c4b3acee-feac-4299-8b90-3b1d7d926968", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:13.730593Z", | |
"iopub.status.busy": "2024-05-07T11:56:13.727921Z", | |
"iopub.status.idle": "2024-05-07T11:56:13.749154Z", | |
"shell.execute_reply": "2024-05-07T11:56:13.747708Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:13.730496Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" zlib my | msg\n", | |
"00000000 00000000 | b''\n", | |
"d202ef8d d202ef8d | b'\\x00'\n", | |
"a505df1b a505df1b | b'\\x01'\n", | |
"2144df1c 2144df1c | b'\\x00\\x00\\x00\\x00'\n", | |
"ed82cd11 ed82cd11 | b'abcd'\n", | |
"8587d865 8587d865 | b'abcde'\n" | |
] | |
} | |
], | |
"source": [ | |
"import zlib\n", | |
"\n", | |
"print(\" zlib my | msg\")\n", | |
"for m in [b\"\", b\"\\x00\", b\"\\x01\", b\"\\x00\"*4, b\"abcd\", b\"abcde\"]:\n", | |
" print(\"%08x %08x\" % (zlib.crc32(m), mycrc32(m)), \"|\", m)\n", | |
" assert zlib.crc32(m) == mycrc32(m)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7935cfe6-721e-4623-b6ac-61ce26fdb77f", | |
"metadata": {}, | |
"source": [ | |
"# Test extend with zeros" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "efe1b9b7-71de-46ee-ac60-778d5a6992c3", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:13.752903Z", | |
"iopub.status.busy": "2024-05-07T11:56:13.751733Z", | |
"iopub.status.idle": "2024-05-07T11:56:13.781665Z", | |
"shell.execute_reply": "2024-05-07T11:56:13.771365Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:13.752340Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import random, os\n", | |
"for _ in range(100):\n", | |
" a = random.randrange(1000)\n", | |
" b = random.randrange(1000)\n", | |
" s1 = os.urandom(a)\n", | |
" c = zlib.crc32(s1)\n", | |
" c1 = zlib.crc32(s1 + b\"\\x00\" * b)\n", | |
" c2 = extend_zeros(c, b*8)\n", | |
" assert c1 == c2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f464e1d2-a140-45ff-a436-b2681f609f65", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:12:36.359239Z", | |
"iopub.status.busy": "2024-05-07T11:12:36.357896Z", | |
"iopub.status.idle": "2024-05-07T11:12:36.369277Z", | |
"shell.execute_reply": "2024-05-07T11:12:36.366900Z", | |
"shell.execute_reply.started": "2024-05-07T11:12:36.359096Z" | |
} | |
}, | |
"source": [ | |
"# Test extend with ones" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "33469630-2d7f-4f3e-b867-97cdd250fe6a", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:13.787432Z", | |
"iopub.status.busy": "2024-05-07T11:56:13.786199Z", | |
"iopub.status.idle": "2024-05-07T11:56:13.824654Z", | |
"shell.execute_reply": "2024-05-07T11:56:13.822975Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:13.787261Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import random, os\n", | |
"for _ in range(100):\n", | |
" a = random.randrange(1000)\n", | |
" b = random.randrange(1000)\n", | |
" s1 = os.urandom(a)\n", | |
" c = zlib.crc32(s1)\n", | |
" c1 = zlib.crc32(s1 + b\"\\xff\" * b)\n", | |
" c2 = extend_ones(c, b*8)\n", | |
" assert c1 == c2 " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7350084c-6ea1-40e6-aae0-d6f92664a7d2", | |
"metadata": {}, | |
"source": [ | |
"# Test flip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "8f2c5a10-934b-4e1d-bc06-2097b8069ab5", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:13.830373Z", | |
"iopub.status.busy": "2024-05-07T11:56:13.828268Z", | |
"iopub.status.idle": "2024-05-07T11:56:13.854759Z", | |
"shell.execute_reply": "2024-05-07T11:56:13.852262Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:13.830142Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import random, os\n", | |
"for _ in range(100):\n", | |
" a = random.randrange(1000)\n", | |
" s1 = os.urandom(a)\n", | |
" c = zlib.crc32(s1)\n", | |
" c1 = zlib.crc32(bytes(0xff^byte for byte in s1))\n", | |
" c2 = c ^ mul(MAGIC, power(0x4000_0000, 8*a) ^ 0x8000_0000) # MAGIC * (x^n + 1)\n", | |
" assert c1 == c2 " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ccf597cc-ec3f-4f7d-a37d-546dd4cf5884", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:51:10.326870Z", | |
"iopub.status.busy": "2024-05-07T11:51:10.326168Z", | |
"iopub.status.idle": "2024-05-07T11:51:10.334345Z", | |
"shell.execute_reply": "2024-05-07T11:51:10.332169Z", | |
"shell.execute_reply.started": "2024-05-07T11:51:10.326792Z" | |
} | |
}, | |
"source": [ | |
"# Timings" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "07141352-c856-4a2e-afb3-0fe0ba5e7d40", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:40.302266Z", | |
"iopub.status.busy": "2024-05-07T11:56:40.299987Z", | |
"iopub.status.idle": "2024-05-07T11:56:40.320996Z", | |
"shell.execute_reply": "2024-05-07T11:56:40.318714Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:40.301942Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'3.10.13 (f1607341da97ff5a1e93430b6e8c4af0ad1aa019, Sep 28 2023, 05:41:26)\\n[PyPy 7.3.13 with GCC 10.2.1 20210130 (Red Hat 10.2.1-11)]'" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import sys\n", | |
"sys.version" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "b525bd2b-fbb1-4ca1-89cd-4d0b50361419", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:13.860035Z", | |
"iopub.status.busy": "2024-05-07T11:56:13.858262Z", | |
"iopub.status.idle": "2024-05-07T11:56:16.169806Z", | |
"shell.execute_reply": "2024-05-07T11:56:16.167463Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:13.859233Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2.81 µs ± 62.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"# 0.5 GB = 2^29 bytes\n", | |
"%timeit extend_zeros(0xabcd1234, 2**32) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "e2ac7d50-f0c6-484a-8467-4366af970bb5", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:16.176654Z", | |
"iopub.status.busy": "2024-05-07T11:56:16.175935Z", | |
"iopub.status.idle": "2024-05-07T11:56:18.996801Z", | |
"shell.execute_reply": "2024-05-07T11:56:18.995091Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:16.176554Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"3.52 µs ± 603 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"# 0.5 GB = 2^29 bytes\n", | |
"%timeit extend_ones(0xabcd1234, 2**32)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "56dd220c-7689-4b6f-be61-5c07b9c336f9", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2024-05-07T11:56:19.001380Z", | |
"iopub.status.busy": "2024-05-07T11:56:18.999161Z", | |
"iopub.status.idle": "2024-05-07T11:56:24.205812Z", | |
"shell.execute_reply": "2024-05-07T11:56:24.204084Z", | |
"shell.execute_reply.started": "2024-05-07T11:56:19.001045Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"63.2 µs ± 3.29 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%timeit extend_zeros(0xabcd1234, 2**512)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "PyPy3", | |
"language": "python", | |
"name": "pypy3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment