Skip to content

Instantly share code, notes, and snippets.

@alimanfoo
Created November 10, 2020 18:22
Show Gist options
  • Save alimanfoo/91f00a3d327ac07fb23be7cb2e332b4b to your computer and use it in GitHub Desktop.
Save alimanfoo/91f00a3d327ac07fb23be7cb2e332b4b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numba\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"@numba.njit(nogil=True, cache=True)\n",
"def hash_columns(x):\n",
" h = np.empty((x.shape[1]), dtype=np.int64)\n",
" for j in range(x.shape[1]):\n",
" h[j] = 5381\n",
" for i in range(x.shape[0]):\n",
" h[j] = h[j] * 33 + x[i, j]\n",
" return h"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def generate_possibilities(n):\n",
" n_poss = 2**n\n",
" x = np.empty((n, 2**n), dtype='u1')\n",
" for i in range(n):\n",
" x[i] = np.arange(0, 2**n)\n",
" x[i] = (x[i] // (2**i)) % 2\n",
" return x\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1, 0, 1],\n",
" [0, 0, 1, 1]], dtype=uint8)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generate_possibilities(2)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1, 0, 1, 0, 1, 0, 1],\n",
" [0, 0, 1, 1, 0, 0, 1, 1],\n",
" [0, 0, 0, 0, 1, 1, 1, 1]], dtype=uint8)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generate_possibilities(3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],\n",
" [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1],\n",
" [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1],\n",
" [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=uint8)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generate_possibilities(4)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([5859909, 5859942, 5859910, 5859943])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hash_columns(generate_possibilities(2))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([193376997, 193378086, 193377030, 193378119, 193376998, 193378087,\n",
" 193377031, 193378120])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hash_columns(generate_possibilities(3))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([6381440901, 6381476838, 6381441990, 6381477927, 6381440934,\n",
" 6381476871, 6381442023, 6381477960, 6381440902, 6381476839,\n",
" 6381441991, 6381477928, 6381440935, 6381476872, 6381442024,\n",
" 6381477961])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hash_columns(generate_possibilities(4))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def test(n):\n",
" expect = 2**n\n",
" actual = len(set(hash_columns(generate_possibilities(n))))\n",
" if expect != actual:\n",
" print(f'collision, sequence length {n}, expected {expect}, actual {actual}')\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"collision, sequence length 9, expected 512, actual 256\n",
"collision, sequence length 10, expected 1024, actual 256\n",
"collision, sequence length 11, expected 2048, actual 256\n",
"collision, sequence length 12, expected 4096, actual 256\n",
"collision, sequence length 13, expected 8192, actual 256\n",
"collision, sequence length 14, expected 16384, actual 256\n",
"collision, sequence length 15, expected 32768, actual 256\n",
"collision, sequence length 16, expected 65536, actual 256\n",
"collision, sequence length 17, expected 131072, actual 256\n",
"collision, sequence length 18, expected 262144, actual 256\n",
"collision, sequence length 19, expected 524288, actual 256\n"
]
}
],
"source": [
"for i in range(20):\n",
" test(i)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment