Last active
December 6, 2015 05:45
-
-
Save leventov/9e837c7a632a9f02e29e to your computer and use it in GitHub Desktop.
Proposed optimization for shift removal procedure in hash tables with linear probing. Inspired by https://github.com/apple/swift/blob/8d9ef80304d7b36e13619ea50e6e76f3ec9221ba/stdlib/public/core/HashedCollections.swift.gyb#L3221-L3279
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# (Pseudocode) | |
def shift_distance(chain_end, base): | |
return (chain_end - base) & (capacity - 1) | |
def next(index): | |
return (index + 1) & (capacity - 1) | |
def prev(index): | |
return (index - 1) & (capacity - 1) | |
# 1. Naive shift remove procedure: | |
def shift_remove_naive(index_to_remove): | |
index_to_shift = next(index_to_remove) | |
while !slot_empty(index_to_shift): | |
base = index(hash(key(index_to_shift))) | |
if shift_distance(index_to_shift, base) >= shift_distance(index_to_shift, index_to_remove): | |
replace_slot(index_to_remove, index_to_shift) | |
index_to_remove = index_to_shift | |
index_to_shift = next(index_to_shift) | |
clear_slot(index_to_remove) | |
# 2. Shift remove procedure by Swift devs, | |
# https://github.com/apple/swift/blob/8d9ef80304d7b36e13619ea50e6e76f3ec9221ba/stdlib/public/core/HashedCollections.swift.gyb#L3221-L3279 | |
def collision_chain_end(index): | |
last_index = next(index) | |
while !slot_empty(last_index): | |
last_index = next(last_index) | |
return prev(last_index) | |
def shift_remove_swift(index_to_remove): | |
last_index = collision_chain_end(index_to_remove) | |
outer_loop: | |
while true: | |
index_to_shift = last_index | |
while index_to_shift != index_to_remove: | |
base = index(hash(key(index_to_shift))) | |
if shift_distance(index_to_shift, base) >= shift_distance(index_to_shift, index_to_remove): | |
replace_slot(index_to_remove, index_to_shift) | |
index_to_remove = index_to_shift | |
contunue outer_loop | |
index_to_shift = prev(index_to_shift) | |
# not shifted in the above inner loop | |
# break from the outer loop | |
break | |
clear_slot(index_to_remove) | |
# The approach by Swift devs allows to make less replacements, and probably make expensive | |
# `index(hash(key(index)))` procedure (expensive because hash code computation, if not cached | |
# in the key object, is potentially expensive) for less keys. The "naive" shift remove always | |
# performs this procedure for all keys till the collistion chain end, but Swift's shift remove | |
# may perform this procedure only for some tail of the collision chain. | |
# OTOH, Swift's shift remove may perform this procedure for some keys several times. | |
# 3. The proposed optimization: | |
def shift_remove_optimized(index_to_remove): | |
last_index = collision_chain_end(index_to_remove) | |
all: | |
if last_index != index_to_remove: | |
# Fast path | |
if last_index == index_to_remove + 1: | |
if index(hash(key(last_index))) != last_index: | |
replace_slot(index_to_remove, last_index) | |
index_to_remove = last_index | |
else: | |
chain_len = shift_distance(last_index, index_to_remove) | |
# This stack should be on-stack (sorry for tautology), in languages like C. | |
# chain_len - 1 is the max capacity needed for the stack | |
stack = array(chain_len - 1) | |
stack_i = 0 | |
index_to_shift = last_index | |
while true: | |
base = index(hash(key(index_to_shift))) | |
key_distance = shift_distance(index_to_shift, base) | |
if key_distance >= shift_distance(index_to_shift, index_to_remove): | |
replace_slot(index_to_remove, index_to_shift) | |
index_to_remove = index_to_shift | |
break | |
else: | |
index_to_shift = prev(index_to_shift) | |
if index_to_shift == index_to_remove: | |
break all | |
stack[stack_i] = key_distance | |
stack_i += 1 | |
while index_to_shift != last_index: | |
index_to_shift = last_index | |
stack_i = 0 | |
while true: | |
key_distance = stack[stack_i] | |
if key_distance >= shift_distance(index_to_shift, index_to_remove): | |
replace_slot(index_to_remove, index_to_shift) | |
index_to_remove = index_to_shift | |
break | |
else: | |
index_to_shift = prev(index_to_shift) | |
if index_to_shift == index_to_remove: | |
break all | |
clear_slot(index_to_remove) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment