DanielThomas · April 21, 2025 23:48
diff --git a/HashIndexTest.java b/HashIndexTest.java
 /*
 * Copyright 2024 Netflix Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 package com.netflix.index;

 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
 import org.springframework.core.io.ClassPathResource;

 import java.io.*;
 import java.util.*;
 import java.util.function.ToLongFunction;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import java.util.stream.LongStream;
 import java.util.zip.GZIPInputStream;

 import static org.junit.jupiter.api.Assertions.*;

 /**
  * Tests for {@link HashIndex}: lookups through {@code map}, the value range accepted by
  * each fixed-width factory ({@code ofByte}, {@code ofShort}, {@code ofInt}, {@code ofLong}),
  * and indexing of larger key sets.
  */
 class HashIndexTest {

    /** Builds an index over a single empty-string element whose key and value both map to 0. */
    private static HashIndex<String> single() {
        Spliterator<String> source = Collections.singletonList("").spliterator();
        return HashIndex.ofLong(source, input -> 0, input -> 0);
    }

    /**
     * Builds an index over a single empty-string element with key 0 and the given value,
     * using the factory selected by {@code maxBytes}.
     *
     * @param value    the value stored for the single element
     * @param maxBytes value width in bytes; see {@link #ofBytes}
     */
    private static HashIndex<String> singleWithMaxBytes(long value, int maxBytes) {
        Spliterator<String> source = Collections.singletonList("").spliterator();
        return ofBytes(source, input -> 0, input -> value, maxBytes);
    }

    /**
     * Dispatches to the {@link HashIndex} factory matching a value width in bytes.
     *
     * @param source      elements to index
     * @param keyMapper   extracts the key of an element
     * @param valueMapper extracts the value of an element
     * @param maxBytes    one of {@code Byte.BYTES}, {@code Short.BYTES}, {@code Integer.BYTES}
     *                    or {@code Long.BYTES}
     * @throws IllegalArgumentException if {@code maxBytes} is none of the supported widths
     */
    private static <T> HashIndex<T> ofBytes(Spliterator<T> source, ToLongFunction<T> keyMapper, ToLongFunction<T> valueMapper, int maxBytes) {
        switch (maxBytes) {
            case Byte.BYTES:
                return HashIndex.ofByte(source, keyMapper, valueMapper);
            case Short.BYTES:
                return HashIndex.ofShort(source, keyMapper, valueMapper);
            case Integer.BYTES:
                return HashIndex.ofInt(source, keyMapper, valueMapper);
            case Long.BYTES:
                return HashIndex.ofLong(source, keyMapper, valueMapper);
            default:
                throw new IllegalArgumentException("unsupported value width in bytes: " + maxBytes);
        }
    }

    /**
     * Computes a signed width in bytes from a non-negative magnitude: the bits needed to
     * represent {@code magnitude}, plus one sign bit, divided by {@code Byte.SIZE}.
     * For the maximum value of a signed 1, 2, 4 or 8 byte type this yields 1, 2, 4 or 8.
     */
    private static int widthInBytes(long magnitude) {
        return (Long.SIZE - Long.numberOfLeadingZeros(magnitude) + 1) / Byte.SIZE;
    }

    /**
     * A mapper that returns {@code null} produces a {@code null} result.
     * NOTE(review): this body is identical to {@link #mapNotEqual()} and looks up key 0, which
     * {@link #single()} does index - confirm whether a genuinely absent key was intended.
     */
    @Test
    public void mapMissing() {
        HashIndex<String> index = single();
        String actual = index.map(0, value -> null);

        assertNull(actual);
    }

    /** Looking up {@code Long.MAX_VALUE} returns {@code null} even though the mapper never does. */
    @Test
    public void mapMaxKey() {
        HashIndex<String> index = single();
        String actual = index.map(Long.MAX_VALUE, value -> "value");

        assertNull(actual);
    }

    /** Looking up the indexed key 0 returns whatever the mapper returns. */
    @Test
    public void mapPresent() {
        HashIndex<String> index = single();
        String actual = index.map(0, value -> "value");

        assertEquals("value", actual);
    }

    /** A mapper that rejects the candidate by returning {@code null} yields {@code null}. */
    @Test
    public void mapNotEqual() {
        HashIndex<String> index = single();
        String actual = index.map(0, value -> null);

        assertNull(actual);
    }

    /**
     * Five elements share key 1; the mapper rejects every candidate except the value 5.
     * Asserts that the mapper saw five candidates and that the accepted result is returned.
     */
    @Test
    public void mapDuplicateKeys() {
        Spliterator<Integer> source = IntStream.rangeClosed(1, 5).boxed().spliterator();
        HashIndex<Integer> index = HashIndex.ofInt(source, input -> 1, input -> input);

        List<Long> values = new ArrayList<>();

        Integer actual = index.map(1, value -> {
            values.add(value);
            if (value == 5) {
                return 5;
            }
            return null;
        });

        assertEquals(5, actual);
        assertEquals(5, values.size());
    }

    /** A source reporting {@code Integer.MAX_VALUE} elements is rejected with an ArithmeticException. */
    @Test
    public void indexMaxTableSize() {
        // The iterator is empty; only the reported size matters for this test.
        Spliterator<Integer> source = Spliterators.spliterator(Collections.<Integer>emptyList().iterator(), Integer.MAX_VALUE, Spliterator.SIZED);
        assertThrows(ArithmeticException.class, () -> HashIndex.ofInt(source, input -> 1, input -> input));
    }

    /** Storing the maximum value of a width is rejected with "max value is reserved". */
    @ParameterizedTest
    @ValueSource(longs = {(long) Byte.MAX_VALUE, (long) Short.MAX_VALUE, (long) Integer.MAX_VALUE, Long.MAX_VALUE})
    public void indexMaxValueDisallowed(long expected) {
        int maxBytes = widthInBytes(expected);
        IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> singleWithMaxBytes(expected, maxBytes));
        assertEquals("max value is reserved", exception.getMessage());
    }

    /** The maximum value of a width minus one can be stored and read back unchanged. */
    @ParameterizedTest
    @ValueSource(longs = {(long) Byte.MAX_VALUE, (long) Short.MAX_VALUE, (long) Integer.MAX_VALUE, Long.MAX_VALUE})
    public void indexMaxValueMinusOne(long expected) {
        // Derive the width from the type's maximum before stepping down by one.
        int maxBytes = widthInBytes(expected);
        long stored = expected - 1;
        HashIndex<String> index = singleWithMaxBytes(stored, maxBytes);
        String actual = index.map(0, String::valueOf);

        assertEquals(stored, Long.valueOf(actual));
    }

    /** The minimum value of a width can be stored and read back unchanged. */
    @ParameterizedTest
    @ValueSource(longs = {(long) Byte.MIN_VALUE, (long) Short.MIN_VALUE, (long) Integer.MIN_VALUE, Long.MIN_VALUE})
    public void indexMinValue(long expected) {
        // ~MIN_VALUE == MAX_VALUE of the same type, which gives the non-negative magnitude.
        int maxBytes = widthInBytes(~expected);
        HashIndex<String> index = singleWithMaxBytes(expected, maxBytes);
        String actual = index.map(0, String::valueOf);

        assertEquals(expected, Long.valueOf(actual));
    }

    /** A value one bit wider than the target width is rejected with "value out of range". */
    @ParameterizedTest
    @ValueSource(ints = {Byte.BYTES, Short.BYTES, Integer.BYTES})
    public void indexNarrowingDisallowed(int maxBytes) {
        long tooWide = 1L << (maxBytes * Byte.SIZE);
        Exception exception = assertThrows(IllegalArgumentException.class, () -> singleWithMaxBytes(tooWide, maxBytes));
        assertEquals("value out of range", exception.getMessage());
    }

    /**
     * Indexes every key from the gzipped classpath resource {@code keys-sorted.txt.gz}
     * (one decimal long per line) and checks that each key can be found again.
     */
    @Test
    public void indexLargeKeySet() throws IOException {
        ClassPathResource resource = new ClassPathResource("keys-sorted.txt.gz");
        // Each stream is its own resource so the raw stream is closed even if a wrapper's
        // constructor throws; the charset is explicit so decoding does not depend on the platform.
        try (InputStream raw = resource.getInputStream();
             InputStream unzipped = new GZIPInputStream(raw);
             BufferedReader reader = new BufferedReader(new InputStreamReader(unzipped, java.nio.charset.StandardCharsets.UTF_8))) {
            List<Long> lines = reader.lines().map(Long::parseLong).collect(Collectors.toList());
            HashIndex<Long> index = HashIndex.ofInt(lines.spliterator(), input -> input, input -> input);
            lines.forEach(key -> assertEquals(key, index.map(key, candidate -> candidate == key ? candidate : null)));
        }
    }

    /**
     * Indexes and looks up every value from the width's minimum plus one up to (excluding)
     * its maximum, in batches. Disabled, since the wider ranges are far too large to run routinely.
     */
    @Disabled
    @ParameterizedTest
    @ValueSource(ints = {Byte.BYTES, Short.BYTES, Integer.BYTES, Long.BYTES})
    public void indexAndGetAllValues(int maxBytes) {
        int numBits = maxBytes * Byte.SIZE;
        long minValue = -(1L << (numBits - 1));
        long maxValue = ~minValue;
        long batch = 10_000_000L;
        long start = minValue + 1;
        while (start < maxValue) {
            // Compare against maxValue - batch instead of computing start + batch first:
            // for the 8-byte width the sum would overflow and wrap to a negative end.
            long end = start > maxValue - batch ? maxValue : start + batch;
            Spliterator<Long> source = LongStream.range(start, end).boxed().spliterator();
            HashIndex<Long> index = ofBytes(source, value -> value, value -> value, maxBytes);
            LongStream.range(start, end).forEach(value ->
                    assertEquals(value, index.map(value, candidate -> candidate == value ? candidate : null)));
            start = end;
        }
    }

 }
	/*
	* Copyright 2024 Netflix Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.netflix.index;

	import org.junit.jupiter.api.Disabled;
	import org.junit.jupiter.api.Test;
	import org.junit.jupiter.params.ParameterizedTest;
	import org.junit.jupiter.params.provider.ValueSource;
	import org.springframework.core.io.ClassPathResource;

	import java.io.*;
	import java.util.*;
	import java.util.function.ToLongFunction;
	import java.util.stream.Collectors;
	import java.util.stream.IntStream;
	import java.util.stream.LongStream;
	import java.util.zip.GZIPInputStream;

	import static org.junit.jupiter.api.Assertions.*;

	/**
	 * Tests for {@link HashIndex}: lookups through {@code map}, the value range accepted by
	 * each fixed-width factory ({@code ofByte}, {@code ofShort}, {@code ofInt}, {@code ofLong}),
	 * and indexing of larger key sets.
	 */
	class HashIndexTest {

		/** Builds an index over a single empty-string element whose key and value both map to 0. */
		private static HashIndex<String> single() {
			Spliterator<String> source = Collections.singletonList("").spliterator();
			return HashIndex.ofLong(source, input -> 0, input -> 0);
		}

		/**
		 * Builds an index over a single empty-string element with key 0 and the given value,
		 * using the factory selected by {@code maxBytes}.
		 *
		 * @param value    the value stored for the single element
		 * @param maxBytes value width in bytes; see {@link #ofBytes}
		 */
		private static HashIndex<String> singleWithMaxBytes(long value, int maxBytes) {
			Spliterator<String> source = Collections.singletonList("").spliterator();
			return ofBytes(source, input -> 0, input -> value, maxBytes);
		}

		/**
		 * Dispatches to the {@link HashIndex} factory matching a value width in bytes.
		 *
		 * @param source      elements to index
		 * @param keyMapper   extracts the key of an element
		 * @param valueMapper extracts the value of an element
		 * @param maxBytes    one of {@code Byte.BYTES}, {@code Short.BYTES}, {@code Integer.BYTES}
		 *                    or {@code Long.BYTES}
		 * @throws IllegalArgumentException if {@code maxBytes} is none of the supported widths
		 */
		private static <T> HashIndex<T> ofBytes(Spliterator<T> source, ToLongFunction<T> keyMapper, ToLongFunction<T> valueMapper, int maxBytes) {
			switch (maxBytes) {
				case Byte.BYTES:
					return HashIndex.ofByte(source, keyMapper, valueMapper);
				case Short.BYTES:
					return HashIndex.ofShort(source, keyMapper, valueMapper);
				case Integer.BYTES:
					return HashIndex.ofInt(source, keyMapper, valueMapper);
				case Long.BYTES:
					return HashIndex.ofLong(source, keyMapper, valueMapper);
				default:
					throw new IllegalArgumentException("unsupported value width in bytes: " + maxBytes);
			}
		}

		/**
		 * Computes a signed width in bytes from a non-negative magnitude: the bits needed to
		 * represent {@code magnitude}, plus one sign bit, divided by {@code Byte.SIZE}.
		 * For the maximum value of a signed 1, 2, 4 or 8 byte type this yields 1, 2, 4 or 8.
		 */
		private static int widthInBytes(long magnitude) {
			return (Long.SIZE - Long.numberOfLeadingZeros(magnitude) + 1) / Byte.SIZE;
		}

		/**
		 * A mapper that returns {@code null} produces a {@code null} result.
		 * NOTE(review): this body is identical to {@link #mapNotEqual()} and looks up key 0, which
		 * {@link #single()} does index - confirm whether a genuinely absent key was intended.
		 */
		@Test
		public void mapMissing() {
			HashIndex<String> index = single();
			String actual = index.map(0, value -> null);

			assertNull(actual);
		}

		/** Looking up {@code Long.MAX_VALUE} returns {@code null} even though the mapper never does. */
		@Test
		public void mapMaxKey() {
			HashIndex<String> index = single();
			String actual = index.map(Long.MAX_VALUE, value -> "value");

			assertNull(actual);
		}

		/** Looking up the indexed key 0 returns whatever the mapper returns. */
		@Test
		public void mapPresent() {
			HashIndex<String> index = single();
			String actual = index.map(0, value -> "value");

			assertEquals("value", actual);
		}

		/** A mapper that rejects the candidate by returning {@code null} yields {@code null}. */
		@Test
		public void mapNotEqual() {
			HashIndex<String> index = single();
			String actual = index.map(0, value -> null);

			assertNull(actual);
		}

		/**
		 * Five elements share key 1; the mapper rejects every candidate except the value 5.
		 * Asserts that the mapper saw five candidates and that the accepted result is returned.
		 */
		@Test
		public void mapDuplicateKeys() {
			Spliterator<Integer> source = IntStream.rangeClosed(1, 5).boxed().spliterator();
			HashIndex<Integer> index = HashIndex.ofInt(source, input -> 1, input -> input);

			List<Long> values = new ArrayList<>();

			Integer actual = index.map(1, value -> {
				values.add(value);
				if (value == 5) {
					return 5;
				}
				return null;
			});

			assertEquals(5, actual);
			assertEquals(5, values.size());
		}

		/** A source reporting {@code Integer.MAX_VALUE} elements is rejected with an ArithmeticException. */
		@Test
		public void indexMaxTableSize() {
			// The iterator is empty; only the reported size matters for this test.
			Spliterator<Integer> source = Spliterators.spliterator(Collections.<Integer>emptyList().iterator(), Integer.MAX_VALUE, Spliterator.SIZED);
			assertThrows(ArithmeticException.class, () -> HashIndex.ofInt(source, input -> 1, input -> input));
		}

		/** Storing the maximum value of a width is rejected with "max value is reserved". */
		@ParameterizedTest
		@ValueSource(longs = {(long) Byte.MAX_VALUE, (long) Short.MAX_VALUE, (long) Integer.MAX_VALUE, Long.MAX_VALUE})
		public void indexMaxValueDisallowed(long expected) {
			int maxBytes = widthInBytes(expected);
			IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> singleWithMaxBytes(expected, maxBytes));
			assertEquals("max value is reserved", exception.getMessage());
		}

		/** The maximum value of a width minus one can be stored and read back unchanged. */
		@ParameterizedTest
		@ValueSource(longs = {(long) Byte.MAX_VALUE, (long) Short.MAX_VALUE, (long) Integer.MAX_VALUE, Long.MAX_VALUE})
		public void indexMaxValueMinusOne(long expected) {
			// Derive the width from the type's maximum before stepping down by one.
			int maxBytes = widthInBytes(expected);
			long stored = expected - 1;
			HashIndex<String> index = singleWithMaxBytes(stored, maxBytes);
			String actual = index.map(0, String::valueOf);

			assertEquals(stored, Long.valueOf(actual));
		}

		/** The minimum value of a width can be stored and read back unchanged. */
		@ParameterizedTest
		@ValueSource(longs = {(long) Byte.MIN_VALUE, (long) Short.MIN_VALUE, (long) Integer.MIN_VALUE, Long.MIN_VALUE})
		public void indexMinValue(long expected) {
			// ~MIN_VALUE == MAX_VALUE of the same type, which gives the non-negative magnitude.
			int maxBytes = widthInBytes(~expected);
			HashIndex<String> index = singleWithMaxBytes(expected, maxBytes);
			String actual = index.map(0, String::valueOf);

			assertEquals(expected, Long.valueOf(actual));
		}

		/** A value one bit wider than the target width is rejected with "value out of range". */
		@ParameterizedTest
		@ValueSource(ints = {Byte.BYTES, Short.BYTES, Integer.BYTES})
		public void indexNarrowingDisallowed(int maxBytes) {
			long tooWide = 1L << (maxBytes * Byte.SIZE);
			Exception exception = assertThrows(IllegalArgumentException.class, () -> singleWithMaxBytes(tooWide, maxBytes));
			assertEquals("value out of range", exception.getMessage());
		}

		/**
		 * Indexes every key from the gzipped classpath resource {@code keys-sorted.txt.gz}
		 * (one decimal long per line) and checks that each key can be found again.
		 */
		@Test
		public void indexLargeKeySet() throws IOException {
			ClassPathResource resource = new ClassPathResource("keys-sorted.txt.gz");
			// Each stream is its own resource so the raw stream is closed even if a wrapper's
			// constructor throws; the charset is explicit so decoding does not depend on the platform.
			try (InputStream raw = resource.getInputStream();
					InputStream unzipped = new GZIPInputStream(raw);
					BufferedReader reader = new BufferedReader(new InputStreamReader(unzipped, java.nio.charset.StandardCharsets.UTF_8))) {
				List<Long> lines = reader.lines().map(Long::parseLong).collect(Collectors.toList());
				HashIndex<Long> index = HashIndex.ofInt(lines.spliterator(), input -> input, input -> input);
				lines.forEach(key -> assertEquals(key, index.map(key, candidate -> candidate == key ? candidate : null)));
			}
		}

		/**
		 * Indexes and looks up every value from the width's minimum plus one up to (excluding)
		 * its maximum, in batches. Disabled, since the wider ranges are far too large to run routinely.
		 */
		@Disabled
		@ParameterizedTest
		@ValueSource(ints = {Byte.BYTES, Short.BYTES, Integer.BYTES, Long.BYTES})
		public void indexAndGetAllValues(int maxBytes) {
			int numBits = maxBytes * Byte.SIZE;
			long minValue = -(1L << (numBits - 1));
			long maxValue = ~minValue;
			long batch = 10_000_000L;
			long start = minValue + 1;
			while (start < maxValue) {
				// Compare against maxValue - batch instead of computing start + batch first:
				// for the 8-byte width the sum would overflow and wrap to a negative end.
				long end = start > maxValue - batch ? maxValue : start + batch;
				Spliterator<Long> source = LongStream.range(start, end).boxed().spliterator();
				HashIndex<Long> index = ofBytes(source, value -> value, value -> value, maxBytes);
				LongStream.range(start, end).forEach(value ->
						assertEquals(value, index.map(value, candidate -> candidate == value ? candidate : null)));
				start = end;
			}
		}

	}