Skip to content

Instantly share code, notes, and snippets.

@bodiam
Created May 5, 2026 09:30
Show Gist options
  • Select an option

  • Save bodiam/c19c71a0990cabaa10d265302d976218 to your computer and use it in GitHub Desktop.

Select an option

Save bodiam/c19c71a0990cabaa10d265302d976218 to your computer and use it in GitHub Desktop.
/*
* Copyright 2014-2025 Real Logic Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.agrona.concurrent;
import org.agrona.UnsafeApi;
import java.util.Collection;
import java.util.function.Consumer;
/**
* One producer to one consumer concurrent queue that is array backed. The algorithm is a variation of Fast Flow
* adapted to work with the Java Memory Model on arrays by using {@link sun.misc.Unsafe}.
* <p>
* Within this class the tail sequence is only written by {@link #offer(Object)} and the head sequence is only
* written by {@link #poll()}, {@link #drain(Consumer)}, {@link #drain(Consumer, int)} and
* {@link #drainTo(Collection, int)}. A {@code null} slot is treated as "no element available" by the consuming
* methods, and {@link #offer(Object)} rejects {@code null} elements.
*
* @param <E> type of the elements stored in the {@link java.util.Queue}.
*/
@SuppressWarnings("removal")
public class OneToOneConcurrentArrayQueue<E> extends AbstractConcurrentArrayQueue<E>
{
/**
* Constructs queue with the requested capacity.
*
* @param requestedCapacity of the queue.
*/
public OneToOneConcurrentArrayQueue(final int requestedCapacity)
{
super(requestedCapacity);
}
/**
* {@inheritDoc}
* <p>
* Returns {@code false} and leaves the queue untouched when the tail is a full capacity ahead of the head.
*
* @throws NullPointerException if {@code e} is {@code null}.
*/
public boolean offer(final E e)
{
if (null == e)
{
throw new NullPointerException("Null is not a valid element");
}
// Copy the fields into locals once so the rest of the method works on stable values.
final int capacity = this.capacity;
final Object[] buffer = this.buffer;
// NOTE(review): masking with capacity - 1 assumes capacity is a power of two - confirm in the superclass.
final long mask = capacity - 1;
// Start from the cached head so the head field itself is only read when the queue appears full.
long currentHead = headCache;
long bufferLimit = currentHead + capacity;
// The tail is read through UnsafeApi.getLong rather than the field; offer() is the only writer of the
// tail in this class.
final long currentTail = UnsafeApi.getLong(this, TAIL_OFFSET);
if (currentTail >= bufferLimit)
{
// The cached head may be stale: re-read the real head and re-check before reporting the queue as full.
currentHead = head;
bufferLimit = currentHead + capacity;
if (currentTail >= bufferLimit)
{
return false;
}
// Remember the fresher head so subsequent offers can skip reading the head field again.
headCache = currentHead;
}
final long elementOffset = sequenceToBufferOffset(currentTail, mask);
// Store the element first, then publish the advanced tail; both are release stores.
UnsafeApi.putReferenceRelease(buffer, elementOffset, e);
UnsafeApi.putLongRelease(this, TAIL_OFFSET, currentTail + 1);
return true;
}
/**
* {@inheritDoc}
* <p>
* Returns {@code null} without advancing the head when the slot at the current head holds no element.
*/
@SuppressWarnings("unchecked")
public E poll()
{
final Object[] buffer = this.buffer;
// The head is read through UnsafeApi.getLong rather than the field; only the consuming methods of this
// class write the head.
final long currentHead = UnsafeApi.getLong(this, HEAD_OFFSET);
final long elementOffset = sequenceToBufferOffset(currentHead, capacity - 1);
// Volatile read of the slot: a non-null value means an element has been stored there.
final Object e = UnsafeApi.getReferenceVolatile(buffer, elementOffset);
if (null != e)
{
// NOTE(review): the slot is cleared with a plain store (drain/drainTo use a release store) and relies on
// the release store of head that follows it - confirm this ordering is sufficient for offer().
UnsafeApi.putReference(buffer, elementOffset, null);
UnsafeApi.putLongRelease(this, HEAD_OFFSET, currentHead + 1);
}
return (E)e;
}
/**
* {@inheritDoc}
* <p>
* Delegates to {@link #drain(Consumer, int)} using {@code tail - head}, as read at the time of the call, as
* the limit.
*/
public int drain(final Consumer<E> elementConsumer)
{
return drain(elementConsumer, (int)(tail - head));
}
/**
* {@inheritDoc}
* <p>
* Stops early when an empty ({@code null}) slot is encountered. The return value is the number of elements
* passed to {@code elementConsumer}.
*/
@SuppressWarnings("unchecked")
public int drain(final Consumer<E> elementConsumer, final int limit)
{
final Object[] buffer = this.buffer;
final long mask = this.capacity - 1;
final long currentHead = head;
long nextSequence = currentHead;
// Exclusive upper bound on the sequences this call may consume.
final long limitSequence = nextSequence + limit;
while (nextSequence < limitSequence)
{
final long elementOffset = sequenceToBufferOffset(nextSequence, mask);
final Object item = UnsafeApi.getReferenceVolatile(buffer, elementOffset);
if (null == item)
{
// Nothing stored at this sequence yet, so there is nothing more to drain.
break;
}
// Clear the slot and publish the new head BEFORE invoking the consumer, so the element is already
// removed from the queue if accept() throws.
UnsafeApi.putReferenceRelease(buffer, elementOffset, null);
nextSequence++;
UnsafeApi.putLongRelease(this, HEAD_OFFSET, nextSequence);
elementConsumer.accept((E)item);
}
return (int)(nextSequence - currentHead);
}
/**
* {@inheritDoc}
* <p>
* Stops early when an empty ({@code null}) slot is encountered. The return value is the number of elements
* removed from the queue.
*/
@SuppressWarnings("unchecked")
public int drainTo(final Collection<? super E> target, final int limit)
{
final Object[] buffer = this.buffer;
final long mask = this.capacity - 1;
long nextSequence = head;
int count = 0;
while (count < limit)
{
final long elementOffset = sequenceToBufferOffset(nextSequence, mask);
final Object item = UnsafeApi.getReferenceVolatile(buffer, elementOffset);
if (null == item)
{
// Nothing stored at this sequence yet, so there is nothing more to drain.
break;
}
// The slot is cleared, the head published and the count incremented before target.add() is called, so
// the element is counted as removed even if add() throws.
UnsafeApi.putReferenceRelease(buffer, elementOffset, null);
nextSequence++;
UnsafeApi.putLongRelease(this, HEAD_OFFSET, nextSequence);
count++;
target.add((E)item);
}
return count;
}
}
@bodiam
Copy link
Copy Markdown
Author

bodiam commented May 5, 2026

/*
 * Copyright 2014-2025 Real Logic Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.agrona.concurrent;

import org.agrona.UnsafeApi;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Group;
import org.openjdk.jmh.annotations.GroupThreads;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.util.Collection;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

/**
 * Benchmarks for the {@link OneToOneConcurrentArrayQueue#offer(Object)} and
 * {@link OneToOneConcurrentArrayQueue#poll()} hot paths.
 * <p>
 * Every benchmark is parameterised by {@link QueueType} so that the current {@link OneToOneConcurrentArrayQueue}
 * is measured against a copy of the previous implementation that is nested in this class.
 */
@Fork(
    value = 3,
    jvmArgsPrepend =
    {
        "-Dagrona.disable.bounds.checks=true",
        "--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED"
    })
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 10, time = 1)
public class OneToOneConcurrentArrayQueueBenchmark
{
    // Capacity requested for every queue created by the benchmark.
    private static final int CAPACITY = 1024;
    // Number of offer/poll round trips performed per offerPoll invocation.
    private static final int BURST_SIZE = 1024;
    // Single shared element so that the benchmark does not allocate per operation.
    private static final Object ELEMENT = new Object();

    /**
     * Default constructor.
     */
    public OneToOneConcurrentArrayQueueBenchmark()
    {
    }

    /**
     * Benchmark the same-thread offer/poll cost with deterministic queue occupancy.
     * <p>
     * Each loop iteration offers one element and immediately polls it, so every iteration starts and ends with
     * the same queue occupancy. The reported time is per round trip because of
     * {@link OperationsPerInvocation}.
     *
     * @param state benchmark state.
     * @param blackhole for consuming the polled values.
     */
    @Benchmark
    @OperationsPerInvocation(BURST_SIZE)
    public void offerPoll(final QueueState state, final Blackhole blackhole)
    {
        final QueuedPipe<Object> queue = state.queue;

        for (int i = 0; i < BURST_SIZE; i++)
        {
            // The result of offer is intentionally not inspected here.
            queue.offer(ELEMENT);
            blackhole.consume(queue.poll());
        }
    }

    /**
     * Producer side of the concurrent SPSC benchmark.
     * <p>
     * NOTE(review): an offer that fails because the queue is full is still counted as an operation, so the
     * reported time mixes successful and unsuccessful offers.
     *
     * @param state benchmark state.
     * @return whether the offer succeeded.
     */
    @Benchmark
    @Group("spsc")
    @GroupThreads(1)
    public boolean spscOffer(final GroupState state)
    {
        return state.queue.offer(ELEMENT);
    }

    /**
     * Consumer side of the concurrent SPSC benchmark.
     * <p>
     * NOTE(review): a poll that finds the queue empty is still counted as an operation, so the reported time
     * mixes successful and unsuccessful polls.
     *
     * @param state benchmark state.
     * @return the polled value.
     */
    @Benchmark
    @Group("spsc")
    @GroupThreads(1)
    public Object spscPoll(final GroupState state)
    {
        return state.queue.poll();
    }

    /**
     * Benchmark state for same-thread round trips.
     */
    @State(Scope.Benchmark)
    public static class QueueState
    {
        /**
         * Queue implementation to benchmark.
         */
        @Param({ "BASELINE", "OPTIMIZED" })
        public QueueType queueType;

        // Queue under test, created in setup() from the selected queueType.
        private QueuedPipe<Object> queue;

        /**
         * Setup.
         */
        @Setup
        public void setup()
        {
            queue = queueType.newQueue();
        }
    }

    /**
     * Benchmark state shared by the producer and consumer in the SPSC group.
     */
    @State(Scope.Group)
    public static class GroupState
    {
        /**
         * Queue implementation to benchmark.
         */
        @Param({ "BASELINE", "OPTIMIZED" })
        public QueueType queueType;

        // Queue under test, created in setup() from the selected queueType.
        private QueuedPipe<Object> queue;

        /**
         * Setup.
         */
        @Setup
        public void setup()
        {
            queue = queueType.newQueue();
        }
    }

    /**
     * Queue implementations being compared.
     */
    public enum QueueType
    {
        /**
         * Implementation before the optimized offer/poll changes.
         */
        BASELINE
        {
            QueuedPipe<Object> newQueue()
            {
                return new BaselineOneToOneConcurrentArrayQueue<>(CAPACITY);
            }
        },

        /**
         * Current implementation.
         */
        OPTIMIZED
        {
            QueuedPipe<Object> newQueue()
            {
                return new OneToOneConcurrentArrayQueue<>(CAPACITY);
            }
        };

        // Creates a new, empty queue of this type with CAPACITY as the requested capacity.
        abstract QueuedPipe<Object> newQueue();
    }

    /**
     * Runner method that allows starting benchmark directly.
     *
     * @param args for the main method.
     * @throws RunnerException in case if JMH throws while starting the benchmark.
     */
    public static void main(final String[] args) throws RunnerException
    {
        new Runner(new OptionsBuilder()
            .include(OneToOneConcurrentArrayQueueBenchmark.class.getName())
            .shouldFailOnError(true)
            .build())
            .run();
    }

    /**
     * Copy of the queue implementation from before the optimized offer/poll changes, kept here so that both
     * versions can be measured in the same run. It must not be "improved": it is the reference point for the
     * comparison.
     *
     * @param <E> type of the elements stored in the queue.
     */
    @SuppressWarnings("removal")
    private static final class BaselineOneToOneConcurrentArrayQueue<E> extends AbstractConcurrentArrayQueue<E>
    {
        private BaselineOneToOneConcurrentArrayQueue(final int requestedCapacity)
        {
            super(requestedCapacity);
        }

        /**
         * Baseline offer: unlike the optimized version it reads {@code tail} through the field and does not
         * cache {@code buffer} or the mask in locals.
         */
        public boolean offer(final E e)
        {
            if (null == e)
            {
                throw new NullPointerException("Null is not a valid element");
            }

            final int capacity = this.capacity;
            // Start from the cached head so the head field itself is only read when the queue appears full.
            long currentHead = headCache;
            long bufferLimit = currentHead + capacity;
            final long currentTail = tail;
            if (currentTail >= bufferLimit)
            {
                // The cached head may be stale: re-read the real head and re-check before reporting full.
                currentHead = head;
                bufferLimit = currentHead + capacity;
                if (currentTail >= bufferLimit)
                {
                    return false;
                }

                headCache = currentHead;
            }

            final long elementOffset = sequenceToBufferOffset(currentTail, capacity - 1);

            // Store the element first, then publish the advanced tail; both are release stores.
            UnsafeApi.putReferenceRelease(buffer, elementOffset, e);
            UnsafeApi.putLongRelease(this, TAIL_OFFSET, currentTail + 1);

            return true;
        }

        /**
         * Baseline poll: unlike the optimized version it reads {@code head} through the field and clears the
         * slot with a release store.
         */
        @SuppressWarnings("unchecked")
        public E poll()
        {
            final Object[] buffer = this.buffer;
            final long currentHead = head;
            final long elementOffset = sequenceToBufferOffset(currentHead, capacity - 1);

            final Object e = UnsafeApi.getReferenceVolatile(buffer, elementOffset);
            if (null != e)
            {
                UnsafeApi.putReferenceRelease(buffer, elementOffset, null);
                UnsafeApi.putLongRelease(this, HEAD_OFFSET, currentHead + 1);
            }

            return (E)e;
        }

        /**
         * Drains using {@code tail - head}, as read at the time of the call, as the limit.
         */
        public int drain(final Consumer<E> elementConsumer)
        {
            return drain(elementConsumer, (int)(tail - head));
        }

        /**
         * Passes up to {@code limit} elements to {@code elementConsumer}, stopping early at the first empty
         * ({@code null}) slot, and returns how many were passed on.
         */
        @SuppressWarnings("unchecked")
        public int drain(final Consumer<E> elementConsumer, final int limit)
        {
            final Object[] buffer = this.buffer;
            final long mask = this.capacity - 1;
            final long currentHead = head;
            long nextSequence = currentHead;
            final long limitSequence = nextSequence + limit;

            while (nextSequence < limitSequence)
            {
                final long elementOffset = sequenceToBufferOffset(nextSequence, mask);
                final Object item = UnsafeApi.getReferenceVolatile(buffer, elementOffset);

                if (null == item)
                {
                    break;
                }

                // The slot is cleared and the head published before the consumer is invoked.
                UnsafeApi.putReferenceRelease(buffer, elementOffset, null);
                nextSequence++;
                UnsafeApi.putLongRelease(this, HEAD_OFFSET, nextSequence);
                elementConsumer.accept((E)item);
            }

            return (int)(nextSequence - currentHead);
        }

        /**
         * Moves up to {@code limit} elements into {@code target}, stopping early at the first empty
         * ({@code null}) slot, and returns how many were removed from the queue.
         */
        @SuppressWarnings("unchecked")
        public int drainTo(final Collection<? super E> target, final int limit)
        {
            final Object[] buffer = this.buffer;
            final long mask = this.capacity - 1;
            long nextSequence = head;
            int count = 0;

            while (count < limit)
            {
                final long elementOffset = sequenceToBufferOffset(nextSequence, mask);
                final Object item = UnsafeApi.getReferenceVolatile(buffer, elementOffset);
                if (null == item)
                {
                    break;
                }

                // The slot is cleared and the head published before the element is added to the target.
                UnsafeApi.putReferenceRelease(buffer, elementOffset, null);
                nextSequence++;
                UnsafeApi.putLongRelease(this, HEAD_OFFSET, nextSequence);
                count++;
                target.add((E)item);
            }

            return count;
        }
    }
}

Results:

  offerPoll BASELINE   6.085 ns/op
  offerPoll OPTIMIZED  5.801 ns/op

  spsc BASELINE        20.035 ns/op
  spsc OPTIMIZED        8.223 ns/op
  spscOffer BASELINE   20.075 ns/op
  spscOffer OPTIMIZED   7.383 ns/op
  spscPoll BASELINE    19.995 ns/op
  spscPoll OPTIMIZED    9.063 ns/op

@bodiam
Copy link
Copy Markdown
Author

bodiam commented May 5, 2026

  • offer: caches the buffer and mask in local variables, reads the producer-owned tail with a plain UnsafeApi.getLong, and keeps the release stores that publish the element and the new tail.
  • poll: reads the consumer-owned head with a plain UnsafeApi.getLong, keeps the volatile read of the element, clears the slot with a plain store, and then publishes the new head with a release store.

@bodiam
Copy link
Copy Markdown
Author

bodiam commented May 5, 2026

Token usage: total=194,013 input=178,771 (+ 3,697,536 cached) output=15,242 (reasoning 4,703)

@bodiam
Copy link
Copy Markdown
Author

bodiam commented May 5, 2026

Done using gpt 5.5 high

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment