Counter-based rate limiter class for Java.
package coil.geek;

import java.util.concurrent.atomic.AtomicLong;

/**
 * A token-based rate limiter, inspired by https://stackoverflow.com/a/55349487/53538
 *
 * The original implementation boasts O(1) time complexity as well as O(1) space complexity, but controls
 * concurrency using synchronization. This implementation uses atomic operations to offer lock-less concurrency
 * and reentrancy.
 *
 * This implementation also attempts to improve on the original in the following aspects:
 * 1. Allow lock-less concurrent access - the call to test the rate will never block on concurrent access.
 * 2. Somewhat faster, by doing fewer calculations on each call.
 * 3. Allow differential costs - different calls can cost a different number of tokens.
 * 4. In addition to the yes/no API (implemented by {@link #allowRequest()}), there's
 *    an API for callers that can delay the request internally to honor the rate limit:
 *    this API tells the caller how long to wait, and the caller can either check again after that
 *    delay or just pre-consume the rate token.
 * 5. For use cases that throttle performance by delaying responses, this implementation offers a
 *    mode where the caller makes a single call into {@link #whenNextAllowed(int, boolean)}, gets a
 *    delay value and commits to performing the request - by specifying {@code true} in the second
 *    parameter, the token pool can be put into a deficit, which simply causes a request rate higher
 *    than the pool fill rate to produce larger and larger delay values.
 * 6. Allow a fill rate that is different from the limit, using the {@link #RateLimiter(int, int, long)}
 *    constructor, where the first argument is the limit and the second and third arguments specify
 *    the fill rate as in the shorter constructor.
 *
 * The other main difference between this and the original is that in order to use CAS to implement
 * lock-less concurrency, this implementation dropped the arbitrary precision and instead uses whole
 * nanoseconds to measure the passage of time. The interval is still specified in milliseconds, which
 * means that the amount of available tokens is measured in millionths of a token (affectionately
 * nicknamed "tockins" here). This gives us 6 digits of precision, which we believe should be enough
 * for most uses.
 *
 * For example, to create a limiter that allows a sustained rate of 10 calls per second with a
 * 50 call burst, use: {@code new RateLimiter(50, 50, 10, 1000)}
 *
 * License: CC BY-SA 4.0.
 * @author Oded Arbel <[email protected]>, https://stackoverflow.com/users/8781330/tonywl
 */
public class RateLimiter {
    private static final long TOKEN_SIZE = 1_000_000; // tockins per token

    private final double tokenRate;  // measured in tokens per ms
    private final double tockinRate; // measured in tockins per ms
    private final long tockinsLimit; // maximum pool size, in tockins

    private final AtomicLong available;     // current pool content, in tockins
    private final AtomicLong lastTimeStamp; // time of the last refill, from System.nanoTime()
    /**
     * Create a new rate limiter with the token fill rate specified as
     * {@code limit}/{@code interval} and a maximum token pool size of {@code limit}.
     * @param limit The maximum number of tokens in the pool
     * @param interval The time for the pool to be completely filled, in ms
     */
    public RateLimiter(int limit, long interval) {
        this(0, limit, limit, interval);
    }

    /**
     * Create a new rate limiter with the token fill rate specified as
     * {@code fill}/{@code interval} and a maximum token pool size of {@code limit}.
     * @param limit The maximum number of tokens in the pool (burst size)
     * @param fill How many tokens will be added to the pool by waiting {@code interval} time
     * @param interval How long it takes to get {@code fill} tokens back in the pool, in ms
     */
    public RateLimiter(int limit, int fill, long interval) {
        this(0, limit, fill, interval);
    }

    /**
     * Create a new rate limiter with the token fill rate specified as
     * {@code fill}/{@code interval} and a maximum token pool size of {@code limit}, starting
     * with a {@code prefill} amount of tokens ready to be used.
     * @param prefill Instead of starting with an empty pool, assume we "start from rest" and
     *   have tokens to consume. This value is clamped to {@code limit}.
     * @param limit The maximum number of tokens in the pool (burst size)
     * @param fill How many tokens will be added to the pool by waiting {@code interval} time
     * @param interval How long it takes to get {@code fill} tokens back in the pool, in ms
     */
    public RateLimiter(int prefill, int limit, int fill, long interval) {
        this.tokenRate = (double)fill / interval;
        this.tockinsLimit = TOKEN_SIZE * limit;
        this.tockinRate = tokenRate * TOKEN_SIZE;
        this.lastTimeStamp = new AtomicLong(System.nanoTime());
        // clamp the prefill to the pool limit, as documented above
        this.available = new AtomicLong(Math.min(prefill, limit) * TOKEN_SIZE);
    }
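
    /*
     * Worked example (for illustration only): for the limiter from the class documentation,
     * new RateLimiter(50, 50, 10, 1000), the fields above work out to
     *
     *   tokenRate    = 10 / 1000               = 0.01 tokens per ms
     *   tockinRate   = 0.01 * 1_000_000        = 10_000 tockins per ms
     *   tockinsLimit = 1_000_000 * 50          = 50_000_000 tockins (the 50 call burst)
     *   available    = min(50, 50) * 1_000_000 = 50_000_000 tockins to start with
     *
     * so a single call (1 token = 1_000_000 tockins) that finds an empty pool has to wait
     * 1_000_000 / 10_000 = 100 ms, i.e. the sustained rate of 10 calls per second.
     */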
    /**
     * Check if a request is allowed through the rate limiter.
     * If this call returned {@code true}, then a rate token was consumed; otherwise
     * the request should be blocked.
     * @return whether to allow the request
     */
    public boolean allowRequest() {
        return whenNextAllowed(1, false) == 0;
    }

    /**
     * Check if a request that has a token cost higher than 1 is allowed through the rate limiter.
     * If this call returned {@code true}, then {@code cost} rate tokens were consumed; otherwise
     * the request should be blocked.
     * @param cost how many tokens to consume from the pool, if the request is allowed
     * @return whether to allow the request
     */
    public boolean allowRequest(int cost) {
        return whenNextAllowed(cost, false) == 0;
    }
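
    // Usage sketch (illustrative - handleRequest() and handleBatch() are hypothetical
    // caller methods, not part of this class): a batch operation that is five times as
    // expensive as a single request can be charged a higher token cost, e.g.
    //
    //   if (limiter.allowRequest())  handleRequest(); // costs 1 token
    //   if (limiter.allowRequest(5)) handleBatch();   // costs 5 tokens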
    /**
     * Check when the next call will be allowed, according to the specified rate.
     * The value returned is in milliseconds. If the result is 0, or if {@code alwaysConsume} was
     * specified, then the RateLimiter has recorded that the call has been allowed.
     * @param alwaysConsume if set to {@code true}, this method assumes that the caller will delay
     *   the action that is rate limited but will perform it without checking again - so it will
     *   consume the specified number of tokens as if the action has gone through. This means that
     *   the pool can get into a deficit, which will further delay additional actions.
     * @return how many milliseconds to wait before this request should be let through
     */
    public long whenNextAllowed(boolean alwaysConsume) {
        return whenNextAllowed(1, alwaysConsume);
    }
    /**
     * Check when the next call will be allowed, according to the specified rate.
     * The value returned is in milliseconds. If the result is 0, or if {@code alwaysConsume} was
     * specified, then the RateLimiter has recorded that the call has been allowed.
     * @param cost How costly the requested action is. The base rate is 1 token per request,
     *   but the client can declare a more costly action that consumes more tokens.
     * @param alwaysConsume if set to {@code true}, this method assumes that the caller will delay
     *   the action that is rate limited but will perform it without checking again - so it will
     *   consume the specified number of tokens as if the action has gone through. This means that
     *   the pool can get into a deficit, which will further delay additional actions.
     * @return how many milliseconds to wait before this request should be let through
     */
    public long whenNextAllowed(int cost, boolean alwaysConsume) {
        var now = System.nanoTime();
        var last = lastTimeStamp.getAndSet(now);
        // calculate how many tockins we accumulated since the last call. Because a tockin is a
        // millionth of a token and a nanosecond is a millionth of a millisecond, tokenRate
        // (tokens per ms) is numerically the tockins-per-nanosecond rate.
        // If the previous call was less than a microsecond ago, we still accumulate at least
        // one tockin, which is probably more than we should, but this is too small to matter - right?
        var add = (long)Math.ceil(tokenRate * (now - last));
        var nowAvailable = available.addAndGet(add);
        while (nowAvailable > tockinsLimit) { // cap the pool at its limit
            available.compareAndSet(nowAvailable, tockinsLimit);
            nowAvailable = available.get();
        }
        // answer the question: how long until the pool holds enough tockins for this request
        var tockinsNeeded = cost * TOKEN_SIZE;
        var toWait = (long)Math.ceil(Math.max(0, (tockinsNeeded - nowAvailable) / tockinRate));
        if (alwaysConsume || toWait == 0) // the caller will let the request go through, so consume the tokens now
            available.addAndGet(-tockinsNeeded);
        return toWait;
    }
}
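
Below is a minimal usage sketch of the delay-based API described in the class documentation: the caller asks whenNextAllowed() for a delay, commits to the request by passing {@code true} as the second argument, and sleeps for the returned number of milliseconds before acting. The RateLimiterExample class, the throttledSend() helper and the chosen limiter parameters are illustrative assumptions, not part of the gist.

package coil.geek;

import java.util.concurrent.TimeUnit;

public class RateLimiterExample {
    // 10 requests per second sustained, bursts of up to 50, starting with a full pool
    // (the same parameters as the example in the RateLimiter class documentation)
    private static final RateLimiter limiter = new RateLimiter(50, 50, 10, 1000);

    // Delay-then-commit pattern: pre-consume the tokens (alwaysConsume = true), wait
    // as long as the limiter asks, then perform the action without checking again.
    static void throttledSend(Runnable action) throws InterruptedException {
        long waitMs = limiter.whenNextAllowed(1, true);
        if (waitMs > 0)
            TimeUnit.MILLISECONDS.sleep(waitMs);
        action.run();
    }

    public static void main(String[] args) throws InterruptedException {
        for (int i = 0; i < 100; i++) {
            final int n = i;
            throttledSend(() -> System.out.println("request " + n));
        }
    }
}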