# I was looking for a rate limiting library so I could call rate-limited APIs as
# closely as possible to their enforced limits. I looked at the first few Python
# libraries I found, and when I glanced at the source, they were all clearly
# broken. Curious how this could be, I took the top Google and pip search
# results for "python rate limiting" and tried to get each one to do the wrong
# thing and fail to rate limit in situations that could come up in normal use
# (though in some cases very specific use).

# https://github.com/tomasbasham/ratelimit
# Where broken:
# https://github.com/tomasbasham/ratelimit/blob/master/ratelimit/decorators.py#L71
# Result: you will often exceed the rate, sometimes by as much as 2x
# Rating: 0/5 stars, this library is fundamentally broken, do not use it
# example breakage
from threading import Thread
from ratelimit import limits
from ratelimit import sleep_and_retry
from time import sleep
from time import time

TEN_SECONDS = 10
started = time()


def main():
    threads = [Thread(target=action, daemon=True) for _ in range(10)]
    start = time()
    sleep(9)
    for thread in threads:
        thread.start()
    for count in range(20):
        call_api()
        if time() - start < 12:
            break
    sleep(1)


calls = []


def action():
    while True:
        call_api()


@sleep_and_retry
@limits(calls=10, period=TEN_SECONDS)
def call_api():
    # this code should be rate limited to 10 calls in any 10 second period
    # it will be called 20 times in less than 1 second.
    calls.append(1)
    print(time() - started, sum(calls))


if __name__ == '__main__':
    main()
# OUTPUT
"""
9.006941556930542 1
9.007139444351196 2
9.00729751586914 4
9.00724458694458 3
9.007545471191406 6
9.007737636566162 7
9.007864713668823 8
9.0074622631073 5
9.00792121887207 9
9.00809097290039 10
10.001093864440918 11
10.001198291778564 12
10.001333236694336 14
10.001388311386108 15
10.001487493515015 17
10.001596450805664 19
10.001261949539185 13
10.001437902450562 16
10.001664876937866 20
10.001538276672363 18
python test_rate_limiter.py 0.04s user 0.00s system 0% cpu 11.042 total
"""
# https://github.com/RazerM/ratelimiter
# Where broken:
# https://github.com/RazerM/ratelimiter/blob/master/ratelimiter/_sync.py#L66
# Result: you can arbitrarily exceed the rate limit
# Rating: 0/5 stars, this library is fundamentally broken to an absurd degree,
# do not use it
# example breakage
from threading import Thread
from ratelimiter import RateLimiter
from time import sleep
from time import time

started = time()


def main():
    threads = [Thread(target=action, daemon=True) for _ in range(50)]
    start = time()
    for thread in threads:
        thread.start()
    for count in range(40):
        call_api()
        sleep(1)


calls = []


def action():
    while True:
        call_api()


rl = RateLimiter(10, 1)


def call_api():
    with rl:
        calls.append(1)
        print(time() - started, sum(calls))
        sleep(10)


if __name__ == '__main__':
    main()
# Output
'''
0.0005972385406494141 1
0.0008141994476318359 2
0.001020669937133789 3
0.0012218952178955078 4
0.001392364501953125 5
0.0015513896942138672 6
0.0016713142395019531 7
0.0018002986907958984 8
0.0020058155059814453 9
0.0022101402282714844 10
0.0024023056030273438 11
0.002510547637939453 12
0.002669811248779297 13
0.002866506576538086 14
0.0030846595764160156 15
0.0033309459686279297 16
0.0035207271575927734 17
0.0037696361541748047 18
0.0039501190185546875 19
0.0042018890380859375 20
0.00445556640625 21
0.004648447036743164 22
0.004866838455200195 23
0.0050928592681884766 24
0.0053119659423828125 25
0.0054950714111328125 26
0.005700588226318359 27
0.0059320926666259766 28
0.0061359405517578125 29
0.006322622299194336 30
0.006501436233520508 31
0.00665736198425293 32
0.0068204402923583984 33
0.00705265998840332 34
0.007256984710693359 35
0.007478237152099609 36
0.007687568664550781 37
0.007866144180297852 38
0.00804591178894043 39
0.008222103118896484 40
0.008393526077270508 41
0.008669376373291016 42
0.008849859237670898 43
0.009053468704223633 44
0.009227514266967773 45
0.009397029876708984 46
0.009583473205566406 47
0.00977182388305664 48
0.009982109069824219 49
0.010182380676269531 50
0.010230064392089844 51
'''
# https://gist.github.com/gregburek/1441055
# Where broken:
# Not fundamentally broken when run in a single thread; does not work at all
# in a threaded environment.
# Note: there is a 'threaded' version in the comments, but it sleeps inside a
# lock and therefore makes no sense (sketched just below). It will rate limit
# calls, but it will also serialize them, so it's basically pointless. Then
# there are progressively more complex riffs on the idea, which are not
# evaluated here.
# Result: correct as far as it can be used
# Rating: 3/5 stars, not broken, but the lack of threading limits its usefulness
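# To illustrate the sleep-inside-a-lock problem, here is a minimal sketch
# (my own, not code from that gist): every caller has to wait for the
# previous caller's sleep to finish before it can even be admitted, so calls
# are handled strictly one at a time instead of merely being capped at a rate.
from threading import Lock
from time import sleep

_lock = Lock()
MIN_INTERVAL = 0.1  # i.e. at most 10 calls per second


def serialized_rate_limited(fn):
    def wrapper(*args, **kw):
        with _lock:                  # every thread queues up here...
            sleep(MIN_INTERVAL)      # ...and sleeps while still holding the lock,
            return fn(*args, **kw)   # so only one call is ever in flight
    return wrapper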
# https://github.com/kburnik/rate-limiter
# Where broken:
# I didn't bother to test this; it doesn't even attempt to be thread safe.
# It appears to be written by a beginner, and it's a fine little learning
# project.
# Rating: 0/5, don't use this for any purpose.
# https://medium.com/clover-platform-blog/conquering-api-rate-limiting-dcac5552714d
# Where broken:
# This was a Google result about Python and rate limiting, but it just shows a
# naive exponential backoff implementation. Never use exponential backoff
# without some kind of reasonable maximum backoff value; exponential functions
# grow very quickly. If an API goes down for a bit, you don't need to wait
# twice as long as it was down before you try it again. That never makes any
# sense. (A capped variant is sketched just below.)
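# For contrast, a minimal sketch of backoff with a cap (illustrative only,
# not code from that article): the delay still doubles on each failure, but
# it never grows past MAX_BACKOFF, so a long outage doesn't translate into an
# equally long wait once the API is healthy again.
from time import sleep

MAX_BACKOFF = 60  # seconds; pick something reasonable for the API in question


def call_with_backoff(fn, max_attempts=8):
    delay = 1
    for attempt in range(max_attempts):
        try:
            return fn()
        except Exception:
            if attempt == max_attempts - 1:
                raise
            sleep(delay)
            delay = min(delay * 2, MAX_BACKOFF)  # capped exponential growth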
# https://github.com/cmeister2/requests_ratelimit_adapter
# Where broken:
# Only counts within pre-defined intervals, so it has the same issue as
# ratelimit above: a full allowance at the end of one interval plus another
# at the start of the next can land almost back to back, roughly doubling
# the effective rate.
# https://github.com/enricobacis/limit
# Where broken:
# Doesn't schedule the resources to be made available again until after the
# limited function runs. This one almost had me convinced that it worked,
# and it has the critical attribute of actually keeping you below your
# requested rate limit no matter what, and in a threadsafe way.
# Rating: 2/5, you probably don't want to be kept arbitrarily far below the
# rate you specify. Good work on actually rate limiting, though.
# example breakage:
from threading import Thread
from time import sleep
from time import time
from limit import limit

started = time()


def main():
    threads = [Thread(target=action, daemon=True) for _ in range(50)]
    # let it build up some steam
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()


calls = []


def action():
    while True:
        call_api()


@limit(10, 1)  # 10 per second
def call_api():
    calls.append(1)
    print(time() - started, sum(calls))
    sleep(10)


if __name__ == '__main__':
    main()
# Output: (should allow 10 per second)
'''
0.0006387233734130859 1
0.0008552074432373047 2
0.0010530948638916016 3
0.0012822151184082031 4
0.0014865398406982422 5
0.0016107559204101562 6
0.0018110275268554688 7
0.00199127197265625 8
0.0022041797637939453 9
0.0023641586303710938 10
11.006683111190796 11
11.010920763015747 12
11.011091947555542 13
11.011242866516113 14
11.011475563049316 15
11.01183557510376 16
11.012040853500366 17
11.012373208999634 18
11.013072729110718 19
11.013236045837402 20
'''
# https://limits.readthedocs.io/en/stable/
# Where broken:
# Seems to work! Complex to set up and the documentation isn't great, but it
# seems flexible and I don't see how it would break. (A rough usage sketch
# follows below.)
# Rating: 4.5/5, seems like a good library to me, although I didn't
# evaluate the Redis or Memcached backends.
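# Rough sketch of the kind of setup I mean (double check the names against
# the limits docs for the version you install): an in-memory moving window
# checked with hit(). Note that hit() only answers yes/no; if you want to
# block until capacity is available you have to do the waiting yourself.
from time import sleep
from limits import parse
from limits.storage import MemoryStorage
from limits.strategies import MovingWindowRateLimiter

limiter = MovingWindowRateLimiter(MemoryStorage())
ten_per_second = parse("10/second")


def call_api_when_allowed():
    # hand-rolled waiting; limits itself does not block
    while not limiter.hit(ten_per_second, "my_api"):
        sleep(0.05)
    ...  # make the actual request here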
# I wouldn't be very sporting if I didn't offer my own implementation.
# My goal here was to have something correct and relatively efficient,
# but that also didn't cluster calls in bursts, and that would hug the
# rate limit as closely as possible. Please feel free to embarrass me
# with examples of how it fails!
from functools import wraps
from time import monotonic
from time import sleep
from threading import RLock


class RateLimiter:
    # token bucket: tokens refill continuously at rate_limit / replenish_seconds

    def __init__(self, rate_limit, replenish_seconds):
        self.capacity = rate_limit
        self._tokens = 0
        self.replenish_seconds = replenish_seconds
        self.refill_per_second = rate_limit / replenish_seconds
        self.last_update = monotonic()
        self._lock = RLock()

    def go_or_fail(self):
        # take a token if one is available, otherwise report failure
        with self._lock:
            if 1 <= self.tokens:
                self._tokens -= 1
                return True
            return False

    @property
    def expected_wait(self):
        # how long until a full token should be available
        with self._lock:
            tokens = self.tokens
            if tokens >= 1:
                return 0
            return (1 - tokens) / self.refill_per_second

    def wait(self):
        while not self.go_or_fail():
            sleep(self.expected_wait)

    def __call__(self, wrapee):
        # allow the limiter to be used as a decorator
        @wraps(wrapee)
        def wrapper(*args, **kw):
            self.wait()
            return wrapee(*args, **kw)
        return wrapper

    @property
    def tokens(self):
        # refill based on elapsed time, capped at capacity
        with self._lock:
            now = monotonic()
            delta = self.refill_per_second * (now - self.last_update)
            self._tokens = min(self.capacity, self._tokens + delta)
            self.last_update = now
            return self._tokens
# Example of use
from threading import Thread
from time import sleep
from time import time
from rate_limiter import RateLimiter

rl = RateLimiter(10, 1)
started = time()


def main():
    threads = [Thread(target=action, daemon=True) for _ in range(100)]
    # let it build up some steam
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()


calls = []


def action():
    while True:
        call_api()


@rl
def call_api():
    calls.append(1)
    print(time() - started, sum(calls))
    sleep(10)


if __name__ == '__main__':
    main()
# Output:
'''
0.10018634796142578 1
0.20020723342895508 2
0.30017566680908203 3
0.40017271041870117 4
0.5002062320709229 5
0.6002218723297119 6
0.7001841068267822 7
0.8001668453216553 8
0.9001715183258057 9
1.0001983642578125 10
1.1002120971679688 11
1.2002060413360596 12
1.3002047538757324 13
1.4002130031585693 14
1.5002012252807617 15
1.6002020835876465 16
1.7002291679382324 17
1.8001976013183594 18
1.9002094268798828 19
2.0001697540283203 20
2.100170373916626 21
2.2001984119415283 22
2.300184488296509 23
2.4002139568328857 24
2.5002031326293945 25
2.600311517715454 26
2.700232982635498 27
2.800203800201416 28
2.9001505374908447 29
3.000216007232666 30
3.1002166271209717 31
3.200213670730591 32
3.300201416015625 33
3.4001760482788086 34
3.5001845359802246 35
3.600203037261963 36
3.7001852989196777 37
3.800187110900879 38
3.9002015590667725 39
4.000197410583496 40
4.1002280712127686 41
4.200199365615845 42
4.300195932388306 43
4.4001991748809814 44
4.500220537185669 45
4.600196838378906 46
4.700223684310913 47
4.800203323364258 48
4.9001946449279785 49
5.000195026397705 50
5.1001951694488525 51
5.200188398361206 52
5.3001954555511475 53
5.400208473205566 54
5.500195741653442 55
5.600203990936279 56
5.700193405151367 57
5.8002028465271 58
5.900341987609863 59
6.000202178955078 60
6.100194215774536 61
6.2001917362213135 62
6.300190448760986 63
6.400193214416504 64
6.500201463699341 65
'''
"Seems to work" from this survey is high praise!
But am I missing something in limits?
I can't figure out how to make it block until the limiter has capacity.
I opened an issue on the repo to get more info: alisaifee/limits#221
Oh hey that was me! I pretty much went through the same steps as this gist (and possibly all 7 stages of grief) while trying to find something that worked. FWIW, I am still using pyrate-limiter, added some features to it, and made requests-ratelimiter based on that.
One gotcha I'd add is that the latest "complete" version is pyrate-limiter v2.10. The current v3.x releases are a WIP rewrite that's missing some features I personally need (but may or may not apply to you).
I stumbled across this library https://github.com/vutran1710/PyrateLimiter and noticed it wasn't on your list, might be worth a look
(found in this thread tomasbasham/ratelimit#51 discussing maintained alternatives)

ps: I've been using tomasbasham/ratelimit for a couple of years and didn't realise it can surpass rate limits, thanks for pointing it out!