Skip to content

Instantly share code, notes, and snippets.

@TerenceLiu98
Last active April 20, 2025 13:17
Show Gist options
  • Save TerenceLiu98/5bab5b05a5fa6ec781e66ef8b184891c to your computer and use it in GitHub Desktop.
genai-async.py
import os
import csv
import time
import json
import random
import pandas as pd
from tqdm import tqdm
from google import genai
import random
import asyncio
from tqdm.asyncio import tqdm_asyncio
import time
import threading
from concurrent.futures import ThreadPoolExecutor
import asyncio
from tqdm.asyncio import tqdm_asyncio
class RateLimiter:
    """Thread-safe limiter that spaces calls at least ``60 / rpm`` seconds apart.

    A single instance is shared by all worker threads; each thread calls
    :meth:`wait` before issuing a request so the aggregate request rate
    stays at or below ``rpm`` requests per minute.
    """

    def __init__(self, rpm):
        self.max_calls = rpm        # requests/minute budget (kept for introspection)
        self.interval = 60.0 / rpm  # minimum spacing between consecutive calls, seconds
        self.lock = threading.Lock()
        self.last_call = 0.0        # monotonic timestamp of the last permitted call

    def wait(self):
        """Block until the next call is allowed, then record the call time.

        The lock is held across the sleep, so concurrent threads are
        released one at a time, ``interval`` seconds apart.
        """
        with self.lock:
            # Bug fix: use time.monotonic() instead of time.time().  The wall
            # clock can jump (NTP sync, DST), which would make the limiter
            # sleep far too long or not at all; monotonic time cannot go back.
            now = time.monotonic()
            wait_time = self.interval - (now - self.last_call)
            if wait_time > 0:
                time.sleep(wait_time)
            self.last_call = time.monotonic()
# Process-wide limiter shared by every worker thread (30 requests/minute,
# matching the Gemini free-tier style quota -- TODO confirm against the plan).
rate_limiter = RateLimiter(rpm=30)
def sync_infer(prompt):
    """Send one prompt to the model and return the extracted answer.

    Blocks on the shared ``rate_limiter`` first so the process-wide request
    rate stays within quota.  On any API or parsing failure the error is
    printed and ``None`` is returned instead of raising.
    """
    rate_limiter.wait()
    # NOTE(review): a fresh client per request looks wasteful but may be
    # intentional for thread isolation -- confirm before hoisting it out.
    api_client = genai.Client(api_key=API_KEY)
    try:
        reply = api_client.models.generate_content(model=model_name, contents=prompt)
        return {"prompt": prompt, "response": extract_answer(reply.text)}
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
async def batch_inference(prompts, max_workers=5):
    """Run ``sync_infer`` over *prompts* concurrently on a thread pool.

    Each prompt is dispatched to the pool via the running event loop's
    executor; results are gathered with an async progress bar and returned
    in the same order as *prompts* (``None`` entries for failed calls).
    """
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [loop.run_in_executor(pool, sync_infer, item) for item in prompts]
        return await tqdm_asyncio.gather(*pending, desc="Running Inference")
if __name__ == "__main__":
    # Demo prompts (mixed English / Chinese).
    prompts = ["Hello, Who are you", "Tell me about yourself", "为什么天空是蓝色的", "如何证明 1+1 = 2"]
    # Bug fix: the original call passed ``max_concurrency=4`` but
    # ``batch_inference`` declares the parameter as ``max_workers`` -- the
    # script crashed with TypeError before issuing a single request.
    results = asyncio.run(batch_inference(prompts=prompts, max_workers=4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment