Skip to content

Instantly share code, notes, and snippets.

@TerenceLiu98
Last active April 20, 2025 13:17
Show Gist options
  • Save TerenceLiu98/5bab5b05a5fa6ec781e66ef8b184891c to your computer and use it in GitHub Desktop.
genai-async.py
import os
import csv
import time
import json
import random
import pandas as pd
from tqdm import tqdm
from google import genai
import random
import asyncio
from tqdm.asyncio import tqdm_asyncio
import time
import threading
from concurrent.futures import ThreadPoolExecutor
import asyncio
from tqdm.asyncio import tqdm_asyncio
class RateLimiter:
    """Thread-safe limiter that spaces calls at least ``60 / rpm`` seconds apart.

    A single instance is shared by all worker threads; each thread calls
    :meth:`wait` before issuing a request so the aggregate request rate
    stays at or below ``rpm`` requests per minute.
    """

    def __init__(self, rpm):
        self.max_calls = rpm        # requests/minute budget (kept for introspection)
        self.interval = 60.0 / rpm  # minimum spacing between consecutive calls, seconds
        self.lock = threading.Lock()
        self.last_call = 0.0        # monotonic timestamp of the last permitted call

    def wait(self):
        """Block until the next call is allowed, then record the call time.

        The lock is held across the sleep, so concurrent threads are
        released one at a time, ``interval`` seconds apart.
        """
        with self.lock:
            # Bug fix: use time.monotonic() instead of time.time().  The wall
            # clock can jump (NTP sync, DST), which would make the limiter
            # sleep far too long or not at all; monotonic time cannot go back.
            now = time.monotonic()
            wait_time = self.interval - (now - self.last_call)
            if wait_time > 0:
                time.sleep(wait_time)
            self.last_call = time.monotonic()
# Process-wide limiter shared by every worker thread (30 requests/minute,
# matching the Gemini free-tier style quota -- TODO confirm against the plan).
rate_limiter = RateLimiter(rpm=30)
def sync_infer(prompt):
    """Send one prompt to the model and return the extracted answer.

    Blocks on the shared ``rate_limiter`` first so the process-wide request
    rate stays within quota.  On any API or parsing failure the error is
    printed and ``None`` is returned instead of raising.
    """
    rate_limiter.wait()
    # NOTE(review): a fresh client per request looks wasteful but may be
    # intentional for thread isolation -- confirm before hoisting it out.
    api_client = genai.Client(api_key=API_KEY)
    try:
        reply = api_client.models.generate_content(model=model_name, contents=prompt)
        return {"prompt": prompt, "response": extract_answer(reply.text)}
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
async def batch_inference(prompts, max_workers=5):
    """Run ``sync_infer`` over *prompts* concurrently on a thread pool.

    Each prompt is dispatched to the pool via the running event loop's
    executor; results are gathered with an async progress bar and returned
    in the same order as *prompts* (``None`` entries for failed calls).
    """
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [loop.run_in_executor(pool, sync_infer, item) for item in prompts]
        return await tqdm_asyncio.gather(*pending, desc="Running Inference")
if __name__ == "__main__":
    # Demo prompts (mixed English / Chinese).
    prompts = ["Hello, Who are you", "Tell me about yourself", "为什么天空是蓝色的", "如何证明 1+1 = 2"]
    # Bug fix: the original call passed ``max_concurrency=4`` but
    # ``batch_inference`` declares the parameter as ``max_workers`` -- the
    # script crashed with TypeError before issuing a single request.
    results = asyncio.run(batch_inference(prompts=prompts, max_workers=4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment