Skip to content

Instantly share code, notes, and snippets.

@grahama1970
Last active December 20, 2024 14:44
Show Gist options
  • Save grahama1970/ebe5fa78eab07efea16c62b4dce4f4f7 to your computer and use it in GitHub Desktop.
Save grahama1970/ebe5fa78eab07efea16c62b4dce4f4f7 to your computer and use it in GitHub Desktop.
Comparing lorax requests to OpenAI call: curl = 11 seconds, requests = 19 seconds, OpenAI: 30 seconds
2024-12-20 09:39:32.640 | INFO | __main__:run_curl_version:10 -
=== Running curl version ===
2024-12-20 09:39:32.641 | INFO | __main__:run_curl_version:43 - Initial request time: 0.00 seconds
2024-12-20 09:39:32.641 | INFO | __main__:run_curl_version:47 - Response tokens:
To determine the number of rugby players on a touch rugby team, we can refer to the relevant section of the document.
1. **Understanding Team Composition**: The document states that a team consists of a maximum of 14 players. However, this number includes reserves, meaning that only six (6) players are allowed on the field at any given time during a match.
2. **Player Limitation**: Additionally, teams are encouraged to include mixed genders (four males and four females), indicating that2024-12-20 09:39:43.862 | INFO | __main__:run_curl_version:68 -
Tokens generated: 100
2024-12-20 09:39:43.863 | INFO | __main__:run_curl_version:69 - Token processing time: 11.22 seconds
2024-12-20 09:39:43.863 | INFO | __main__:run_curl_version:70 - Total execution time: 11.22 seconds
2024-12-20 09:39:43.864 | INFO | __main__:<module>:162 -
==================================================
2024-12-20 09:39:43.865 | INFO | __main__:run_requests_version:74 -
=== Running requests version ===
2024-12-20 09:39:43.873 | INFO | __main__:run_requests_version:96 - Initial request time: 0.01 seconds
2024-12-20 09:39:43.873 | INFO | __main__:run_requests_version:100 - Response tokens:
To determine how many rugby players are on a touch rugby team, we can refer to the rules provided in the document.
1. A team consists of a maximum of 14 players.
2. Each team has a maximum of 6 players on the field at any time.
3. If fewer than 4 players are on the field due to sin bins, dismissals, or injuries, the team will be declared to have a player less.
4. No more than 32024-12-20 09:40:03.338 | INFO | __main__:run_requests_version:118 -
Tokens generated: 100
2024-12-20 09:40:03.339 | INFO | __main__:run_requests_version:119 - Token processing time: 19.47 seconds
2024-12-20 09:40:03.339 | INFO | __main__:run_requests_version:120 - Total execution time: 19.47 seconds
2024-12-20 09:40:03.339 | INFO | __main__:<module>:164 -
==================================================
2024-12-20 09:40:03.339 | INFO | __main__:run_openai_version:124 -
=== Running OpenAI client version ===
2024-12-20 09:40:03.417 | INFO | __main__:run_openai_version:134 - Response tokens:
To determine how many rugby players are on a touch rugby team, we can refer to the relevant section of the document.
1. The document states that a team consists of a maximum of 14 players.
2. It also mentions that teams are formed by mixing community and competition players, suggesting that the number of players per team is consistent.
3. Therefore, the total number of players on a touch rugby team is 14.
So, the final answer is: 142024-12-20 09:40:31.319 | INFO | __main__:run_openai_version:152 -
Tokens generated: 98
2024-12-20 09:40:31.320 | INFO | __main__:run_openai_version:153 - Total execution time: 27.98 seconds
2024-12-20 09:40:31.320 | INFO | __main__:<module>:176 -
=== Performance Comparison ===
+----------+------------+--------------------+
| Method | Time (s) | Diff vs Curl (s) |
+==========+============+====================+
| Curl | 11.22 | 0 |
+----------+------------+--------------------+
| Requests | 19.47 | 8.25 |
+----------+------------+--------------------+
| OpenAI | 27.98 | 16.76 |
+----------+------------+--------------------+
import json
import time
import requests
import subprocess
from openai import OpenAI
from loguru import logger
def run_curl_version():
    """Benchmark a streaming chat completion issued through the ``curl`` CLI.

    Spawns ``curl`` against the local server, reads the SSE stream line by
    line, counts content-bearing delta chunks, and logs timing.

    Returns:
        float: total wall-clock execution time in seconds.
    """
    logger.info("\n=== Running curl version ===")
    start_time = time.time()
    payload = {
        "model": "Trelis/Meta-Llama-3.1-8B-Instruct-touch-rugby-2-adapters",
        "messages": [
            {"role": "system", "content": "You are a friendly chatbot"},
            {"role": "user", "content": "How many rugby players are on a touch rugby team?"},
        ],
        "max_tokens": 100,
        "stream": True,
    }
    request_start = time.time()
    # Argument list with the default shell=False avoids shell-quoting pitfalls
    # and injection risk; json.dumps guarantees a well-formed request body.
    process = subprocess.Popen(
        [
            "curl",
            "http://127.0.0.1:30002/v1/chat/completions",
            "-X", "POST",
            "-H", "Content-Type: application/json",
            "-d", json.dumps(payload),
        ],
        stdout=subprocess.PIPE,
        # Discard stderr instead of piping it: a PIPE that is never read can
        # fill up and deadlock the child (curl writes its progress meter there).
        stderr=subprocess.DEVNULL,
        text=True,
    )
    # NOTE(review): Popen returns as soon as the child is spawned, so this
    # measures process startup only, not the HTTP round trip.
    request_time = time.time() - request_start
    logger.info(f"Initial request time: {request_time:.2f} seconds")
    tokens_start = time.time()
    token_count = 0
    logger.info("Response tokens:")
    # Iterating the pipe yields lines until EOF (curl exiting).
    for line in process.stdout:
        if line.startswith('data: '):
            line = line[6:]  # strip the SSE "data: " prefix
        if line.strip() == '[DONE]':
            continue
        try:
            chunk = json.loads(line)
            if chunk['choices'][0]['delta'].get('content'):
                token_count += 1
                print(chunk['choices'][0]['delta']['content'], end='', flush=True)
        except json.JSONDecodeError:
            # Non-JSON lines (blank keep-alives, partial output) are skipped.
            continue
    process.wait()
    tokens_time = time.time() - tokens_start
    total_time = time.time() - start_time
    logger.info(f"\nTokens generated: {token_count}")
    logger.info(f"Token processing time: {tokens_time:.2f} seconds")
    logger.info(f"Total execution time: {total_time:.2f} seconds")
    return total_time
def run_requests_version():
    """Benchmark the same streaming chat completion via the ``requests`` library.

    Posts the request with ``stream=True``, iterates the SSE lines, counts
    content-bearing delta chunks, and logs timing.

    Returns:
        float: total wall-clock execution time in seconds.
    """
    logger.info("\n=== Running requests version ===")
    start_time = time.time()
    request_start = time.time()
    # ``with`` guarantees the underlying connection is released even if
    # iteration raises (the original leaked the streaming response); the
    # timeout keeps an unreachable server from hanging the benchmark forever.
    with requests.post(
        "http://127.0.0.1:30002/v1/chat/completions",
        headers={"Content-Type": "application/json"},
        json={
            "model": "Trelis/Meta-Llama-3.1-8B-Instruct-touch-rugby-2-adapters",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a friendly chatbot",
                },
                {"role": "user", "content": "How many rugby players are on a touch rugby team?"},
            ],
            "max_tokens": 100,
            "stream": True
        },
        stream=True,
        timeout=(10, 120),  # (connect, read) seconds
    ) as response:
        response.raise_for_status()  # fail fast on HTTP errors instead of parsing an error body
        request_time = time.time() - request_start
        logger.info(f"Initial request time: {request_time:.2f} seconds")
        tokens_start = time.time()
        token_count = 0
        logger.info("Response tokens:")
        for raw_line in response.iter_lines():
            if not raw_line:
                continue  # keep-alive blank lines
            line = raw_line.decode('utf-8')
            if line.startswith('data: '):
                line = line[6:]  # strip the SSE "data: " prefix
            if line.strip() == '[DONE]':
                continue
            try:
                chunk = json.loads(line)
                if chunk['choices'][0]['delta'].get('content'):
                    token_count += 1
                    print(chunk['choices'][0]['delta']['content'], end='', flush=True)
            except json.JSONDecodeError:
                continue
    tokens_time = time.time() - tokens_start
    total_time = time.time() - start_time
    logger.info(f"\nTokens generated: {token_count}")
    logger.info(f"Token processing time: {tokens_time:.2f} seconds")
    logger.info(f"Total execution time: {total_time:.2f} seconds")
    return total_time
def run_openai_version():
    """Benchmark the same streaming chat completion via the OpenAI client.

    Points the client at the local OpenAI-compatible server, consumes the
    streamed chunks, counts content-bearing deltas, and logs timing.

    Returns:
        float: total wall-clock execution time in seconds.
    """
    logger.info("\n=== Running OpenAI client version ===")
    start_time = time.time()
    client = OpenAI(
        api_key="EMPTY",  # local server ignores the key but the client requires one
        base_url="http://127.0.0.1:30002/v1",
    )
    # (Removed unused `client_start` timer from the original.)
    token_count = 0
    logger.info("Response tokens:")
    for chunk in client.chat.completions.create(
        model="Trelis/Meta-Llama-3.1-8B-Instruct-touch-rugby-2-adapters",
        messages=[
            {
                "role": "system",
                "content": "You are a friendly chatbot",
            },
            {"role": "user", "content": "How many rugby players are on a touch rugby team?"},
        ],
        max_tokens=100,
        stream=True
    ):
        if chunk.choices[0].delta.content is not None:
            token_count += 1
            print(chunk.choices[0].delta.content, end="", flush=True)
    total_time = time.time() - start_time
    logger.info(f"\nTokens generated: {token_count}")
    logger.info(f"Total execution time: {total_time:.2f} seconds")
    return total_time
if __name__ == "__main__":
    from tabulate import tabulate
    # Run each benchmark in turn, separated by a divider in the log.
    timings = []
    for label, runner in (
        ("Curl", run_curl_version),
        ("Requests", run_requests_version),
        ("OpenAI", run_openai_version),
    ):
        if timings:
            logger.info("\n" + "=" * 50)
        timings.append((label, runner()))
    # Build the comparison rows relative to the curl baseline.
    baseline = timings[0][1]
    rows = [["Method", "Time (s)", "Diff vs Curl (s)"]]
    rows.append(["Curl", f"{baseline:.2f}", "0.00"])
    for label, elapsed in timings[1:]:
        rows.append([label, f"{elapsed:.2f}", f"{elapsed - baseline:.2f}"])
    logger.info("\n=== Performance Comparison ===")
    print("\n" + tabulate(rows, headers="firstrow", tablefmt="grid"))
=== Performance Comparison ===
+----------+------------+--------------------+
| Method | Time (s) | Diff vs Curl (s) |
+==========+============+====================+
| Curl | 11.22 | 0 |
+----------+------------+--------------------+
| Requests | 19.47 | 8.25 |
+----------+------------+--------------------+
| OpenAI | 27.98 | 16.76 |
+----------+------------+--------------------+
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment