Calling bullshit
@cianclarke

Results:

Python Results (averaged over 50 calls):

Direct API - Avg: 692ms, Min: 377ms, Max: 1796ms.
LangChain+Memory - Avg: 735ms, Min: 412ms, Max: 1376ms.

Overhead:
LangChain+Memory vs Direct: 43ms (LangChain + Memory overhead).

Node.js Results (averaged over 50 calls):

Direct OpenAI API - Avg: 709ms, Min: 358ms, Max: 2548ms.
LangChain+Memory - Avg: 800ms, Min: 440ms, Max: 1883ms.

Overhead:
LangChain+Memory vs Direct: 91ms (LangChain + Memory overhead).
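
In relative terms, that's roughly a 6% slowdown for Python and 13% for Node.js. As a quick sanity check on the averages above (a throwaway Python snippet, not part of the original benchmark):

# Relative overhead computed from the reported averages
python_overhead = (735 - 692) / 692 * 100  # ~6.2%
node_overhead = (800 - 709) / 709 * 100    # ~12.8%
print(f'Python: +{python_overhead:.1f}%, Node.js: +{node_overhead:.1f}%')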

Node.js benchmark (benchmark.js):

import OpenAI from 'openai';
import { ChatOpenAI } from '@langchain/openai';
import { BufferMemory } from 'langchain/memory';
import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system';
import { HumanMessage } from '@langchain/core/messages';

const prompt = "What is 2+2? Answer in one word.";
async function benchmarkDirectAPI() {
  const openai = new OpenAI({
    apiKey: process.env.OPENAI_API_KEY,
  });

  console.log('πŸš€ Benchmarking Direct OpenAI API...');
  const times = [];
  for (let i = 0; i < 50; i++) {
    const start = Date.now();
    await openai.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 10
    });
    const end = Date.now();
    times.push(end - start);
    console.log(`  Call ${i + 1}: ${end - start}ms`);
  }
  return times;
}
async function benchmarkLangChainWithMemory() {
  const llm = new ChatOpenAI({
    modelName: 'gpt-4o-mini',
    openAIApiKey: process.env.OPENAI_API_KEY,
    maxTokens: 10  // match the direct API's max_tokens for a fair comparison
  });

  // Create file-based memory for persistence
  const memory = new BufferMemory({
    chatHistory: new FileSystemChatMessageHistory('./conversation_history.json'),
    returnMessages: true,
    memoryKey: 'chat_history'
  });

  console.log('\nπŸ”— Benchmarking LangChain JS with Memory...');
  const times = [];
  for (let i = 0; i < 50; i++) {
    const start = Date.now();

    // Get conversation history from memory (it grows each iteration,
    // so later calls send progressively longer prompts)
    const chatHistory = await memory.chatHistory.getMessages();

    // Create messages array with history + new prompt
    const messages = [
      ...chatHistory,
      new HumanMessage(prompt)
    ];

    // Call LLM with conversation history
    const response = await llm.invoke(messages);

    // Save the exchange to memory
    await memory.chatHistory.addMessage(new HumanMessage(prompt));
    await memory.chatHistory.addMessage(response);

    const end = Date.now();
    times.push(end - start);
    console.log(`  Call ${i + 1}: ${end - start}ms`);
  }
  return times;
}
function calculateStats(times) {
  const avg = times.reduce((a, b) => a + b, 0) / times.length;
  const min = Math.min(...times);
  const max = Math.max(...times);
  return { avg: Math.round(avg), min, max };
}
async function main() {
  if (!process.env.OPENAI_API_KEY) {
    console.error('❌ Please set OPENAI_API_KEY environment variable');
    process.exit(1);
  }

  console.log('πŸ“Š GPT-4o-mini Latency Benchmark\n');
  const directTimes = await benchmarkDirectAPI();
  const langchainMemoryTimes = await benchmarkLangChainWithMemory();

  const directStats = calculateStats(directTimes);
  const langchainMemoryStats = calculateStats(langchainMemoryTimes);

  console.log('\nπŸ“ˆ Results:');
  console.log(`Direct API - Avg: ${directStats.avg}ms, Min: ${directStats.min}ms, Max: ${directStats.max}ms`);
  console.log(`LangChain+Memory - Avg: ${langchainMemoryStats.avg}ms, Min: ${langchainMemoryStats.min}ms, Max: ${langchainMemoryStats.max}ms`);
  console.log('\nOverhead:');
  console.log(`LangChain+Memory vs Direct: ${langchainMemoryStats.avg - directStats.avg}ms (LangChain + Memory overhead)`);
}

main().catch(console.error);
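
Note that the two "with Memory" setups aren't identical: the Node.js version persists history through FileSystemChatMessageHistory, so every iteration also pays for file reads and writes, while the Python version below keeps history in an in-process ConversationBufferMemory. That difference likely accounts for part of the larger Node.js overhead (91ms vs 43ms).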

Python benchmark:

import os
import time
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage

prompt = "What is 2+2? Answer in one word."
def benchmark_direct_api():
    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    print('πŸš€ Benchmarking Direct OpenAI API...')
    times = []
    for i in range(50):
        start = time.time()
        client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[{'role': 'user', 'content': prompt}],
            max_tokens=10
        )
        end = time.time()
        latency_ms = int((end - start) * 1000)
        times.append(latency_ms)
        print(f'  Call {i + 1}: {latency_ms}ms')
    return times
def benchmark_langchain_with_memory():
    llm = ChatOpenAI(
        model_name='gpt-4o-mini',
        openai_api_key=os.getenv('OPENAI_API_KEY'),
        max_tokens=10  # match the direct API's max_tokens for a fair comparison
    )

    # Create in-memory conversation history (unlike the Node.js version,
    # which persists history to a file)
    memory = ConversationBufferMemory(return_messages=True)

    print('\nπŸ”— Benchmarking LangChain Python with Memory...')
    times = []
    for i in range(50):
        start = time.time()

        # Get conversation history from memory (it grows each iteration,
        # so later calls send progressively longer prompts)
        chat_history = memory.chat_memory.messages

        # Create messages list with history + new prompt
        messages = chat_history + [HumanMessage(content=prompt)]

        # Call LLM with conversation history
        response = llm.invoke(messages)

        # Save the exchange to memory
        memory.chat_memory.add_user_message(prompt)
        memory.chat_memory.add_ai_message(response.content)

        end = time.time()
        latency_ms = int((end - start) * 1000)
        times.append(latency_ms)
        print(f'  Call {i + 1}: {latency_ms}ms')
    return times
def calculate_stats(times):
    avg = sum(times) / len(times)
    min_time = min(times)
    max_time = max(times)
    return {'avg': round(avg), 'min': min_time, 'max': max_time}
def main():
    if not os.getenv('OPENAI_API_KEY'):
        print('❌ Please set OPENAI_API_KEY environment variable')
        return

    print('πŸ“Š GPT-4o-mini Latency Benchmark\n')
    direct_times = benchmark_direct_api()
    langchain_memory_times = benchmark_langchain_with_memory()

    direct_stats = calculate_stats(direct_times)
    langchain_memory_stats = calculate_stats(langchain_memory_times)

    print('\nπŸ“ˆ Results:')
    print(f'Direct API - Avg: {direct_stats["avg"]}ms, Min: {direct_stats["min"]}ms, Max: {direct_stats["max"]}ms')
    print(f'LangChain+Memory - Avg: {langchain_memory_stats["avg"]}ms, Min: {langchain_memory_stats["min"]}ms, Max: {langchain_memory_stats["max"]}ms')
    print('\nOverhead:')
    print(f'LangChain+Memory vs Direct: {langchain_memory_stats["avg"] - direct_stats["avg"]}ms (LangChain + Memory overhead)')


if __name__ == '__main__':
    main()

package.json:

{
  "name": "langbench",
  "version": "1.0.0",
  "description": "Benchmark OpenAI GPT-4o-mini vs LangChain JS latency",
  "main": "benchmark.js",
  "type": "module",
  "scripts": {
    "start": "node benchmark.js"
  },
  "dependencies": {
    "openai": "^4.0.0",
    "langchain": "^0.3.0",
    "@langchain/core": "^0.3.0",
    "@langchain/openai": "^0.3.0",
    "@langchain/community": "^0.3.0"
  }
}

requirements.txt:

openai>=1.12.0
langchain>=0.3.0,<0.4.0
langchain-openai>=0.3.0,<0.4.0
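
To reproduce: for Node.js, run npm install then npm start (which invokes node benchmark.js per the scripts block above); for Python, pip install -r requirements.txt and run the script directly. Both expect OPENAI_API_KEY to be set in the environment.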