Calling bullshit
@cianclarke

Results:

Python Results (averaged over 50 calls):

Direct API - Avg: 692ms, Min: 377ms, Max: 1796ms.
LangChain+Memory - Avg: 735ms, Min: 412ms, Max: 1376ms.

Overhead:
LangChain+Memory vs Direct: 43ms (LangChain + Memory overhead).

Node.js Results (averaged over 50 calls):

Direct OpenAI API - Avg: 709ms, Min: 358ms, Max: 2548ms.
LangChain+Memory - Avg: 800ms, Min: 440ms, Max: 1883ms.

Overhead:
LangChain+Memory vs Direct: 91ms (LangChain + Memory overhead).
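
In relative terms, that's roughly a 6% slowdown for Python and 13% for Node.js. As a quick sanity check on the averages above (a throwaway Python snippet, not part of the original benchmark):

# Relative overhead computed from the reported averages
python_overhead = (735 - 692) / 692 * 100  # ~6.2%
node_overhead = (800 - 709) / 709 * 100    # ~12.8%
print(f'Python: +{python_overhead:.1f}%, Node.js: +{node_overhead:.1f}%')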

Node.js benchmark (benchmark.js):

import OpenAI from 'openai';
import { ChatOpenAI } from '@langchain/openai';
import { BufferMemory } from 'langchain/memory';
import { FileSystemChatMessageHistory } from '@langchain/community/stores/message/file_system';
import { HumanMessage } from '@langchain/core/messages';

const prompt = "What is 2+2? Answer in one word.";
async function benchmarkDirectAPI() {
  const openai = new OpenAI({
    apiKey: process.env.OPENAI_API_KEY,
  });

  console.log('πŸš€ Benchmarking Direct OpenAI API...');
  const times = [];
  for (let i = 0; i < 50; i++) {
    const start = Date.now();
    await openai.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 10
    });
    const end = Date.now();
    times.push(end - start);
    console.log(`  Call ${i + 1}: ${end - start}ms`);
  }
  return times;
}
async function benchmarkLangChainWithMemory() {
  const llm = new ChatOpenAI({
    modelName: 'gpt-4o-mini',
    openAIApiKey: process.env.OPENAI_API_KEY,
    maxTokens: 10  // match the direct API's max_tokens for a fair comparison
  });

  // Create file-based memory for persistence
  const memory = new BufferMemory({
    chatHistory: new FileSystemChatMessageHistory('./conversation_history.json'),
    returnMessages: true,
    memoryKey: 'chat_history'
  });

  console.log('\nπŸ”— Benchmarking LangChain JS with Memory...');
  const times = [];
  for (let i = 0; i < 50; i++) {
    const start = Date.now();

    // Get conversation history from memory (it grows each iteration,
    // so later calls send progressively longer prompts)
    const chatHistory = await memory.chatHistory.getMessages();

    // Create messages array with history + new prompt
    const messages = [
      ...chatHistory,
      new HumanMessage(prompt)
    ];

    // Call LLM with conversation history
    const response = await llm.invoke(messages);

    // Save the exchange to memory
    await memory.chatHistory.addMessage(new HumanMessage(prompt));
    await memory.chatHistory.addMessage(response);

    const end = Date.now();
    times.push(end - start);
    console.log(`  Call ${i + 1}: ${end - start}ms`);
  }
  return times;
}
function calculateStats(times) {
  const avg = times.reduce((a, b) => a + b, 0) / times.length;
  const min = Math.min(...times);
  const max = Math.max(...times);
  return { avg: Math.round(avg), min, max };
}
async function main() {
  if (!process.env.OPENAI_API_KEY) {
    console.error('❌ Please set OPENAI_API_KEY environment variable');
    process.exit(1);
  }

  console.log('πŸ“Š GPT-4o-mini Latency Benchmark\n');
  const directTimes = await benchmarkDirectAPI();
  const langchainMemoryTimes = await benchmarkLangChainWithMemory();

  const directStats = calculateStats(directTimes);
  const langchainMemoryStats = calculateStats(langchainMemoryTimes);

  console.log('\nπŸ“ˆ Results:');
  console.log(`Direct API - Avg: ${directStats.avg}ms, Min: ${directStats.min}ms, Max: ${directStats.max}ms`);
  console.log(`LangChain+Memory - Avg: ${langchainMemoryStats.avg}ms, Min: ${langchainMemoryStats.min}ms, Max: ${langchainMemoryStats.max}ms`);
  console.log('\nOverhead:');
  console.log(`LangChain+Memory vs Direct: ${langchainMemoryStats.avg - directStats.avg}ms (LangChain + Memory overhead)`);
}

main().catch(console.error);
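
Note that the two "with Memory" setups aren't identical: the Node.js version persists history through FileSystemChatMessageHistory, so every iteration also pays for file reads and writes, while the Python version below keeps history in an in-process ConversationBufferMemory. That difference likely accounts for part of the larger Node.js overhead (91ms vs 43ms).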

Python benchmark:

import os
import time
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.schema import HumanMessage

prompt = "What is 2+2? Answer in one word."
def benchmark_direct_api():
    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    print('πŸš€ Benchmarking Direct OpenAI API...')
    times = []
    for i in range(50):
        start = time.time()
        client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[{'role': 'user', 'content': prompt}],
            max_tokens=10
        )
        end = time.time()
        latency_ms = int((end - start) * 1000)
        times.append(latency_ms)
        print(f'  Call {i + 1}: {latency_ms}ms')
    return times
def benchmark_langchain_with_memory():
    llm = ChatOpenAI(
        model_name='gpt-4o-mini',
        openai_api_key=os.getenv('OPENAI_API_KEY'),
        max_tokens=10  # match the direct API's max_tokens for a fair comparison
    )

    # Create in-memory conversation history (unlike the Node.js version,
    # which persists history to a file)
    memory = ConversationBufferMemory(return_messages=True)

    print('\nπŸ”— Benchmarking LangChain Python with Memory...')
    times = []
    for i in range(50):
        start = time.time()

        # Get conversation history from memory (it grows each iteration,
        # so later calls send progressively longer prompts)
        chat_history = memory.chat_memory.messages

        # Create messages list with history + new prompt
        messages = chat_history + [HumanMessage(content=prompt)]

        # Call LLM with conversation history
        response = llm.invoke(messages)

        # Save the exchange to memory
        memory.chat_memory.add_user_message(prompt)
        memory.chat_memory.add_ai_message(response.content)

        end = time.time()
        latency_ms = int((end - start) * 1000)
        times.append(latency_ms)
        print(f'  Call {i + 1}: {latency_ms}ms')
    return times
def calculate_stats(times):
    avg = sum(times) / len(times)
    min_time = min(times)
    max_time = max(times)
    return {'avg': round(avg), 'min': min_time, 'max': max_time}
def main():
    if not os.getenv('OPENAI_API_KEY'):
        print('❌ Please set OPENAI_API_KEY environment variable')
        return

    print('πŸ“Š GPT-4o-mini Latency Benchmark\n')
    direct_times = benchmark_direct_api()
    langchain_memory_times = benchmark_langchain_with_memory()

    direct_stats = calculate_stats(direct_times)
    langchain_memory_stats = calculate_stats(langchain_memory_times)

    print('\nπŸ“ˆ Results:')
    print(f'Direct API - Avg: {direct_stats["avg"]}ms, Min: {direct_stats["min"]}ms, Max: {direct_stats["max"]}ms')
    print(f'LangChain+Memory - Avg: {langchain_memory_stats["avg"]}ms, Min: {langchain_memory_stats["min"]}ms, Max: {langchain_memory_stats["max"]}ms')
    print('\nOverhead:')
    print(f'LangChain+Memory vs Direct: {langchain_memory_stats["avg"] - direct_stats["avg"]}ms (LangChain + Memory overhead)')


if __name__ == '__main__':
    main()

package.json:

{
  "name": "langbench",
  "version": "1.0.0",
  "description": "Benchmark OpenAI GPT-4o-mini vs LangChain JS latency",
  "main": "benchmark.js",
  "type": "module",
  "scripts": {
    "start": "node benchmark.js"
  },
  "dependencies": {
    "openai": "^4.0.0",
    "langchain": "^0.3.0",
    "@langchain/core": "^0.3.0",
    "@langchain/openai": "^0.3.0",
    "@langchain/community": "^0.3.0"
  }
}

requirements.txt:

openai>=1.12.0
langchain>=0.3.0,<0.4.0
langchain-openai>=0.3.0,<0.4.0
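
To reproduce: for Node.js, run npm install then npm start (which invokes node benchmark.js per the scripts block above); for Python, pip install -r requirements.txt and run the script directly. Both expect OPENAI_API_KEY to be set in the environment.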