Created: September 2, 2025 17:52
Save rochacbruno/537ca8e166fd892dbd2e4a36943cb7e5 to your computer and use it in GitHub Desktop.
Generate Random Huge 2GB Sample Log for Nginx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import asyncio | |
import random | |
import datetime | |
import time | |
import os | |
from concurrent.futures import ThreadPoolExecutor | |
# ---------------------------------------------------------------------------
# Tunables controlling output size and parallelism.
# ---------------------------------------------------------------------------
BATCH_SIZE = 10000                      # log lines produced per generation task
CONCURRENT_BATCHES = 10                 # generation tasks kept in flight at once
WRITE_BUFFER_SIZE = 50 * 1024 * 1024    # 50MB userspace write buffer
TARGET_SIZE_GB = 2                      # default output size
NUM_THREADS = 8                         # worker threads for line generation


def _random_ip() -> str:
    """Return one random dotted-quad IPv4 string (first/last octets never 0)."""
    return (
        f"{random.randint(1, 255)}.{random.randint(0, 255)}."
        f"{random.randint(0, 255)}.{random.randint(1, 255)}"
    )


# ---------------------------------------------------------------------------
# Pre-generated data pools: drawing each field from a fixed pool is far
# cheaper per line than synthesizing every value from scratch.
# ---------------------------------------------------------------------------
IPS = [_random_ip() for _ in range(1000)]

METHODS = ["GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS"]

PATHS = [
    "/", "/api/users", "/api/products", "/api/orders", "/api/auth",
    "/login", "/logout", "/dashboard", "/profile", "/settings",
    "/static/css/main.css", "/static/js/app.js", "/static/js/vendor.js",
    "/images/logo.png", "/images/banner.jpg", "/favicon.ico",
    "/api/v1/data", "/api/v2/users", "/health", "/metrics",
    "/admin/dashboard", "/admin/users", "/admin/settings",
    "/products/1234", "/products/5678", "/cart", "/checkout",
]

STATUS_CODES = [200, 201, 204, 301, 302, 304, 400, 401, 403, 404, 500, 502, 503]

USER_AGENTS = [
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0.4472.124",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Safari/605.1.15",
    "curl/7.68.0",
    "Python/3.9 aiohttp/3.7.4",
    "PostmanRuntime/7.28.4",
]

REFERERS = ["-", "https://example.com", "https://google.com", "https://github.com"]
def generate_log_lines(count: int) -> str:
    """Generate *count* nginx-style access-log lines as a single string.

    Each line contains a random client IP, a timestamp within roughly the
    last 31 days, a method/path/status drawn from the module pools, a
    response size, referer, user agent, and a trailing response time.

    Args:
        count: Number of log lines to produce.

    Returns:
        The concatenated lines, each terminated with "\\n".
    """
    lines = []
    # BUG FIX: the format string hard-codes a "+0000" (UTC) offset, but the
    # original used naive local time via datetime.now(). Use an aware UTC
    # timestamp so the printed offset is truthful. Hoisted out of the loop
    # since the per-line jitter comes entirely from the random timedelta.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    for _ in range(count):
        ip = random.choice(IPS)
        timestamp = now_utc - datetime.timedelta(
            days=random.randint(0, 30),
            hours=random.randint(0, 23),
            minutes=random.randint(0, 59),
        )
        timestamp_str = timestamp.strftime("%d/%b/%Y:%H:%M:%S +0000")
        method = random.choice(METHODS)
        path = random.choice(PATHS)
        status = random.choice(STATUS_CODES)
        size = random.randint(100, 50000)
        referer = random.choice(REFERERS)
        user_agent = random.choice(USER_AGENTS)
        response_time = round(random.uniform(0.001, 5.0), 3)
        lines.append(
            f'{ip} - - [{timestamp_str}] "{method} {path} HTTP/1.1" {status} '
            f'{size} "{referer}" "{user_agent}" {response_time}\n'
        )
    return "".join(lines)
async def generate_batch(executor: ThreadPoolExecutor, batch_size: int) -> str:
    """Generate a batch of log lines on *executor* without blocking the loop.

    Args:
        executor: Thread pool that runs the CPU-bound line generation.
        batch_size: Number of lines for this batch.

    Returns:
        The generated lines as a single string.
    """
    # FIX: asyncio.get_event_loop() is deprecated inside coroutines since
    # Python 3.10; get_running_loop() is the correct, unambiguous call here.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(executor, generate_log_lines, batch_size)
async def write_to_file(file_handle, data: str, stats: dict):
    """Write *data* via *file_handle* off the event loop and update *stats*.

    The blocking write runs in the default executor. *stats* is mutated in
    place: "bytes_written" grows by the UTF-8 byte length of *data* and
    "lines_written" by the number of newline characters written.

    Args:
        file_handle: Any object with a blocking ``write(str)`` method.
        data: Text to append.
        stats: Dict with integer "bytes_written" and "lines_written" keys.
    """
    # FIX: asyncio.get_event_loop() is deprecated inside coroutines since
    # Python 3.10; get_running_loop() is the correct call here.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, file_handle.write, data)
    stats["bytes_written"] += len(data.encode("utf-8"))
    stats["lines_written"] += data.count("\n")
async def generate_nginx_log(
    filename: str = "nginx_sample.log", target_size_gb: float = 2
):
    """Produce an nginx-style access log of roughly *target_size_gb* GB.

    Line batches are generated concurrently on a thread pool while the
    event loop drains finished batches to *filename*, printing a progress
    line about twice a second and a summary when done.
    """
    target_bytes = int(target_size_gb * 1024 * 1024 * 1024)
    stats = {"bytes_written": 0, "lines_written": 0}

    print(f"Generating {target_size_gb}GB nginx log file...")
    print(
        f"Using {CONCURRENT_BATCHES} concur generators, {NUM_THREADS} threads"
    )

    started = time.time()
    report_at = started       # wall time of the last progress report
    reported_bytes = 0        # bytes_written at the last report

    with ThreadPoolExecutor(max_workers=NUM_THREADS) as pool:
        with open(filename, "w", buffering=WRITE_BUFFER_SIZE) as out:
            in_flight = []
            while stats["bytes_written"] < target_bytes:
                # Top up the pipeline of concurrent generator tasks.
                while (
                    len(in_flight) < CONCURRENT_BATCHES
                    and stats["bytes_written"] < target_bytes
                ):
                    in_flight.append(
                        asyncio.create_task(generate_batch(pool, BATCH_SIZE))
                    )

                # Block until at least one generator completes.
                finished, still_running = await asyncio.wait(
                    in_flight, return_when=asyncio.FIRST_COMPLETED
                )
                in_flight = list(still_running)

                # Flush every finished batch to the output file.
                for done_task in finished:
                    await write_to_file(out, await done_task, stats)

                # Throttled progress report (overwrites itself with "\r").
                now = time.time()
                if now - report_at >= 0.5:
                    elapsed = now - started
                    speed = (stats["bytes_written"] - reported_bytes) / (
                        (now - report_at) * 1024 * 1024
                    )
                    avg_speed = stats["bytes_written"] / (
                        elapsed * 1024 * 1024
                    )
                    progress = (stats["bytes_written"] / target_bytes) * 100
                    gb_written = stats["bytes_written"] / (1024**3)
                    print(
                        f"\rProgress: {progress:.1f}% ({gb_written:.2f}GB) | "
                        f"Spd: {speed:.0f} MB/s | Avg: {avg_speed:.0f} MB/s | "
                        f"Lines: {stats['lines_written']:,}",
                        end="",
                        flush=True,
                    )
                    report_at = now
                    reported_bytes = stats["bytes_written"]

            # Drain generators still running once the target is reached,
            # writing their output only while still under the target.
            if in_flight:
                leftovers = await asyncio.gather(*in_flight)
                for batch_data in leftovers:
                    if stats["bytes_written"] < target_bytes:
                        await write_to_file(out, batch_data, stats)

    elapsed = time.time() - started
    final_size = os.path.getsize(filename)
    print(f"\n✓ Completed in {elapsed:.2f} seconds")
    print(f"Final file size: {final_size / (1024**3):.2f}GB")
    print(f"Average speed: {(final_size / (1024**2)) / elapsed:.0f} MB/s")
    print(f"Total lines: {stats['lines_written']:,}")
async def main():
    """Script entry point: generate the default 2GB sample log."""
    await generate_nginx_log(filename="nginx_sample.log", target_size_gb=2)


if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.