# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @mzhang77
# Last active June 6, 2025 03:26
# Show Gist options
#   • Save mzhang77/73a8f9f2304b43d991f222699c203916 to your computer and use it in GitHub Desktop.
import mysql.connector
import json
import os
import random
import string
from datetime import datetime, timedelta
# Connection settings — replace with your actual TiDB credentials.
# The mapping is unpacked as keyword arguments into mysql.connector.connect().
DB_CONFIG = dict(
    host='127.0.0.1',
    port=4000,
    user='root',
    password='',
    database='test',
)

# Generation parameters.
# Stop once the cumulative payload length reaches this many bytes (5000 MiB).
TARGET_SIZE_BYTES = 5000 * 1024 ** 2
# Number of rows sent per executemany() call / per commit.
BATCH_SIZE = 100
# Inclusive bounds, in KiB, for the random filler string of each payload.
PAYLOAD_MIN_KB, PAYLOAD_MAX_KB = 5, 10
def random_hex(length: int) -> str:
    """Return a random lowercase hexadecimal string of ``length`` characters.

    Each character is drawn independently (with replacement) from
    ``0123456789abcdef`` using the module-level ``random`` generator, so the
    result is reproducible after ``random.seed`` and must not be used for
    security-sensitive tokens.

    Args:
        length: Number of hex digits to produce; ``0`` yields ``''``.

    Returns:
        A string of exactly ``length`` hex digits.
    """
    return ''.join(random.choices('0123456789abcdef', k=length))
def random_string(length: int) -> str:
    """Return a random ASCII alphanumeric string of ``length`` characters.

    Characters are drawn independently (with replacement) from
    ``string.ascii_letters + string.digits`` using the module-level
    ``random`` generator.

    Args:
        length: Number of characters to produce; ``0`` yields ``''``.

    Returns:
        A string of exactly ``length`` letters and digits.
    """
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choices(alphabet, k=length))
def generate_payload():
    """Build one JSON payload string with a random-sized filler value.

    A size between ``PAYLOAD_MIN_KB`` and ``PAYLOAD_MAX_KB`` is picked
    (``random.randint`` includes both ends), and that many KiB of random
    alphanumeric characters are stored under the ``"data"`` key.

    Returns:
        The serialized JSON object as a ``str``.
    """
    size_kb = random.randint(PAYLOAD_MIN_KB, PAYLOAD_MAX_KB)
    filler = random_string(size_kb * 1024)
    return json.dumps({"data": filler})
def generate_row(ts_base, i):
    """Build the parameter tuple for one ``events`` row.

    Args:
        ts_base: ``datetime`` that row 0 is stamped with.
        i: Row index; the row's timestamp is ``ts_base`` plus ``i`` seconds.

    Returns:
        A pair ``(row, size)``. ``row`` is the 6-tuple
        ``(timestamp, institution_id, event_id, event_type, payload,
        created_at)`` where both time values are formatted as
        ``'%Y-%m-%d %H:%M:%S'`` strings and ``event_id`` is 16 raw bytes.
        ``size`` is ``len(payload)``, i.e. only the payload is counted.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    timestamp = ts_base + timedelta(seconds=i)

    # The random draws are kept in this order so that a seeded run
    # produces the same sequence of rows.
    institution_id = f"INST{random.randint(1, 100):03d}"
    event_id = bytes.fromhex(random_hex(32))  # 32 hex digits -> 16 bytes
    event_type = random.choice(['login', 'logout', 'purchase', 'update'])
    payload = generate_payload()

    # created_at always trails the event timestamp by one second.
    created_at = timestamp + timedelta(seconds=1)

    row = (
        timestamp.strftime(time_format),
        institution_id,
        event_id,
        event_type,
        payload,
        created_at.strftime(time_format),
    )
    return row, len(payload)
def main():
    """Insert generated rows into ``events`` until the size target is met.

    Connects using ``DB_CONFIG``, then repeatedly builds ``BATCH_SIZE`` rows
    with ``generate_row``, sends them with ``executemany`` and commits after
    every batch. The loop ends once the running sum of payload lengths
    reaches ``TARGET_SIZE_BYTES``; because the check happens between
    batches, the final total may exceed the target by up to one batch.

    The cursor and connection are closed even when an insert or commit
    raises, so a failed run does not leak the database session. The
    exception itself is propagated unchanged.
    """
    # Same text as a triple-quoted literal with the statement on its own
    # lines: leading newline, two statement lines, trailing newline.
    insert_sql = (
        "\n"
        "INSERT INTO events (`timestamp`, `institution_id`, `event_id`, `event_type`, `payload`, `created_at`)\n"
        "VALUES (%s, %s, %s, %s, %s, %s)\n"
    )
    ts_base = datetime(2025, 6, 5, 10, 0, 0)

    conn = mysql.connector.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            total_bytes = 0
            i = 0  # global row index; drives the per-row timestamp offset
            while total_bytes < TARGET_SIZE_BYTES:
                batch = []
                for _ in range(BATCH_SIZE):
                    row, size = generate_row(ts_base, i)
                    batch.append(row)
                    total_bytes += size
                    i += 1
                cursor.executemany(insert_sql, batch)
                conn.commit()
                print(f"Inserted {len(batch)} rows. Total size: {total_bytes / (1024 * 1024):.2f} MiB")
        finally:
            cursor.close()
    finally:
        conn.close()
    print("✅ Insertion complete.")
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment