Skip to content

Instantly share code, notes, and snippets.

@AdheipSingh
Last active February 16, 2025 21:16
Show Gist options
  • Save AdheipSingh/db17c6c9affef7556a56523e780459c6 to your computer and use it in GitHub Desktop.
Save AdheipSingh/db17c6c9affef7556a56523e780459c6 to your computer and use it in GitHub Desktop.
Write dummy to data postgres
#!/bin/bash
# Configuration
NAMESPACE=${1:-"default"}
RELEASE_NAME=${2:-"postgres"}
DB_NAME=${3:-"postgres"}
DB_USER=${4:-"postgres"}
NUM_USERS=${5:-1000}
NUM_ORDERS=${6:-5000}
NUM_PRODUCTS=${7:-100}
# Get the pod name
POD_NAME=$(kubectl get pods -n $NAMESPACE -l app.kubernetes.io/instance=$RELEASE_NAME,app.kubernetes.io/name=postgresql -o jsonpath="{.items[0].metadata.name}")
if [ -z "$POD_NAME" ]; then
echo "Error: PostgreSQL pod not found!"
exit 1
fi
echo "Found PostgreSQL pod: $POD_NAME"
# Get the PostgreSQL password from secret
DB_PASSWORD=$(kubectl get secret -n $NAMESPACE $RELEASE_NAME-postgresql -o jsonpath="{.data.postgres-password}" | base64 --decode)
if [ -z "$DB_PASSWORD" ]; then
echo "Error: Could not get PostgreSQL password from secret!"
exit 1
fi
# Function to execute SQL commands inside the pod
execute_sql() {
kubectl exec -n $NAMESPACE $POD_NAME -- bash -c "PGPASSWORD='$DB_PASSWORD' psql -U $DB_USER -d $DB_NAME -c \"$1\""
}
echo "Creating tables..."
# Create tables
execute_sql "
CREATE TABLE IF NOT EXISTS users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) NOT NULL,
email VARCHAR(100) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);"
execute_sql "
CREATE TABLE IF NOT EXISTS products (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
price DECIMAL(10,2) NOT NULL,
stock INT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);"
execute_sql "
CREATE TABLE IF NOT EXISTS orders (
id SERIAL PRIMARY KEY,
user_id INT REFERENCES users(id),
product_id INT REFERENCES products(id),
quantity INT NOT NULL,
total_amount DECIMAL(10,2) NOT NULL,
status VARCHAR(20) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);"
echo "Generating sample data..."
# Generate sample data using a single transaction
BATCH_SIZE=100
# Generate users in batches
echo "Generating users..."
for ((i=0; i<NUM_USERS; i+=BATCH_SIZE)); do
VALUES=""
for ((j=0; j<BATCH_SIZE && (i+j)<NUM_USERS; j++)); do
if [ ! -z "$VALUES" ]; then
VALUES="$VALUES,"
fi
USERNAME="user$((i+j+1))"
EMAIL="user$((i+j+1))@example.com"
VALUES="$VALUES ('$USERNAME', '$EMAIL')"
done
execute_sql "INSERT INTO users (username, email) VALUES $VALUES;"
echo "Generated $((i+BATCH_SIZE)) users..."
done
# Generate products in batches
echo "Generating products..."
for ((i=0; i<NUM_PRODUCTS; i+=BATCH_SIZE)); do
VALUES=""
for ((j=0; j<BATCH_SIZE && (i+j)<NUM_PRODUCTS; j++)); do
if [ ! -z "$VALUES" ]; then
VALUES="$VALUES,"
fi
NAME="Product$((i+j+1))"
PRICE=$(( RANDOM % 1000 + 1 ))
STOCK=$(( RANDOM % 1000 + 1 ))
VALUES="$VALUES ('$NAME', $PRICE, $STOCK)"
done
execute_sql "INSERT INTO products (name, price, stock) VALUES $VALUES;"
echo "Generated $((i+BATCH_SIZE)) products..."
done
# Generate orders in batches
echo "Generating orders..."
for ((i=0; i<NUM_ORDERS; i+=BATCH_SIZE)); do
VALUES=""
for ((j=0; j<BATCH_SIZE && (i+j)<NUM_ORDERS; j++)); do
if [ ! -z "$VALUES" ]; then
VALUES="$VALUES,"
fi
USER_ID=$(( RANDOM % NUM_USERS + 1 ))
PRODUCT_ID=$(( RANDOM % NUM_PRODUCTS + 1 ))
QUANTITY=$(( RANDOM % 5 + 1 ))
STATUS_ARRAY=("pending" "processing" "completed" "cancelled")
STATUS=${STATUS_ARRAY[RANDOM % 4]}
PRICE=$(( RANDOM % 1000 + 1 ))
TOTAL=$((PRICE * QUANTITY))
VALUES="$VALUES ($USER_ID, $PRODUCT_ID, $QUANTITY, $TOTAL, '$STATUS')"
done
execute_sql "INSERT INTO orders (user_id, product_id, quantity, total_amount, status) VALUES $VALUES;"
echo "Generated $((i+BATCH_SIZE)) orders..."
done
echo "Creating indexes for better performance..."
# Create indexes for better query performance
execute_sql "
CREATE INDEX IF NOT EXISTS idx_users_username ON users(username);
CREATE INDEX IF NOT EXISTS idx_products_name ON products(name);
CREATE INDEX IF NOT EXISTS idx_orders_user_id ON orders(user_id);
CREATE INDEX IF NOT EXISTS idx_orders_product_id ON orders(product_id);
CREATE INDEX IF NOT EXISTS idx_orders_status ON orders(status);
CREATE INDEX IF NOT EXISTS idx_orders_created_at ON orders(created_at);"
echo "Generating some queries to test the data..."
# Run some test queries
echo "Top 5 products by order quantity:"
execute_sql "
SELECT p.name, COUNT(*) as order_count, SUM(o.quantity) as total_quantity
FROM products p
JOIN orders o ON p.id = o.product_id
GROUP BY p.name
ORDER BY total_quantity DESC
LIMIT 5;"
echo "Order statistics by status:"
execute_sql "
SELECT status, COUNT(*) as count,
SUM(total_amount) as total_amount,
AVG(total_amount) as avg_amount
FROM orders
GROUP BY status;"
echo "Data generation completed!"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment