Skip to content

Instantly share code, notes, and snippets.

@shortjared
Created November 12, 2024 00:44
Show Gist options
  • Save shortjared/9d95586e0de1603407e91dac7b05e8e5 to your computer and use it in GitHub Desktop.
Save shortjared/9d95586e0de1603407e91dac7b05e8e5 to your computer and use it in GitHub Desktop.
Create Rebrickable Postgres Database
#!/bin/bash
BASE_URL="https://cdn.rebrickable.com/media/downloads"
TIMESTAMP=$(date +%s)
IMPORT_DIR="import"
DB_NAME="lego"
DB_USER="postgres"
DB_PASSWORD="postgres"
CONTAINER_NAME="lego-db"
# Create import directory if it doesn't exist
mkdir -p "${IMPORT_DIR}"
# Stop and remove any existing container with the same name
docker stop ${CONTAINER_NAME} 2>/dev/null || true
docker rm ${CONTAINER_NAME} 2>/dev/null || true
# Start PostgreSQL container
docker run --name ${CONTAINER_NAME} \
-e POSTGRES_DB=${DB_NAME} \
-e POSTGRES_USER=${DB_USER} \
-e POSTGRES_PASSWORD=${DB_PASSWORD} \
-p 5432:5432 \
-d postgres:17
# Wait for PostgreSQL to start
sleep 5
# Create database schema
docker cp schema.sql ${CONTAINER_NAME}:/schema.sql
docker exec -i ${CONTAINER_NAME} psql -U ${DB_USER} -d ${DB_NAME} -f /schema.sql
# Files in dependency order based on schema relationships
FILES=(
"part_categories.csv.gz"
"colors.csv.gz"
"parts.csv.gz"
"part_relationships.csv.gz"
"elements.csv.gz"
"themes.csv.gz"
"sets.csv.gz"
"minifigs.csv.gz"
"inventories.csv.gz"
"inventory_parts.csv.gz"
"inventory_sets.csv.gz"
"inventory_minifigs.csv.gz"
)
for file in "${FILES[@]}"; do
# Download and extract if needed
if [ ! -f "${IMPORT_DIR}/${file%.gz}" ]; then
wget "${BASE_URL}/${file}?${TIMESTAMP}" -O "${IMPORT_DIR}/${file}"
gunzip -c "${IMPORT_DIR}/${file}" > "${IMPORT_DIR}/${file%.gz}"
rm "${IMPORT_DIR}/${file}"
fi
# Import CSV into PostgreSQL
table_name="${file%.csv.gz}"
echo "Importing ${table_name}..."
docker cp "${IMPORT_DIR}/${file%.gz}" ${CONTAINER_NAME}:/tmp/${file%.gz}
docker exec -i ${CONTAINER_NAME} psql -U ${DB_USER} -d ${DB_NAME} -c "\COPY ${table_name} FROM '/tmp/${file%.gz}' CSV HEADER;"
done
# Create compressed database dump in custom format
echo "Creating compressed database dump..."
docker exec -i ${CONTAINER_NAME} pg_dump -U ${DB_USER} -Fc ${DB_NAME} > lego_db_dump.pgdump
echo "Database import and dump complete!"
-- Create part_categories table
CREATE TABLE part_categories (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL
);
-- Create colors table
CREATE TABLE colors (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
rgb TEXT NOT NULL,
is_trans BOOLEAN NOT NULL DEFAULT false
);
-- Create parts table
CREATE TABLE parts (
part_num TEXT PRIMARY KEY,
name TEXT NOT NULL,
part_cat_id INTEGER NOT NULL REFERENCES part_categories(id),
part_material TEXT
);
-- Create part_relationships table
CREATE TABLE part_relationships (
rel_type TEXT NOT NULL,
child_part_num TEXT NOT NULL REFERENCES parts(part_num),
parent_part_num TEXT NOT NULL REFERENCES parts(part_num),
PRIMARY KEY (rel_type,child_part_num, parent_part_num)
);
-- Create elements table
CREATE TABLE elements (
element_id TEXT PRIMARY KEY,
part_num TEXT NOT NULL REFERENCES parts(part_num),
color_id INTEGER NOT NULL REFERENCES colors(id),
design_id TEXT
);
-- Create themes table
CREATE TABLE themes (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
parent_id INTEGER REFERENCES themes(id)
);
-- Create sets table
CREATE TABLE sets (
set_num TEXT PRIMARY KEY,
name TEXT NOT NULL,
year INTEGER NOT NULL,
theme_id INTEGER NOT NULL REFERENCES themes(id),
num_parts INTEGER NOT NULL,
img_url TEXT
);
-- Create minifigs table
CREATE TABLE minifigs (
fig_num TEXT PRIMARY KEY,
name TEXT NOT NULL,
num_parts INTEGER NOT NULL,
img_url TEXT
);
-- Create inventories table
CREATE TABLE inventories (
id SERIAL PRIMARY KEY,
version INTEGER NOT NULL,
set_num TEXT NOT NULL
);
-- Create inventory_parts table
CREATE TABLE inventory_parts (
inventory_id INTEGER NOT NULL REFERENCES inventories(id),
part_num TEXT NOT NULL REFERENCES parts(part_num),
color_id INTEGER NOT NULL REFERENCES colors(id),
quantity INTEGER NOT NULL,
is_spare BOOLEAN NOT NULL DEFAULT false,
img_url TEXT,
PRIMARY KEY (inventory_id, part_num, color_id, is_spare)
);
-- Create inventory_sets table
CREATE TABLE inventory_sets (
inventory_id INTEGER NOT NULL REFERENCES inventories(id),
set_num TEXT NOT NULL REFERENCES sets(set_num),
quantity INTEGER NOT NULL,
PRIMARY KEY (inventory_id, set_num)
);
-- Create inventory_minifigs table
CREATE TABLE inventory_minifigs (
inventory_id INTEGER NOT NULL REFERENCES inventories(id),
fig_num TEXT NOT NULL REFERENCES minifigs(fig_num),
quantity INTEGER NOT NULL,
PRIMARY KEY (inventory_id, fig_num)
);
-- Add indexes for common lookup patterns
CREATE INDEX idx_parts_category ON parts(part_cat_id);
CREATE INDEX idx_sets_theme ON sets(theme_id);
CREATE INDEX idx_inventories_set ON inventories(set_num);
CREATE INDEX idx_inventory_parts_part ON inventory_parts(part_num);
CREATE INDEX idx_inventory_parts_color ON inventory_parts(color_id);
CREATE INDEX idx_themes_parent ON themes(parent_id);
-- Enhanced indexing strategy for reporting and analytics
-- Parts and Categories
CREATE INDEX idx_parts_category ON parts(part_cat_id);
CREATE INDEX idx_parts_name_search ON parts USING gin(name gin_trgm_ops); -- Requires pg_trgm extension for partial name searches
CREATE INDEX idx_part_categories_name ON part_categories(name);
-- Colors
CREATE INDEX idx_colors_name ON colors(name);
CREATE INDEX idx_colors_is_trans ON colors(is_trans); -- Useful for filtering transparent parts
CREATE INDEX idx_colors_rgb ON colors(rgb);
-- Sets and Themes
CREATE INDEX idx_sets_theme ON sets(theme_id);
CREATE INDEX idx_sets_year ON sets(year); -- For temporal analysis
CREATE INDEX idx_sets_name_search ON sets USING gin(name gin_trgm_ops);
CREATE INDEX idx_sets_num_parts ON sets(num_parts); -- For size-based analysis
CREATE INDEX idx_sets_theme_year ON sets(theme_id, year); -- Compound index for theme-based temporal analysis
CREATE INDEX idx_themes_parent ON themes(parent_id);
CREATE INDEX idx_themes_name ON themes(name);
-- Inventory Management
CREATE INDEX idx_inventories_set ON inventories(set_num);
CREATE INDEX idx_inventories_version ON inventories(version);
-- Inventory Parts (critical for many aggregation queries)
CREATE INDEX idx_inventory_parts_part ON inventory_parts(part_num);
CREATE INDEX idx_inventory_parts_color ON inventory_parts(color_id);
CREATE INDEX idx_inventory_parts_quantity ON inventory_parts(quantity);
CREATE INDEX idx_inventory_parts_spare ON inventory_parts(is_spare);
CREATE INDEX idx_inventory_parts_color_part ON inventory_parts(color_id, part_num); -- Compound index for color-based part analysis
CREATE INDEX idx_inventory_parts_inv_qty ON inventory_parts(inventory_id, quantity); -- For quantity-based analysis per inventory
-- Minifigs
CREATE INDEX idx_minifigs_name_search ON minifigs USING gin(name gin_trgm_ops);
CREATE INDEX idx_minifigs_num_parts ON minifigs(num_parts);
-- Inventory Sets
CREATE INDEX idx_inventory_sets_set ON inventory_sets(set_num);
CREATE INDEX idx_inventory_sets_quantity ON inventory_sets(quantity);
-- Inventory Minifigs
CREATE INDEX idx_inventory_minifigs_fig ON inventory_minifigs(fig_num);
CREATE INDEX idx_inventory_minifigs_quantity ON inventory_minifigs(quantity);
-- Part Relationships
CREATE INDEX idx_part_relationships_child ON part_relationships(child_part_num);
CREATE INDEX idx_part_relationships_parent ON part_relationships(parent_part_num);
CREATE INDEX idx_part_relationships_type ON part_relationships(rel_type);
-- Elements (for color variant analysis)
CREATE INDEX idx_elements_part ON elements(part_num);
CREATE INDEX idx_elements_color ON elements(color_id);
CREATE INDEX idx_elements_part_color ON elements(part_num, color_id);
-- Add extension for text search capabilities
CREATE EXTENSION IF NOT EXISTS pg_trgm;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment