Created
November 12, 2024 00:44
-
-
Save shortjared/9d95586e0de1603407e91dac7b05e8e5 to your computer and use it in GitHub Desktop.
Create Rebrickable Postgres Database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
BASE_URL="https://cdn.rebrickable.com/media/downloads" | |
TIMESTAMP=$(date +%s) | |
IMPORT_DIR="import" | |
DB_NAME="lego" | |
DB_USER="postgres" | |
DB_PASSWORD="postgres" | |
CONTAINER_NAME="lego-db" | |
# Create import directory if it doesn't exist | |
mkdir -p "${IMPORT_DIR}" | |
# Stop and remove any existing container with the same name | |
docker stop ${CONTAINER_NAME} 2>/dev/null || true | |
docker rm ${CONTAINER_NAME} 2>/dev/null || true | |
# Start PostgreSQL container | |
docker run --name ${CONTAINER_NAME} \ | |
-e POSTGRES_DB=${DB_NAME} \ | |
-e POSTGRES_USER=${DB_USER} \ | |
-e POSTGRES_PASSWORD=${DB_PASSWORD} \ | |
-p 5432:5432 \ | |
-d postgres:17 | |
# Wait for PostgreSQL to start | |
sleep 5 | |
# Create database schema | |
docker cp schema.sql ${CONTAINER_NAME}:/schema.sql | |
docker exec -i ${CONTAINER_NAME} psql -U ${DB_USER} -d ${DB_NAME} -f /schema.sql | |
# Files in dependency order based on schema relationships | |
FILES=( | |
"part_categories.csv.gz" | |
"colors.csv.gz" | |
"parts.csv.gz" | |
"part_relationships.csv.gz" | |
"elements.csv.gz" | |
"themes.csv.gz" | |
"sets.csv.gz" | |
"minifigs.csv.gz" | |
"inventories.csv.gz" | |
"inventory_parts.csv.gz" | |
"inventory_sets.csv.gz" | |
"inventory_minifigs.csv.gz" | |
) | |
for file in "${FILES[@]}"; do | |
# Download and extract if needed | |
if [ ! -f "${IMPORT_DIR}/${file%.gz}" ]; then | |
wget "${BASE_URL}/${file}?${TIMESTAMP}" -O "${IMPORT_DIR}/${file}" | |
gunzip -c "${IMPORT_DIR}/${file}" > "${IMPORT_DIR}/${file%.gz}" | |
rm "${IMPORT_DIR}/${file}" | |
fi | |
# Import CSV into PostgreSQL | |
table_name="${file%.csv.gz}" | |
echo "Importing ${table_name}..." | |
docker cp "${IMPORT_DIR}/${file%.gz}" ${CONTAINER_NAME}:/tmp/${file%.gz} | |
docker exec -i ${CONTAINER_NAME} psql -U ${DB_USER} -d ${DB_NAME} -c "\COPY ${table_name} FROM '/tmp/${file%.gz}' CSV HEADER;" | |
done | |
# Create compressed database dump in custom format | |
echo "Creating compressed database dump..." | |
docker exec -i ${CONTAINER_NAME} pg_dump -U ${DB_USER} -Fc ${DB_NAME} > lego_db_dump.pgdump | |
echo "Database import and dump complete!" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Create part_categories table | |
CREATE TABLE part_categories ( | |
id SERIAL PRIMARY KEY, | |
name TEXT NOT NULL | |
); | |
-- Create colors table | |
CREATE TABLE colors ( | |
id SERIAL PRIMARY KEY, | |
name TEXT NOT NULL, | |
rgb TEXT NOT NULL, | |
is_trans BOOLEAN NOT NULL DEFAULT false | |
); | |
-- Create parts table | |
CREATE TABLE parts ( | |
part_num TEXT PRIMARY KEY, | |
name TEXT NOT NULL, | |
part_cat_id INTEGER NOT NULL REFERENCES part_categories(id), | |
part_material TEXT | |
); | |
-- Create part_relationships table | |
CREATE TABLE part_relationships ( | |
rel_type TEXT NOT NULL, | |
child_part_num TEXT NOT NULL REFERENCES parts(part_num), | |
parent_part_num TEXT NOT NULL REFERENCES parts(part_num), | |
PRIMARY KEY (rel_type,child_part_num, parent_part_num) | |
); | |
-- Create elements table | |
CREATE TABLE elements ( | |
element_id TEXT PRIMARY KEY, | |
part_num TEXT NOT NULL REFERENCES parts(part_num), | |
color_id INTEGER NOT NULL REFERENCES colors(id), | |
design_id TEXT | |
); | |
-- Create themes table | |
CREATE TABLE themes ( | |
id SERIAL PRIMARY KEY, | |
name TEXT NOT NULL, | |
parent_id INTEGER REFERENCES themes(id) | |
); | |
-- Create sets table | |
CREATE TABLE sets ( | |
set_num TEXT PRIMARY KEY, | |
name TEXT NOT NULL, | |
year INTEGER NOT NULL, | |
theme_id INTEGER NOT NULL REFERENCES themes(id), | |
num_parts INTEGER NOT NULL, | |
img_url TEXT | |
); | |
-- Create minifigs table | |
CREATE TABLE minifigs ( | |
fig_num TEXT PRIMARY KEY, | |
name TEXT NOT NULL, | |
num_parts INTEGER NOT NULL, | |
img_url TEXT | |
); | |
-- Create inventories table | |
CREATE TABLE inventories ( | |
id SERIAL PRIMARY KEY, | |
version INTEGER NOT NULL, | |
set_num TEXT NOT NULL | |
); | |
-- Create inventory_parts table | |
CREATE TABLE inventory_parts ( | |
inventory_id INTEGER NOT NULL REFERENCES inventories(id), | |
part_num TEXT NOT NULL REFERENCES parts(part_num), | |
color_id INTEGER NOT NULL REFERENCES colors(id), | |
quantity INTEGER NOT NULL, | |
is_spare BOOLEAN NOT NULL DEFAULT false, | |
img_url TEXT, | |
PRIMARY KEY (inventory_id, part_num, color_id, is_spare) | |
); | |
-- Create inventory_sets table | |
CREATE TABLE inventory_sets ( | |
inventory_id INTEGER NOT NULL REFERENCES inventories(id), | |
set_num TEXT NOT NULL REFERENCES sets(set_num), | |
quantity INTEGER NOT NULL, | |
PRIMARY KEY (inventory_id, set_num) | |
); | |
-- Create inventory_minifigs table | |
CREATE TABLE inventory_minifigs ( | |
inventory_id INTEGER NOT NULL REFERENCES inventories(id), | |
fig_num TEXT NOT NULL REFERENCES minifigs(fig_num), | |
quantity INTEGER NOT NULL, | |
PRIMARY KEY (inventory_id, fig_num) | |
); | |
-- Add indexes for common lookup patterns | |
CREATE INDEX idx_parts_category ON parts(part_cat_id); | |
CREATE INDEX idx_sets_theme ON sets(theme_id); | |
CREATE INDEX idx_inventories_set ON inventories(set_num); | |
CREATE INDEX idx_inventory_parts_part ON inventory_parts(part_num); | |
CREATE INDEX idx_inventory_parts_color ON inventory_parts(color_id); | |
CREATE INDEX idx_themes_parent ON themes(parent_id); | |
-- Enhanced indexing strategy for reporting and analytics | |
-- Parts and Categories | |
CREATE INDEX idx_parts_category ON parts(part_cat_id); | |
CREATE INDEX idx_parts_name_search ON parts USING gin(name gin_trgm_ops); -- Requires pg_trgm extension for partial name searches | |
CREATE INDEX idx_part_categories_name ON part_categories(name); | |
-- Colors | |
CREATE INDEX idx_colors_name ON colors(name); | |
CREATE INDEX idx_colors_is_trans ON colors(is_trans); -- Useful for filtering transparent parts | |
CREATE INDEX idx_colors_rgb ON colors(rgb); | |
-- Sets and Themes | |
CREATE INDEX idx_sets_theme ON sets(theme_id); | |
CREATE INDEX idx_sets_year ON sets(year); -- For temporal analysis | |
CREATE INDEX idx_sets_name_search ON sets USING gin(name gin_trgm_ops); | |
CREATE INDEX idx_sets_num_parts ON sets(num_parts); -- For size-based analysis | |
CREATE INDEX idx_sets_theme_year ON sets(theme_id, year); -- Compound index for theme-based temporal analysis | |
CREATE INDEX idx_themes_parent ON themes(parent_id); | |
CREATE INDEX idx_themes_name ON themes(name); | |
-- Inventory Management | |
CREATE INDEX idx_inventories_set ON inventories(set_num); | |
CREATE INDEX idx_inventories_version ON inventories(version); | |
-- Inventory Parts (critical for many aggregation queries) | |
CREATE INDEX idx_inventory_parts_part ON inventory_parts(part_num); | |
CREATE INDEX idx_inventory_parts_color ON inventory_parts(color_id); | |
CREATE INDEX idx_inventory_parts_quantity ON inventory_parts(quantity); | |
CREATE INDEX idx_inventory_parts_spare ON inventory_parts(is_spare); | |
CREATE INDEX idx_inventory_parts_color_part ON inventory_parts(color_id, part_num); -- Compound index for color-based part analysis | |
CREATE INDEX idx_inventory_parts_inv_qty ON inventory_parts(inventory_id, quantity); -- For quantity-based analysis per inventory | |
-- Minifigs | |
CREATE INDEX idx_minifigs_name_search ON minifigs USING gin(name gin_trgm_ops); | |
CREATE INDEX idx_minifigs_num_parts ON minifigs(num_parts); | |
-- Inventory Sets | |
CREATE INDEX idx_inventory_sets_set ON inventory_sets(set_num); | |
CREATE INDEX idx_inventory_sets_quantity ON inventory_sets(quantity); | |
-- Inventory Minifigs | |
CREATE INDEX idx_inventory_minifigs_fig ON inventory_minifigs(fig_num); | |
CREATE INDEX idx_inventory_minifigs_quantity ON inventory_minifigs(quantity); | |
-- Part Relationships | |
CREATE INDEX idx_part_relationships_child ON part_relationships(child_part_num); | |
CREATE INDEX idx_part_relationships_parent ON part_relationships(parent_part_num); | |
CREATE INDEX idx_part_relationships_type ON part_relationships(rel_type); | |
-- Elements (for color variant analysis) | |
CREATE INDEX idx_elements_part ON elements(part_num); | |
CREATE INDEX idx_elements_color ON elements(color_id); | |
CREATE INDEX idx_elements_part_color ON elements(part_num, color_id); | |
-- Add extension for text search capabilities | |
CREATE EXTENSION IF NOT EXISTS pg_trgm; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment