Skip to content

Instantly share code, notes, and snippets.

@channainfo
Last active December 10, 2021 08:12
Show Gist options
  • Save channainfo/6b6a6ecea53ab8370307046aaf172215 to your computer and use it in GitHub Desktop.
Save channainfo/6b6a6ecea53ab8370307046aaf172215 to your computer and use it in GitHub Desktop.
Export items to panda data wrangling for items dataset in recommendation
WITH
ancesstor_taxons AS (
SELECT
taxon.id as taxon_id,
taxon.taxon_type as taxon_type,
taxon.parent_id,
ancesstor.id AS ancesstor_id,
ancesstor.parent_id AS ancesstor_parent_id,
trans.name AS ancesstor_name,
taxon.lft,
taxon.rgt,
taxon.depth
FROM spree_taxons AS taxon, spree_taxons AS ancesstor
INNER JOIN spree_taxon_translations AS trans ON trans.spree_taxon_id = ancesstor.id
WHERE
trans.locale = 'en'
AND ancesstor.lft <= taxon.lft
AND ancesstor.rgt >= taxon.rgt
AND ancesstor.parent_id IS NOT NULL
ORDER BY ancesstor.lft
),
taxon_with_list_ancesstor AS (
SElECT
taxon_id,
taxon_type,
STRING_AGG(ancesstor_id::varchar, ' -> ' ORDER BY ancesstor_id ASC ) AS ancesstor_ids,
STRING_AGG(ancesstor_name::varchar, ' -> ' ORDER BY ancesstor_id ASC ) AS ancesstor_names
FROM ancesstor_taxons
GROUP BY taxon_id, taxon_type
),
product_with_taxon_ids AS (
SELECT
pt.product_id,
taxon.taxon_type,
STRING_AGG( taxon.taxon_id::varchar, '|' ORDER BY taxon.taxon_type ASC ) AS taxon_ids,
STRING_AGG( taxon.ancesstor_ids, '|' ORDER BY taxon.taxon_type ASC ) AS agg_ancesstor_ids,
STRING_AGG( taxon.ancesstor_names, '|' ORDER BY taxon.taxon_type ASC ) AS agg_ancesstor_names
FROM spree_products_taxons AS pt INNER JOIN taxon_with_list_ancesstor AS taxon ON pt.taxon_id = taxon.taxon_id
GROUP BY pt.product_id, taxon.taxon_type
),
date_in_50yrs AS (
SELECT
(extract(epoch FROM now()) * 1000)::BIGINT + (365::BIGINT * 50 * 24 * 3600 * 1000) as date_in_50yrs
),
available_products AS (
SELECT
p.id,
SUM(s.backorderable::int) AS total_backorderable,
SUM(s.count_on_hand) AS total_count_on_hand
FROM spree_products p
INNER JOIN spree_variants v ON p.id = v.product_id
INNER JOIN spree_stock_items s on v.id = s.variant_id
GROUP BY p.id
),
product_items AS (
SELECT
p.id,
ptran.description,
ptran.name,
ptran.slug,
pt.taxon_type,
sp.amount,
vd.vendor_type,
p.vendor_id,
pt.taxon_ids,
pt.agg_ancesstor_ids,
pt.agg_ancesstor_names,
ap.total_backorderable,
ap.total_count_on_hand,
(extract(epoch FROM p.available_on ) * 1000)::BIGINT AS available_on,
(extract(epoch FROM p.discontinue_on ) * 1000)::BIGINT AS discontinue_on,
(extract(epoch FROM p.created_at ) * 1000)::BIGINT AS created_at
FROM spree_products AS p
INNER JOIN product_with_taxon_ids AS pt ON p.id = pt.product_id
INNER JOIN spree_product_translations AS ptran ON ptran.spree_product_id = p.id
INNER JOIN available_products AS ap ON ap.id = p.id
INNER JOIN spree_variants AS v ON v.product_id = p.id
INNER JOIN spree_prices AS sp ON sp.variant_id = v.id
INNER JOIN spree_vendors as vd ON vd.id = p.vendor_id
WHERE ptran.locale = 'en' AND sp.currency = 'USD' AND v.is_master = true
),
items AS (
SELECT
p.id AS "ITEM_ID",
p.taxon_type AS "TAXON_TYPE",
p.slug AS "SLUG",
p.taxon_ids AS "CATEGORY_L1",
p.agg_ancesstor_ids,
p.agg_ancesstor_names,
p.available_on AS "AVAILABLE_ON",
p.created_at AS "CREATION_TIMESTAMP",
p.vendor_type AS "VENDOR_TYPE",
p.amount AS "PRICE",
'none' AS "AGE_GROUP",
'none' AS "ADULT",
'none' AS "GENDER",
CASE
WHEN p.vendor_id IS NULL
THEN 0 ELSE p.vendor_id
END "VENDOR_ID",
-- p.total_backorderable AS "BACKORDERABLE",
-- p.total_count_on_hand AS "COUNT_ON_HAND",
CASE
WHEN total_backorderable + p.total_count_on_hand > 0
THEN 1 ELSE 0
END "AVAILABILITY",
-- p.discontinue_on AS "DISCONTINUE_ON"
CASE
WHEN p.discontinue_on IS NULL
THEN d.date_in_50yrs ELSE p.discontinue_on
END "DISCONTINUE_ON"
FROM product_items as p, date_in_50yrs as d
ORDER BY p.id
)
-- Prototype and testing
-- SELECT * ancesstor_taxons
-- SELECT * taxon_with_list_ancesstor
-- SELECT * product_with_taxon_ids
-- SELECT * available_products
-- SELECT * product_items
SELECT * FROM items
@channainfo
Copy link
Author

Edit - ML-Item with category nested 2021-12-10 15-07-27

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment