Last active
December 10, 2021 08:12
-
-
Save channainfo/6b6a6ecea53ab8370307046aaf172215 to your computer and use it in GitHub Desktop.
Export items to panda data wrangling for items dataset in recommendation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
ancesstor_taxons AS ( | |
SELECT | |
taxon.id as taxon_id, | |
taxon.taxon_type as taxon_type, | |
taxon.parent_id, | |
ancesstor.id AS ancesstor_id, | |
ancesstor.parent_id AS ancesstor_parent_id, | |
trans.name AS ancesstor_name, | |
taxon.lft, | |
taxon.rgt, | |
taxon.depth | |
FROM spree_taxons AS taxon, spree_taxons AS ancesstor | |
INNER JOIN spree_taxon_translations AS trans ON trans.spree_taxon_id = ancesstor.id | |
WHERE | |
trans.locale = 'en' | |
AND ancesstor.lft <= taxon.lft | |
AND ancesstor.rgt >= taxon.rgt | |
AND ancesstor.parent_id IS NOT NULL | |
ORDER BY ancesstor.lft | |
), | |
taxon_with_list_ancesstor AS ( | |
SElECT | |
taxon_id, | |
taxon_type, | |
STRING_AGG(ancesstor_id::varchar, ' -> ' ORDER BY ancesstor_id ASC ) AS ancesstor_ids, | |
STRING_AGG(ancesstor_name::varchar, ' -> ' ORDER BY ancesstor_id ASC ) AS ancesstor_names | |
FROM ancesstor_taxons | |
GROUP BY taxon_id, taxon_type | |
), | |
product_with_taxon_ids AS ( | |
SELECT | |
pt.product_id, | |
taxon.taxon_type, | |
STRING_AGG( taxon.taxon_id::varchar, '|' ORDER BY taxon.taxon_type ASC ) AS taxon_ids, | |
STRING_AGG( taxon.ancesstor_ids, '|' ORDER BY taxon.taxon_type ASC ) AS agg_ancesstor_ids, | |
STRING_AGG( taxon.ancesstor_names, '|' ORDER BY taxon.taxon_type ASC ) AS agg_ancesstor_names | |
FROM spree_products_taxons AS pt INNER JOIN taxon_with_list_ancesstor AS taxon ON pt.taxon_id = taxon.taxon_id | |
GROUP BY pt.product_id, taxon.taxon_type | |
), | |
date_in_50yrs AS ( | |
SELECT | |
(extract(epoch FROM now()) * 1000)::BIGINT + (365::BIGINT * 50 * 24 * 3600 * 1000) as date_in_50yrs | |
), | |
available_products AS ( | |
SELECT | |
p.id, | |
SUM(s.backorderable::int) AS total_backorderable, | |
SUM(s.count_on_hand) AS total_count_on_hand | |
FROM spree_products p | |
INNER JOIN spree_variants v ON p.id = v.product_id | |
INNER JOIN spree_stock_items s on v.id = s.variant_id | |
GROUP BY p.id | |
), | |
product_items AS ( | |
SELECT | |
p.id, | |
ptran.description, | |
ptran.name, | |
ptran.slug, | |
pt.taxon_type, | |
sp.amount, | |
vd.vendor_type, | |
p.vendor_id, | |
pt.taxon_ids, | |
pt.agg_ancesstor_ids, | |
pt.agg_ancesstor_names, | |
ap.total_backorderable, | |
ap.total_count_on_hand, | |
(extract(epoch FROM p.available_on ) * 1000)::BIGINT AS available_on, | |
(extract(epoch FROM p.discontinue_on ) * 1000)::BIGINT AS discontinue_on, | |
(extract(epoch FROM p.created_at ) * 1000)::BIGINT AS created_at | |
FROM spree_products AS p | |
INNER JOIN product_with_taxon_ids AS pt ON p.id = pt.product_id | |
INNER JOIN spree_product_translations AS ptran ON ptran.spree_product_id = p.id | |
INNER JOIN available_products AS ap ON ap.id = p.id | |
INNER JOIN spree_variants AS v ON v.product_id = p.id | |
INNER JOIN spree_prices AS sp ON sp.variant_id = v.id | |
INNER JOIN spree_vendors as vd ON vd.id = p.vendor_id | |
WHERE ptran.locale = 'en' AND sp.currency = 'USD' AND v.is_master = true | |
), | |
items AS ( | |
SELECT | |
p.id AS "ITEM_ID", | |
p.taxon_type AS "TAXON_TYPE", | |
p.slug AS "SLUG", | |
p.taxon_ids AS "CATEGORY_L1", | |
p.agg_ancesstor_ids, | |
p.agg_ancesstor_names, | |
p.available_on AS "AVAILABLE_ON", | |
p.created_at AS "CREATION_TIMESTAMP", | |
p.vendor_type AS "VENDOR_TYPE", | |
p.amount AS "PRICE", | |
'none' AS "AGE_GROUP", | |
'none' AS "ADULT", | |
'none' AS "GENDER", | |
CASE | |
WHEN p.vendor_id IS NULL | |
THEN 0 ELSE p.vendor_id | |
END "VENDOR_ID", | |
-- p.total_backorderable AS "BACKORDERABLE", | |
-- p.total_count_on_hand AS "COUNT_ON_HAND", | |
CASE | |
WHEN total_backorderable + p.total_count_on_hand > 0 | |
THEN 1 ELSE 0 | |
END "AVAILABILITY", | |
-- p.discontinue_on AS "DISCONTINUE_ON" | |
CASE | |
WHEN p.discontinue_on IS NULL | |
THEN d.date_in_50yrs ELSE p.discontinue_on | |
END "DISCONTINUE_ON" | |
FROM product_items as p, date_in_50yrs as d | |
ORDER BY p.id | |
) | |
-- Prototype and testing | |
-- SELECT * ancesstor_taxons | |
-- SELECT * taxon_with_list_ancesstor | |
-- SELECT * product_with_taxon_ids | |
-- SELECT * available_products | |
-- SELECT * product_items | |
SELECT * FROM items |
Author
channainfo
commented
Dec 10, 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment