Created
August 19, 2017 18:47
-
-
Save mac2000/3f4f460d441aa41a0dfa3fa8e8fc443d to your computer and use it in GitHub Desktop.
bigquery recommendations based on user ratings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH d AS ( | |
-- should return (cid - user id, vid - item id, rating) | |
SELECT | |
clientId as cid, | |
REGEXP_EXTRACT(page.pagePath, r'/company\d+/vacancy(\d+)') AS vid, | |
MAX( | |
CASE | |
WHEN page.pagePath LIKE '%apply=thanks%' THEN 5 | |
WHEN page.pagePath LIKE '%mode=apply%' THEN 2 | |
ELSE 1 | |
END | |
) AS rating | |
FROM `majestic-cairn-171208`.bigdata.ga WHERE | |
_PARTITIONTIME BETWEEN TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 * 24 HOUR),DAY) AND TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 * 24 HOUR),DAY) | |
AND type = 'pageview' | |
AND page.pagePath LIKE '/company%/vacancy%' | |
GROUP BY cid, vid | |
), v AS ( | |
-- optional, just remove join in last query, should return (vid - item id, title) | |
SELECT DISTINCT | |
REGEXP_EXTRACT(page.pagePath, r'/company\d+/vacancy(\d+)') AS vid, | |
REGEXP_REPLACE(REGEXP_EXTRACT(page.title, r'(.+) в .+ - .+ | Rabota.ua'), r'\d+ грн$', '') AS title | |
FROM `majestic-cairn-171208`.bigdata.ga WHERE | |
_PARTITIONTIME BETWEEN TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 * 24 HOUR),DAY) AND TIMESTAMP_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 * 24 HOUR),DAY) | |
AND type = 'pageview' | |
AND page.pagePath LIKE '/company%/vacancy%' | |
AND page.pagePath NOT LIKE '%mode=apply%' | |
AND page.pagePath NOT LIKE '%apply=thanks%' | |
), c AS ( | |
-- correlation | |
SELECT a.vid as vid, b.vid as similar_vid, CORR(a.rating, b.rating) as correlation, count(*) as `count` | |
FROM d AS a | |
JOIN d AS b | |
ON a.cid = b.cid | |
WHERE a.vid <> b.vid | |
GROUP BY a.vid, b.vid | |
) | |
-- get related items to desc | |
select c.*, v.title from c | |
join v on c.similar_vid = v.vid | |
where c.vid = '6849385' | |
order by `count` desc | |
limit 10 |
Author
mac2000
commented
Jan 9, 2018
WITH d AS (
select 'mac' as user, 'beer' as item, 5.0 as rating
UNION ALL
select 'mac' as user, 'vine' as item, 1.0 as rating
UNION ALL
select 'mac' as user, 'konjak' as item, 5.0 as rating
-- UNION ALL
-- select 'mac' as user, 'viski' as item, 1.0 as rating
union all
select 'michael' as user, 'beer' as item, 3.0 as rating
UNION ALL
select 'michael' as user, 'vine' as item, 1.0 as rating
UNION ALL
select 'michael' as user, 'konjak' as item, 4.0 as rating
UNION ALL
select 'michael' as user, 'viski' as item, 4.0 as rating
union all
select 'maria' as user, 'beer' as item, 3.0 as rating
UNION ALL
select 'maria' as user, 'vine' as item, 5.0 as rating
UNION ALL
select 'maria' as user, 'konjak' as item, 1.0 as rating
UNION ALL
select 'maria' as user, 'viski' as item, 1.0 as rating
), i2i as (
SELECT a.item as item, b.item as similar, CORR(a.rating, b.rating) as correlation, count(*) as count
FROM d AS a
JOIN d AS b
ON a.user = b.user
WHERE a.item <> b.item
GROUP BY a.item, b.item
), u2u as (
SELECT a.user as user, b.user as similar, CORR(a.rating, b.rating) as correlation, count(*) as count
FROM d AS a
JOIN d AS b
ON a.item = b.item
WHERE a.user <> b.user
GROUP BY a.user, b.user
)
select
u2u.user as u2u_user,
u2u.similar as u2u_similar,
u2u.correlation as u2u_correlation,
d1.item as d1_item,
d2.item as d2_item,
i2i.correlation as i2i_correlation
from u2u
join d as d1 on u2u.user = d1.user
join d as d2 on u2u.similar = d2.user
left join i2i on i2i.item = d1.item and i2i.similar = d2.item
where u2u.correlation > 0
and i2i.correlation > 0
and d2.item not in (select item from d as x where x.user = d1.user)
-- select
-- u2u.user as u2u_user,
-- -- u2u.similar as u2u_similar,
-- -- u2u.correlation as u2u_correlation,
-- -- d1.item as d1_item,
-- d2.item as d2_item,
-- i2i.correlation as i2i_correlation
-- from u2u
-- join d as d1 on u2u.user = d1.user
-- join d as d2 on u2u.similar = d2.user
-- left join i2i on i2i.item = d1.item and i2i.similar = d2.item
-- where u2u.correlation > 0
-- and i2i.correlation > 0
-- and d2.item not in (select item from d as x where x.user = d1.user)
-- order by i2i.correlation desc
-- -- select * from d where user in (select similar from simusers)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment