Last active
January 15, 2017 10:49
-
-
Save snakers4/43c31262d5511607709927cf243fddfd to your computer and use it in GitHub Desktop.
DoubleData illustration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
bot.vk_id, | |
bot.rest_review_count_per_critic as rest_review_count_per_critic, | |
bot.is_bot as is_bot, | |
vks.* | |
FROM | |
bot_anomaly_dataset bot | |
JOIN vk_social_attr vks ON bot.vk_id = 'id' || vks.social_network_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fine casual expensive cheap Max of Кафе Max of Европейская кухня Max of Русская кухня Max of Фастфуд Max of Пицца Max of Итальянская кухня Max of Бары Max of Японская кухня Max of Паста Max of Суши Max of Сэндвичи Max of Кондитерские Max of Бургеры Max of Кофейни Max of Американская кухня Max of Вок Max of Кальяны Max of Грузинская кухня Max of Коктейльная карта Max of Узбекская кухня Max of Блины Max of Пироги Max of Стейки Max of Караоке Max of Хачапури Max of Спортбары Max of Пекарни Max of Китайская кухня Max of Пивные рестораны Max of Пельмени Max of Банкетные залы Max of Столовые Max of Вегетарианское меню Max of Паназиатская кухня Max of Стритфуд Max of Хинкали Max of Азербайджанская кухня Max of Кулинарии Max of Винотеки Max of Армянская кухня Max of Пабы Max of Турецкая кухня Max of Мексиканская кухня Max of Живая музыка Max of Татарская кухня Max of Средиземноморская кухня Max of Чебуреки Max of Французская кухня Max of Немецкая кухня Max of Арабская кухня Max of Украинская кухня Max of Чайные Max of Концерты Max of Ресторанная программа Visa Premium Max of Куда пойти с детьми Max of Английская кухня Max of Корейская кухня Max of Халяльные рестораны Max of Торты на заказ Max of Тайская кухня Max of Рыбные рестораны Max of Гастропабы Max of Где пить пиво Max of Где пить кофе Max of Башкирская кухня Max of Индийская кухня Max of Греческая кухня Max of Чешская кухня Max of Лучшие места с дешевой едой Max of Вьетнамская кухня Max of Где есть бургеры Max of Где поесть ночью Max of Лучшие итальянские рестораны Max of Испанская кухня Max of Осетинская кухня Max of Где лечить похмелье Max of Лучшие рестораны в Петербурге Max of Сербская кухня Max of Где есть горячие супы Max of Лучшие рестораны в Москве Max of Еврейская кухня Max of Где есть мясо Max of Авторская кухня Max of Куда пойти с бабушкой и дедушкой Max of Бельгийская кухня Max of Ливанская кухня Max of Где пить вино Max of Шорт-лист премии Wheretoeat Max of Лучший фастфуд Max of Где есть пиццу Max of Лучшие ирландские пабы Max of Где отметить праздник Max of Лучшие бары в Петербурге Max of Где смотреть футбол в Петербурге Max of Лучшее мороженое Max of Лучшие завтраки в Москве Max of Где смотреть футбол в Москве Max of Где пить сидр Max of Лучшие бары Москвы Max of Уйгурская кухня Max of Где смотреть футбол в Москве2 Max of Лучшие кондитерские Max of Лучшие новые рестораны Max of Лучшие кавказские рестораны Max of Лучшие места на Петроградской стороне Max of Где пить водку Max of Где есть пельмени Max of Где есть суши Max of Где есть устриц Max of Латиноамериканская кухня |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT DISTINCT | |
'http://www.afisha.ru/msk/restaurant/' || rl.afisha_rest_id as rest_url, | |
rl.rest_name as rest_name, | |
rl.price as rest_price, | |
rl.cuisine as cuisine, | |
rl.tags as tags, | |
rest_tags_review_count.review_count_per_restaurant as review_count_per_restaurant, | |
COUNT (DISTINCT rest_tags_review_count.tag) as tags_per_restaurant, | |
CASE | |
WHEN rl.afisha_rest_id IN (173934, 20549, 28017, 45152, 61072, 151051, 18785, 197696, 186396, 259664, 22334, 16586, 29762, 20481, 22222, 32875, 40192, 29585, 29688, 41431, 59453, 36750, 18677, 29048, 30600, 22793, 22281, 20482, 20482, 28114, 35958, 30256, 36752, 16700, 20439, 18593, 32591, 32591) THEN 1 | |
ELSE 0 | |
END as novikov, | |
r2cl.cluster_id as cluster_id, | |
r2r.rating as rest_rating | |
FROM | |
( | |
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') as tag, | |
count(r2u.afisha_user_id) as review_count_per_restaurant | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
GROUP BY | |
rl.afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') | |
) rest_tags_review_count | |
JOIN rest_list rl ON rl.afisha_rest_id = rest_tags_review_count.afisha_rest_id | |
JOIN rest2cluster r2cl ON r2cl.afisha_rest_id = rl.afisha_rest_id | |
JOIN rest_rating r2r ON r2r.afisha_rest_id = rl.afisha_rest_id | |
GROUP BY | |
'http://www.afisha.ru/msk/restaurant/' || rl.afisha_rest_id, | |
rl.price, | |
rl.cuisine, | |
rl.rest_name, | |
rest_tags_review_count.review_count_per_restaurant, | |
rl.tags, | |
r2cl.cluster_id, | |
r2r.rating, | |
CASE | |
WHEN rl.afisha_rest_id IN (173934, 20549, 28017, 45152, 61072, 151051, 18785, 197696, 186396, 259664, 22334, 16586, 29762, 20481, 22222, 32875, 40192, 29585, 29688, 41431, 59453, 36750, 18677, 29048, 30600, 22793, 22281, 20482, 20482, 28114, 35958, 30256, 36752, 16700, 20439, 18593, 32591, 32591) THEN 1 | |
ELSE 0 | |
END | |
ORDER BY | |
rest_tags_review_count.review_count_per_restaurant DESC | |
/* | |
vga.group_name as group_name, | |
vga."сategory" as group_category, | |
vga.subcategory as subcategory, | |
'https://vk.com/club' || vga.vk_group_id as url, | |
u2g.vk_group_id as vk_group_id, | |
COUNT (DISTINCT r2u.afisha_user_id) as count_afisha_user_id, | |
COUNT (DISTINCT rl.afisha_rest_id) as count_afisha_rest_id, | |
vga.subscribers_int | |
*/ | |
/* | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
JOIN afisha_user_id2social_netw a2n ON a2n."type" = 1 AND a2n.user_id = r2u.afisha_user_id | |
JOIN vk_id2vk_group u2g ON 'id' || u2g.vk_id = a2n.social_network_id | |
JOIN vk_group_attr vga ON vga.vk_group_id = u2g.vk_group_id | |
*/ | |
/* | |
u2g.vk_group_id, | |
vga.subscribers_int, | |
'https://vk.com/club' || vga.vk_group_id, | |
vga."сategory", | |
vga.subcategory, | |
vga.group_name | |
*/ | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT DISTINCT | |
'http://www.afisha.ru/msk/restaurant/' || rl.afisha_rest_id as rest_url, | |
rl.rest_name as rest_name, | |
rl.price as rest_price, | |
rl.cuisine as cuisine, | |
rl.tags as tags, | |
rest_tags_review_count.review_count_per_restaurant as review_count_per_restaurant, | |
COUNT (DISTINCT rest_tags_review_count.tag) as tags_per_restaurant, | |
CASE | |
WHEN rl.afisha_rest_id IN (173934, 20549, 28017, 45152, 61072, 151051, 18785, 197696, 186396, 259664, 22334, 16586, 29762, 20481, 22222, 32875, 40192, 29585, 29688, 41431, 59453, 36750, 18677, 29048, 30600, 22793, 22281, 20482, 20482, 28114, 35958, 30256, 36752, 16700, 20439, 18593, 32591, 32591) THEN 1 | |
ELSE 0 | |
END as novikov | |
FROM | |
( | |
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') as tag, | |
count(r2u.afisha_user_id) as review_count_per_restaurant | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
GROUP BY | |
rl.afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') | |
) rest_tags_review_count | |
JOIN rest_list rl ON rl.afisha_rest_id = rest_tags_review_count.afisha_rest_id | |
GROUP BY | |
'http://www.afisha.ru/msk/restaurant/' || rl.afisha_rest_id, | |
rl.price, | |
rl.cuisine, | |
rl.rest_name, | |
rest_tags_review_count.review_count_per_restaurant, | |
rl.tags, | |
CASE | |
WHEN rl.afisha_rest_id IN (173934, 20549, 28017, 45152, 61072, 151051, 18785, 197696, 186396, 259664, 22334, 16586, 29762, 20481, 22222, 32875, 40192, 29585, 29688, 41431, 59453, 36750, 18677, 29048, 30600, 22793, 22281, 20482, 20482, 28114, 35958, 30256, 36752, 16700, 20439, 18593, 32591, 32591) THEN 1 | |
ELSE 0 | |
END | |
ORDER BY | |
rest_tags_review_count.review_count_per_restaurant DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
COUNT(rl.afisha_rest_id) as restaurant_count, | |
COUNT(r2u.afisha_user_id) as review_user_count, | |
rl.cuisine as cuisine, | |
--array_length(regexp_split_to_array(rl.tags, ','), 1) as tag_count, | |
CASE | |
WHEN rl.price = 'До 700 рублей' THEN 1 | |
ELSE 0 | |
END as cheap, | |
CASE | |
WHEN rl.price = '700–1500 рублей' THEN 1 | |
ELSE 0 | |
END as casual, | |
CASE | |
WHEN rl.price = '1500–2500 рублей' THEN 1 | |
ELSE 0 | |
END as fine, | |
CASE | |
WHEN rl.price = 'Больше 2500 рублей' THEN 1 | |
ELSE 0 | |
END as expensive | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
GROUP BY | |
rl.cuisine, | |
rl.price | |
--array_length(regexp_split_to_array(rl.tags, ','), 1) | |
ORDER BY | |
restaurant_count DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT DISTINCT | |
a2n.afisha_user_id as afisha_user_id, | |
count(DISTINCT r2u.afisha_rest_id) as rest_review_count_per_critic, | |
'https://vk.com/' || a2n.social_network_id as vk_url | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
/* Тут все join-ы левые, т.к. очевидно что социальный граф есть у меньшинства */ | |
LEFT JOIN afisha_user_id2social_netw a2n ON a2n."type" = 1 AND a2n.user_id = r2u.afisha_user_id | |
LEFT JOIN vk_social_attr vsoc ON 'id' || vsoc.social_network_id = a2n.social_network_id | |
GROUP BY | |
a2n.afisha_user_id, | |
'https://vk.com/' || a2n.social_network_id | |
ORDER BY | |
count(DISTINCT r2u.afisha_rest_id) DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
* | |
FROM | |
( | |
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') as tags, | |
rl.price as price | |
/* Сюда так и просится расстояние до метро...и индикатор центра города */ | |
FROM | |
rest_list rl | |
) raw | |
WHERE | |
raw.tags <> '0' AND raw.price <> '0' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
/* тут все очень резко, просто и arbitrary - можно делать кривую мягче и настроить под клиента */ | |
(log(rest_tags_review_count.review_count_per_restaurant + 1)^0.1/1.06) as reviews_score, | |
(count(DISTINCT a2n.social_network_id)::NUMERIC / (count(a2n.*)::NUMERIC +1) ) as social_score, | |
avg(vkf.group_fit) as group_fit_score | |
FROM | |
( | |
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
count(r2u.afisha_user_id) as review_count_per_restaurant | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
GROUP BY | |
rl.afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') | |
) rest_tags_review_count | |
JOIN rest_list rl ON rl.afisha_rest_id = rest_tags_review_count.afisha_rest_id | |
LEFT JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
LEFT JOIN afisha_user_id2social_netw a2n ON a2n."type" = 1 AND a2n.user_id = r2u.afisha_user_id | |
LEFT JOIN vk_id2vk_group u2g ON 'id' || u2g.vk_id = a2n.social_network_id | |
LEFT JOIN vk_group_fit vkf ON vkf.vk_group_id = u2g.vk_group_id | |
GROUP BY | |
rl.afisha_rest_id, | |
(log(rest_tags_review_count.review_count_per_restaurant + 1)^0.1/1.06) | |
LIMIT 1000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
raw_data.afisha_rest_id as afisha_rest_id, | |
raw_data.reviews_score as reviews_score, | |
round (log(1/(raw_data.social_score+0.01))/3 :: NUMERIC, 2) as social_score, | |
raw_data.group_fit_score as group_fit_score | |
FROM | |
( | |
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
/* тут все очень резко, просто и arbitrary - можно делать кривую мягче и настроить под клиента */ | |
round( (log(rest_tags_review_count.review_count_per_restaurant + 1)^0.1/1.06) :: NUMERIC, 3) as reviews_score, | |
round( (count(DISTINCT a2n.social_network_id)::NUMERIC / (count(a2n.*)::NUMERIC +1) ), 3) as social_score, | |
round( avg(vkf.group_fit) :: NUMERIC, 3) as group_fit_score | |
FROM | |
( | |
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
count(r2u.afisha_user_id) as review_count_per_restaurant | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
GROUP BY | |
rl.afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') | |
) rest_tags_review_count | |
JOIN rest_list rl ON rl.afisha_rest_id = rest_tags_review_count.afisha_rest_id | |
LEFT JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
LEFT JOIN afisha_user_id2social_netw a2n ON a2n."type" = 1 AND a2n.user_id = r2u.afisha_user_id | |
LEFT JOIN vk_id2vk_group u2g ON 'id' || u2g.vk_id = a2n.social_network_id | |
LEFT JOIN vk_group_fit vkf ON vkf.vk_group_id = u2g.vk_group_id | |
GROUP BY | |
rl.afisha_rest_id, | |
(log(rest_tags_review_count.review_count_per_restaurant + 1)^0.1/1.06) | |
) raw_data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT DISTINCT | |
rest_tags_review_count.tag as tag, | |
count(rest_tags_review_count.afisha_rest_id) as rest_count_per_tag | |
FROM | |
( | |
SELECT | |
rl.afisha_rest_id as afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') as tag, | |
count(r2u.afisha_user_id) as review_count_per_restaurant | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
GROUP BY | |
rl.afisha_rest_id, | |
regexp_split_to_table(rl.tags, ',') | |
) rest_tags_review_count | |
JOIN rest_list rl ON rl.afisha_rest_id = rest_tags_review_count.afisha_rest_id | |
GROUP BY | |
rest_tags_review_count.tag | |
ORDER BY | |
count(rest_tags_review_count.afisha_rest_id) DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT DISTINCT | |
'https://vk.com/club' || vga.vk_group_id as url, | |
u2g.vk_group_id as vk_group_id, | |
COUNT (DISTINCT r2u.afisha_user_id) as count_afisha_user_id, | |
COUNT (DISTINCT rl.afisha_rest_id) as count_afisha_rest_id, | |
vga.subscribers_int | |
FROM | |
rest_list rl | |
JOIN rest_id2afisha_user_id r2u ON r2u.afisha_rest_id = rl.afisha_rest_id | |
JOIN afisha_user_id2social_netw a2n ON a2n."type" = 1 AND a2n.user_id = r2u.afisha_user_id | |
JOIN vk_id2vk_group u2g ON 'id' || u2g.vk_id = a2n.social_network_id | |
JOIN vk_group_attr vga ON vga.vk_group_id = u2g.vk_group_id | |
GROUP BY | |
u2g.vk_group_id, | |
vga.subscribers_int, | |
'https://vk.com/club' || vga.vk_group_id | |
ORDER BY | |
count_afisha_user_id DESC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
count (vkg.*) as total_groups, | |
sum(CASE WHEN vkg.subscribers_int NOTNULL THEN 1 ELSE 0 END) as not_null_subscribers, | |
sum(CASE WHEN vkg.visitors_int NOTNULL THEN 1 ELSE 0 END) as not_null_visitors, | |
sum(CASE WHEN vkg.coverage_int NOTNULL THEN 1 ELSE 0 END) as not_null_coverage | |
FROM | |
vk_group_attr vkg | |
SELECT | |
corr(vkg.subscribers_int,vkg.visitors_int) subscribers_int_visitors_int, | |
corr(vkg.subscribers_int,vkg.coverage_int), | |
corr(vkg.visitors_int,vkg.coverage_int) | |
/* | |
vkg.subscribers_int | |
vkg.visitors_int | |
vkg.coverage_int | |
*/ | |
FROM | |
vk_group_attr vkg |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment