Skip to content

Instantly share code, notes, and snippets.

View futoase's full-sized avatar
🐱
Focusing

Keiji Matsuzaki futoase

🐱
Focusing
  • Japan
View GitHub Profile
# Add Cloudera repositories
sudo sh -c 'echo "deb http://archive.cloudera.com/debian `lsb_release -c -s`-cdh3 contrib" > /etc/apt/sources.list.d/cloudera.list'
sudo sh -c 'echo "deb-src http://archive.cloudera.com/debian `lsb_release -c -s`-cdh3 contrib" >> /etc/apt/sources.list.d/cloudera.list'
wget -q -O - http://archive.cloudera.com/debian/archive.key | sudo apt-key add -
sudo apt-get update
sudo apt-get -y install hadoop
# Install Hue
sudo apt-get -y install hadoop-0.20-conf-pseudo-hue
-- 0.1 ログインインターバルの分布 --
-- 当日入会->辞めた人もインターバルは1となるので1回以上プレイしている人に限定 --
td query -w -d your_app -f csv -o dist_of_login_interval.csv "
SELECT ROUND((datediff(latest_login, registered_day)+1)/login_times) AS login_interval, COUNT(*) AS cnt
FROM
(
SELECT v['uid'] AS uid, from_unixtime(MAX(time),'yyyy-MM-dd' ) AS latest_login
FROM login
GROUP BY v['uid']
) t1
-- 1.1 チュートリアルファンネル(All) --
td query -w -f csv -d your_app -o tutorial_all.csv "
SELECT
t2.step AS step,
cnt,
prev_cnt,
ROUND(cnt/enter_cnt*100) AS rate_from_enter,
IF(prev_cnt IS NULL, 100,ROUND(cnt/prev_cnt*100)) AS rate_from_prev,
IF(prev_cnt IS NULL, 0,ROUND((prev_cnt-cnt)/prev_cnt*100)) AS normed_rate_from_enter
FROM
-- 6.1 共起分析:Simpson係数の高いブックペア上位20 --
td query -w -d book_crossing_dataset "
SELECT r2.title AS title1,
r2.year_of_publication AS year1,
r2.cnt AS cnt1,
r3.title AS title2,
r3.year_of_publication AS year2,
r3.cnt AS cnt2,
r1.cnt AS intersection,
r1.cnt/IF(r2.cnt<r3.cnt,r2.cnt,r3.cnt)*100 AS simpson
-- 2.1.1 ユーザーの居住国分布 --
td query -w -d book_crossing_dataset -f csv -o user_dist_of_country.csv "
SELECT country, cnt, ROUND(cnt/total_cnt*10000)/100 AS rate
FROM
(
SELECT country, COUNT(*) as cnt, 1 AS one
FROM users
WHERE country != '' AND country != 'n/a'
GROUP BY country
ORDER BY cnt DESC
-- 4.1.1 ユーザーの評価回数平均 --
td query -w -d book_crossing_dataset "
SELECT ROUND(AVG(cnt)) AS avg
FROM
(
SELECT user_id, COUNT(book_rating) AS cnt
FROM ratings
GROUP BY user_id
) t1
"
-- 1.1.1 登録ユーザー数, アクティブユーザー数 --
td query -w -d book_crossing_dataset "
SELECT t1.cnt AS all_users, t2.cnt AS active_users, ROUND(t2.cnt/t1.cnt*100) AS active_rate
FROM
(
SELECT COUNT(distinct user_id) as cnt, 1 AS one
FROM users
) t1
JOIN
(
-- 3.1.1 出版年別分布 --
td query -w -d book_crossing_dataset "
SELECT year, cnt
FROM
(
SELECT 1975 AS year, COUNT(year_of_publication) AS cnt
FROM books
WHERE year_of_publication < 1970
UNION ALL
@futoase
futoase / pygments.css
Last active December 21, 2015 11:08 — forked from zmanji/pygments.css
.highlight {
background-color:#073642;
color:#93a1a1;
}
.highlight .c {
color:#586e75 !important;
font-style:italic !important;
}
=Navigating=
visit('/projects')
visit(post_comments_path(post))
=Clicking links and buttons=
click_link('id-of-link')
click_link('Link Text')
click_button('Save')
click('Link Text') # Click either a link or a button
click('Button Value')