Skip to content

Instantly share code, notes, and snippets.

@tattyamm
Created May 27, 2012 08:34
Show Gist options
  • Save tattyamm/2802856 to your computer and use it in GitHub Desktop.
Save tattyamm/2802856 to your computer and use it in GitHub Desktop.
R_Google_Analyrics_Sample.R
# Code used in the following article:
#   "Google AnalyticsのデータをRで解析する" (Analyzing Google Analytics data with R)
#   - えんたつの記録
#   http://blog.livedoor.jp/tattyamm/archives/4018354.html

# Load libraries
source("RGoogleAnalytics.R")
source("QueryBuilder.R")
library(lattice)  # required for levelplot()

# 1. Create a new Google Analytics API object
ga <- RGoogleAnalytics()

# 2. Authorize the object with your Google Analytics Account Credentials.
#    Credentials are taken from the environment variables GA_USERNAME and
#    GA_PASSWORD so that real secrets never have to be written into this file;
#    when they are unset the original placeholders are used (change as needed).
ga$SetCredentials(Sys.getenv("GA_USERNAME", unset = "username"),
                  Sys.getenv("GA_PASSWORD", unset = "password"))

# 3. Get the list of different profiles, to help build the query
profiles <- ga$GetProfileData()
print(profiles[1])

# 4. Build the Data Export API query:
#    visitors per date and hour for January 2011, sorted by date.
#    Replace table.id with your own profile's table id.
query <- QueryBuilder()
query$Init(start.date = "2011-01-01",
           end.date = "2011-01-31",
           dimensions = c("ga:date", "ga:hour"),
           metrics = "ga:visitors",
           sort = "ga:date",
           table.id = "ga:xxxxxxxxxx")

# 5. Make a request to get the data from the API
ga.data <- ga$GetReportData(query)

# 6. Look at the returned data
head(ga.data$data)
# Simple line plot of the visitor counts (third column of the report)
png('plot_visitors.png')
plot(ga.data$data[, 3], type = "l")
dev.off()

# Heatmap of visitors by hour and date
hour <- ga.data$data[, 2]      # hour of day
day <- ga.data$data[, 1]       # date
visitors <- ga.data$data[, 3]  # number of visitors
png('levelplot_visitors.png')
# levelplot() only returns a trellis object; it is drawn when printed.
# Auto-printing does not happen when this file is run via source(), so the
# call is wrapped in print() to make sure the PNG is not left empty.
print(levelplot(
  tapply(visitors, list(hour, day), sum),  # rows = hour, columns = date
  col.regions = colorRampPalette(c("white", "red")),
  xlab = "Hour", ylab = "Date", main = "Visitors"
))
dev.off()
# Aggregate by day of the week and plot
# Weekday labels. Each label carries a numeric prefix (1 = Sunday ... 7 =
# Saturday) so that the labels keep weekday order when sorted for display.
wdays <- paste0(1:7, "_", c("Sun.", "Mon.", "Tue.", "Wed.", "Thu.", "Fri.", "Sat."))
# Convert a date string such as "20110105" into a form R can parse as a date
# (readable by as.Date), e.g. "2011-01-05".
# Reference: http://aoki2.si.gunma-u.ac.jp/R/joseki/redefine.as.Date.html
#
# str:    vector of dates; characters 1-4 are taken as the year, 5-6 as the
#         month and 7-8 as the day.
# format: sprintf() template that receives year, month and day, in that order.
# Returns the formatted character vector.
changeDateFormat <- function(str, format="%04s-%02s-%02s") {
  # Character positions (first, last) of the year, month and day fields.
  fieldBounds <- list(c(1, 4), c(5, 6), c(7, 8))
  fields <- lapply(fieldBounds, function(bounds) substr(str, bounds[1], bounds[2]))
  do.call(sprintf, c(list(format), fields))
}
# Heatmap of visitors by hour and day of the week
png('levelplot_weekdayVisitors.png')
# strftime(..., "%w") yields the weekday as "0" (Sunday) .. "6" (Saturday);
# adding 1 turns it into an index into wdays.
weekday <- wdays[as.numeric(strftime(changeDateFormat(day), "%w")) + 1]
# levelplot() only returns a trellis object; it is drawn when printed.
# Auto-printing does not happen when this file is run via source(), so the
# call is wrapped in print() to make sure the PNG is not left empty.
print(levelplot(
  tapply(visitors, list(hour, weekday), sum),  # rows = hour, columns = weekday
  col.regions = colorRampPalette(c("white", "red")),
  xlab = "Hour", ylab = "Weekday", main = "Visitors"
))
dev.off()
#==========================================================================================
# Use a filter to fetch only the visitors who arrived from Google
query$Init(
  start.date = "2011-01-01",
  end.date   = "2011-01-31",
  dimensions = c("ga:date", "ga:hour"),
  metrics    = "ga:visitors",
  filters    = "ga:source=@google",  # "=@" means: ga:source contains "google"
  sort       = "ga:date",
  table.id   = "ga:xxxxxxxxxx"
)
ga.data <- ga$GetReportData(query)
head(ga.data$data)

# Among the visitors from Google, list those whose search keyword contains a
# given term
query$Init(
  start.date  = "2011-01-01",
  end.date    = "2011-01-31",
  dimensions  = c("ga:date", "ga:keyword"),
  metrics     = "ga:visitors",
  # ";" combines the conditions with AND:
  # ga:source contains "google" and ga:keyword contains "ios"
  filters     = "ga:source=@google;ga:keyword=@ios",
  sort        = "ga:date",
  max.results = 100,
  table.id    = "ga:xxxxxxxxxx"
)
ga.data <- ga$GetReportData(query)
head(ga.data$data)
#==========================================================================================
# Share of all visitors who arrived via a specific keyword, by hour and weekday

# First, fetch the plain visitor counts per date and hour
query$Init(start.date = "2011-01-01",
           end.date = "2011-01-31",
           dimensions = c("ga:date", "ga:hour"),
           metrics = "ga:visitors",
           sort = "ga:date",
           table.id = "ga:xxxxxxxxxx")
ga.data <- ga$GetReportData(query)
# assumes ga.data$data is a data frame whose first three columns are
# date, hour, visitors (the rest of this script indexes it the same way)
allVisitors <- setNames(as.data.frame(ga.data$data[, 1:3]),
                        c("date", "hour", "visitors"))

# Next, fetch the visitors who searched for a specific keyword, using a filter
query$Init(start.date = "2011-01-01",
           end.date = "2011-01-31",
           dimensions = c("ga:date", "ga:hour"),
           metrics = "ga:visitors",
           filters = "ga:keyword=@ios",
           max.results = 10000,
           table.id = "ga:xxxxxxxxxx")
ga.data <- ga$GetReportData(query)
keywordData <- setNames(as.data.frame(ga.data$data[, 1:3]),
                        c("date", "hour", "keywordVisitors"))

# Align both results on (date, hour) before dividing. The two reports are not
# guaranteed to contain the same rows in the same order, so dividing the raw
# columns element by element would pair up unrelated hours (or recycle the
# shorter vector). Date/hour pairs missing from the keyword report count as 0.
merged <- merge(allVisitors, keywordData, by = c("date", "hour"), all.x = TRUE)
merged$keywordVisitors[is.na(merged$keywordVisitors)] <- 0

# Record the aligned columns
hour <- merged$hour                        # hour of day
day <- merged$date                         # date
visitors <- merged$visitors                # number of visitors
keywordVisitors <- merged$keywordVisitors  # visitors matching the keyword

# Per-row percentage of keyword visitors among all visitors
keywordVisitorsPercent <- keywordVisitors / visitors * 100

# Aggregate by day of the week and plot.
# wdays and changeDateFormat repeat the definitions used earlier in this
# script; they are kept here so this section does not depend on that code.
wdays <- c("1_Sun.", "2_Mon.", "3_Tue.", "4_Wed.", "5_Thu.", "6_Fri.", "7_Sat.")
# Convert "YYYYMMDD" into a string R can parse as a date ("YYYY-MM-DD").
changeDateFormat <- function(str, format="%04s-%02s-%02s")
{
  y <- substr(str, 1, 4)
  m <- substr(str, 5, 6)
  d <- substr(str, 7, 8)
  sprintf(format, y, m, d)
}

# strftime(..., "%w") yields "0" (Sunday) .. "6" (Saturday); +1 indexes wdays.
weekday <- wdays[as.numeric(strftime(changeDateFormat(day), "%w")) + 1]

# Percentage per (hour, weekday) cell = total keyword visitors / total visitors.
# Summing the per-row percentages instead would add up several days' ratios in
# one cell (values above 100 are possible) and turn the whole cell into NaN as
# soon as one hour had zero visitors.
keywordByCell <- tapply(keywordVisitors, list(hour, weekday), sum)
visitorsByCell <- tapply(visitors, list(hour, weekday), sum)
percentByCell <- keywordByCell / visitorsByCell * 100

png('levelplot_weekdayVisitors_byKeyword.png')
# levelplot() only returns a trellis object; it is drawn when printed.
# Auto-printing does not happen when this file is run via source(), so the
# call is wrapped in print() to make sure the PNG is not left empty.
print(levelplot(
  percentByCell,
  col.regions = colorRampPalette(c("white", "red")),
  xlab = "Hour", ylab = "Weekday", main = "keywordVisitors/visitors"
))
dev.off()
#==========================================================================================
# How does visit length differ between PC and mobile visitors?
query$Init(start.date = "2011-01-01",
           end.date = "2011-03-31",
           dimensions = c("ga:visitLength", "ga:isMobile"),
           metrics = "ga:visitors",
           table.id = "ga:xxxxxxxxxx")
ga.data <- ga$GetReportData(query)
head(ga.data$data)

# Split the visit lengths (column 1) by the isMobile flag (column 2).
# as.character() first: if the column came back as a factor, as.numeric()
# alone would return the internal level codes instead of the values.
visitLength <- as.numeric(as.character(ga.data$data[, 1]))
isMobile <- ga.data$data[, 2]
visitLength_Mobile <- visitLength[isMobile == "Yes"]
visitLength_PC <- visitLength[isMobile == "No"]
# NOTE(review): each row looks like one distinct (visitLength, isMobile) pair
# with its visitor count in column 3; the plots below use every distinct visit
# length once and do not weight by that count - confirm this is intended.

# Overview with a boxplot
png('boxplot_PC_vs_Mobile.png')
boxplot(
  visitLength_Mobile,
  visitLength_PC,
  xlab = "Mobile , PC", ylab = "visitLength", main = "visitLength (Mobile vs PC)"
)
dev.off()

# Full picture with density estimates
density_isMobile_No <- density(visitLength_PC)
density_isMobile_Yes <- density(visitLength_Mobile)

# Axis limits that fit both curves.
# (The y limit previously read `$yo` from the PC density, which does not exist
# and yields NULL, so the PC curve was ignored and could be clipped.)
xlim <- c(0, max(density_isMobile_No$x, density_isMobile_Yes$x))
ylim <- c(0, max(density_isMobile_No$y, density_isMobile_Yes$y))

# Draw both curves on the same axes
png('density_Pc_vs_Mobile.png')
plot(
  density_isMobile_No,
  xlim = xlim, ylim = ylim,
  xlab = "ga:visitLength (sec)", ylab = "density", main = "", col = 2
)
# Add the mobile curve to the existing plot instead of overlaying a second
# full plot with par(new = TRUE), which redraws axes and labels on top.
lines(density_isMobile_Yes, col = 3)
legend(xlim[2] / 4 * 3, ylim[2] / 4 * 3, c("PC", "Mobile"), col = c(2, 3), lty = c(1, 1))
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment