Last active
December 24, 2015 15:05
-
-
Save sasanquaneuf/ce4ae12fab13422417b9 to your computer and use it in GitHub Desktop.
Rでコード進行を丁寧丁寧丁寧に可視化してみる(Level 1) ref: http://qiita.com/sasanquaneuf/items/aed57423b4edce26f783
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(arulesSequences) | |
# 上述の試行錯誤の名残…arulesSequencesでarulesも入る | |
# scraping.Rの続きで | |
t <- song_data_all[,c("before","after")] | |
result <- apriori(as(t,"transactions"), parameter = list(support = 0.008, confidence = 0.05)) | |
# さだまさし→0.01/0.05 | |
# 小田和正→0.005/0.05 | |
# いきものがかり→0.005/0.05 | |
# 図を適当にきれいにするための適当な条件 | |
e <- as(result,"data.frame") | |
e$LHS <- str_replace_all(e$rules,"=>.+","") | |
e$RHS <- str_replace_all(e$rules,".+=>","") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
library(stringr) | |
# さだまさし | |
dataids <- c("27407","980","2699","7814","13411","25116","7815","7816","13416","27404","25115","4531","7817", | |
"7810","7812","13421","13615","13426","13413","13422","13602","25113","25114","27402","25577", | |
"2956","2955","7820","7819","7813","7811","7809","7818","13429","13428","13601","13430","13425", | |
"13423","13424","13420","13418","13419","13603","13412","13415","13417","13414","13607","13606", | |
"13600","13427","13613","13616","13612","13609","13610","13604","13608","13605","13614","13611", | |
"27406","23108","27405","21513","21512","25576","26542","27403","27738") | |
# 小田和正 | |
dataids <- c("3333","2338","2281","2532","9724","9988","2531","9723","9719","9503","9505","9718","9992","9990", | |
"9717","10002","9714","10174","21171","20450","21174","21177","21175","2600","9981","9716","9986", | |
"10172","10000","9999","9506","9509","10001","9998","10003","9715","9997","9504","9980","9984", | |
"9722","9993","9725","9991","9987","9507","9995","9996","9721","9508","9510","9720","9994","9982", | |
"10173","9983","9985","9989","22640","21170","17228","21176","20451","21173","21172","22071","25201") | |
# いきものがかり | |
dataids <- c("953","1928","3980","5177","1367","956","950","1378","1406","1400","24172","1375","3729","3976","5422", | |
"5515","5180","13809","2085","5186","1927","5188","18414","27742","5419","5528","5426","6272","24298", | |
"5522","5529","6265","7808","5517","5190","5418","5420","6264","6268","6271","13929","13930","14819", | |
"16566","22620","2678","3979","5524","5518","5178","5184","5185","5521","5416","5421","5525","6269", | |
"8565","1925","1926","2087","2086","3977","3978","5526","5523","5520","5175","5176","5179","5181","5182", | |
"5183","5187","5189","5516","5417","5423","5424","5425","5427","5519","5527","5530","5531","5532","6266", | |
"6267","6270","8485","8566","13927","13928","19814","16567","19330","24134","21524","25365","23789", | |
"21709","21961","21525","21523","24135","24133") | |
# サカナクション | |
dataids <- c("27032","4676","5075","1035","2117","5070","5073","5074","2658","4689","1187","5072","5076","6759", | |
"27437","5077","19186","9975","18078","23784","4677","5069","5068","5071","20248","27438") | |
song_data_all <- data.frame(id=1,seqid=1,before="A",after="A") | |
song_data_all <- song_data_all[F,] | |
song_info_all <- data.frame(id=1,title="A",author="A") | |
song_info_all <- song_info_all[F,] | |
for(dataid in dataids){ | |
q <- html(paste0("http://www.ufret.jp/song.php?data=", dataid)) | |
# 曲名とタイトル | |
song_info <- html_nodes(q, xpath = '//title') %>% | |
html_text() %>% | |
iconv("utf-8", "cp932") %>% | |
str_replace_all(' ギターコード譜.+',"") %>% | |
str_split(" / ") | |
song_info_all <- merge(song_info_all, | |
data.frame(id = dataid, title=song_info[[1]][[1]], author=song_info[[1]][[2]]), | |
all.x=T, all.y=T) | |
song_code <- html_nodes(q, xpath = '//rt') %>% | |
html_text() %>% | |
iconv("utf-8", "cp932") | |
#song_code <- data.frame(before=c("",song_code), after=c(song_code,"")) | |
song_data <- data.frame(id=dataid, seqid=1:(length(song_code)+1), before=c("",song_code), after=c(song_code,"")) | |
song_data_all <- merge(song_data_all, song_data, all.x=T, all.y=T) | |
Sys.sleep(1) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
nodes <- e[!is.na(str_match(e$LHS, "before")),] | |
nodes$LHS <- nodes$LHS %>% str_replace_all("\\{before=","") %>% str_replace_all("\\}","") | |
nodes$RHS <- nodes$RHS %>% str_replace_all("\\{after=","") %>% str_replace_all("\\}","") | |
nodes$mermaid <- paste0(nodes$LHS, "-->|", (floor(nodes$confidence*1000)/10), "%|", nodes$RHS) | |
nodes$mermaid <- str_replace_all(nodes$mermaid, "♭", "b") | |
mermaid(paste("graph TD",Reduce(function(...){paste(...,sep="\n")},nodes$mermaid),sep="\n")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment