gghatano · March 25, 2020 02:37
diff --git a/file0.r b/file0.r
 # One-time setup: install MiscPsycho from a local source archive.
 # install.packages("MiscPsycho_1.6.tar", repos = NULL, type = "source")

 library("MiscPsycho")

 ## Apparently stringMatch() is the function to use here.
 ## normalize = "NO" asks for the distance without length normalization.
 stringMatch("hoge", "huge", normalize = "NO")
diff --git a/file1.txt b/file1.txt
 ## [1] 1
diff --git a/file10.r b/file10.r
 ## Opponent side: show the first rows whose opponent column equals "日馬富",
 ## rendered as an HTML table
 dat %>%
   filter(opponent == "日馬富") %>%
   head() %>%
   xtable() %>%
   print(type = "html")
diff --git a/file10.txt b/file10.txt
 [1] 42  1
diff --git a/file11.r b/file11.r
 ## Dimensions (rows, columns) of dat_opponent
 dim(dat_opponent)
diff --git a/file12.txt b/file12.txt
 [1] 46  1
diff --git a/file13.r b/file13.r
 ## Cross-join the two name columns to be linked (name and opponent) so that
 ## the edit distance can be computed for every combination of names
 dat_stringmatch <- merge(dat_name, dat_opponent)

 ## dplyr::mutate(dist = stringMatch(name, opponent)) would have been nicer,
 ## but it does not work, so the distance is forced through mapply instead
 dist_col <- mapply(
   FUN = function(x, y) stringMatch(x, y, normalize = "NO"),
   dat_stringmatch$name,
   dat_stringmatch$opponent
 )
 dat_stringmatch$dist <- dist_col

 ## Smallest distance found for each name
 dat_mindist <- dat_stringmatch %>%
   group_by(name) %>%
   summarise(dist = min(dist))

 ## Extract the pairs with the minimum edit distance:
 ## joining on the name and the minimum value should do it
 dat_stringmatch %>%
   merge(dat_mindist, by = c("name", "dist")) %>%
   select(name, opponent) %>%
   xtable() %>%
   print(type = "html")
diff --git a/file14.r b/file14.r
 ## Lookup table: for each opponent spelling, the name at minimum distance
 ## (stored as opponent_name)
 dat_opponent_name <- dat_stringmatch %>%
   merge(dat_mindist, by = c("name", "dist")) %>%
   select(opponent_name = name, opponent)

 ## Attach opponent_name to every row of dat via the opponent column,
 ## then keep only the listed columns
 dat_result <- dat %>%
   merge(dat_opponent_name, by = "opponent") %>%
   select(tournament, class, name, room_old, opponent_name, result)
diff --git a/file14.txt b/file14.txt
 [1] 46  1
diff --git a/file15.r b/file15.r
 library(BradleyTerry2)
diff --git a/file16.r b/file16.r
 ## Lookup table: for each opponent spelling, the name at minimum distance
 ## (stored as opponent_name)
 dat_opponent_name <- dat_stringmatch %>%
   merge(dat_mindist, by = c("name", "dist")) %>%
   select(opponent_name = name, opponent)

 ## Attach opponent_name to every row of dat via the opponent column,
 ## then keep only the listed columns
 dat_result <- dat %>%
   merge(dat_opponent_name, by = "opponent") %>%
   select(tournament, class, name, room_old, opponent_name, result)
diff --git a/file16.txt b/file16.txt
 ## Loading required package: lme4
 ## Loading required package: Matrix
diff --git a/file17.r b/file17.r
 ## Collapse dat_result into one row per (winner, loser) pair.
 ## Each (name, opponent_name) pair is first tallied from name's side and
 ## then re-keyed so that the side with at least one win is the winner.
 ## The test is `wins > 0` rather than `wins == 1`: with `== 1`, a pair in
 ## which name won two or more bouts would get opponent_name as both
 ## winner and loser.
 dat_winlose <- dat_result %>%
   group_by(name, opponent_name) %>%
   summarise(wins = sum(result == "W"), loses = sum(result == "L")) %>%
   mutate(
     winner = ifelse(wins > 0, name, opponent_name),
     loser = ifelse(wins == 0, name, opponent_name)
   ) %>%
   ## group_by() replaces the existing grouping by default, so the
   ## deprecated `add = FALSE` argument is not passed.
   group_by(winner, loser) %>%
   summarise(wins = sum(wins), loses = 0)

 ## Fit the Bradley-Terry (BT) model on the per-pair win counts
 sumoBT <- BTm(
   outcome = cbind(wins, loses),
   player1 = winner, player2 = loser,
   data = dat_winlose
 )

 ## Abilities estimated by the BT model, sorted by descending ability;
 ## the row names of the ability table are copied into a name column
 sumoBTdf <- BTabilities(sumoBT) %>%
   as.data.frame() %>%
   mutate(name = row.names(.)) %>%
   arrange(desc(ability))

 ## Tally the number of wins per winner
 dat_winlose_result <- dat_winlose %>%
   group_by(winner) %>%
   summarise(wins = sum(wins)) %>%
   mutate(name = winner) %>%
   select(-winner)

 ## Compare the strength estimated by the BT model with the number of wins
 sumoBTdf %>%
   merge(dat_winlose_result, by = "name") %>%
   arrange(desc(ability)) %>%
   xtable() %>%
   print(type = "html")
diff --git a/file18.r b/file18.r
 ## Smallest distance found for each name
 dat_mindist <- dat_stringmatch %>%
   group_by(name) %>%
   summarise(dist = min(dist))

 ## Extract the pairs with the minimum edit distance:
 ## joining on the name and the minimum value should do it
 dat_stringmatch %>%
   merge(dat_mindist, by = c("name", "dist")) %>%
   select(name, opponent)
diff --git a/file18.txt b/file18.txt
 ## Loading required package: lme4
 ## Loading required package: Matrix
diff --git a/file19.r b/file19.r
 ## Collapse dat_result into one row per (winner, loser) pair.
 ## Each (name, opponent_name) pair is first tallied from name's side and
 ## then re-keyed so that the side with at least one win is the winner.
 ## The test is `wins > 0` rather than `wins == 1`: with `== 1`, a pair in
 ## which name won two or more bouts would get opponent_name as both
 ## winner and loser.
 dat_winlose <- dat_result %>%
   group_by(name, opponent_name) %>%
   summarise(wins = sum(result == "W"), loses = sum(result == "L")) %>%
   mutate(
     winner = ifelse(wins > 0, name, opponent_name),
     loser = ifelse(wins == 0, name, opponent_name)
   ) %>%
   ## group_by() replaces the existing grouping by default, so the
   ## deprecated `add = FALSE` argument is not passed.
   group_by(winner, loser) %>%
   summarise(wins = sum(wins), loses = 0)

 ## Fit the Bradley-Terry (BT) model on the per-pair win counts
 sumoBT <- BTm(
   outcome = cbind(wins, loses),
   player1 = winner, player2 = loser,
   data = dat_winlose
 )

 ## Abilities estimated by the BT model, sorted by descending ability;
 ## the row names of the ability table are copied into a name column
 sumoBTdf <- BTabilities(sumoBT) %>%
   as.data.frame() %>%
   mutate(name = row.names(.)) %>%
   arrange(desc(ability))

 ## Tally the number of wins per winner
 dat_winlose_result <- dat_winlose %>%
   group_by(winner) %>%
   summarise(wins = sum(wins)) %>%
   mutate(name = winner) %>%
   select(-winner)

 ## Compare the strength estimated by the BT model with the number of wins
 sumoBTdf %>%
   merge(dat_winlose_result, by = "name") %>%
   arrange(desc(ability)) %>%
   xtable() %>%
   print(type = "html")
diff --git a/file2.r b/file2.r
 ## Without normalize = "NO", the output is a distance normalized by the
 ## number of characters.
 stringMatch("hoge", "huge")
diff --git a/file20.r b/file20.r
 ## Smallest distance found for each name
 dat_mindist <- dat_stringmatch %>%
   group_by(name) %>%
   summarise(dist = min(dist))

 ## Extract the pairs with the minimum edit distance:
 ## joining on the name and the minimum value should do it
 dat_stringmatch %>%
   merge(dat_mindist, by = c("name", "dist")) %>%
   select(name, opponent)
diff --git a/file3.txt b/file3.txt
 ## [1] 0.75
diff --git a/file4.r b/file4.r
 ## The same pair of strings compared with adist() from the utils package
 utils::adist("hoge", "huge")
diff --git a/file5.txt b/file5.txt
     [,1]
 [1,]    1
diff --git a/file6.r b/file6.r
 library("readr")
 library("dplyr")
 library("xtable")
 ## Read the table without treating the first row as a header
 dat <- read_csv("winLoseDataTable.dat.no_juryou", col_names = FALSE)

 ## Give the columns names
 names(dat) <- c("tournament", "class", "name", "room_old", "opponent", "result")

 ## For this experiment, use only the rows of tournament "H24-1"
 ## (the Heisei 24 New Year tournament, per the original note)
 dat <- filter(dat, tournament == "H24-1")

 ## Check the contents
 dat %>%
   head() %>%
   xtable() %>%
   print(type = "html")
diff --git a/file7.r b/file7.r
 ## Name side: show the first rows whose name column equals "日馬富士",
 ## rendered as an HTML table
 dat %>%
   filter(name == "日馬富士") %>%
   head() %>%
   xtable() %>%
   print(type = "html")
diff --git a/file7.txt b/file7.txt
     [,1]
 [1,]    1
diff --git a/file8.r b/file8.r
 ## Opponent side: show the first rows whose opponent column equals "日馬富",
 ## rendered as an HTML table
 dat %>%
   filter(opponent == "日馬富") %>%
   head() %>%
   xtable() %>%
   print(type = "html")
diff --git a/file9.r b/file9.r
 ## Distinct elements of the name column
 dat_name <- unique(select(dat, name))
 ## Distinct elements of the opponent column
 dat_opponent <- unique(select(dat, opponent))

 ## Size of each table (only dat_name is inspected here)
 dim(dat_name)
	# One-time setup: install MiscPsycho from a local source archive.
	# install.packages("MiscPsycho_1.6.tar", repos = NULL, type = "source")

	library("MiscPsycho")

	## Apparently stringMatch() is the function to use here.
	## normalize = "NO" asks for the distance without length normalization.
	stringMatch("hoge", "huge", normalize = "NO")
	## Opponent side: show the first rows whose opponent column equals "日馬富",
	## rendered as an HTML table
	dat %>%
	  filter(opponent == "日馬富") %>%
	  head() %>%
	  xtable() %>%
	  print(type = "html")
	## Cross-join the two name columns to be linked (name and opponent) so that
	## the edit distance can be computed for every combination of names
	dat_stringmatch <- merge(dat_name, dat_opponent)

	## dplyr::mutate(dist = stringMatch(name, opponent)) would have been nicer,
	## but it does not work, so the distance is forced through mapply instead
	dist_col <- mapply(
	  FUN = function(x, y) stringMatch(x, y, normalize = "NO"),
	  dat_stringmatch$name,
	  dat_stringmatch$opponent
	)
	dat_stringmatch$dist <- dist_col

	## Smallest distance found for each name
	dat_mindist <- dat_stringmatch %>%
	  group_by(name) %>%
	  summarise(dist = min(dist))

	## Extract the pairs with the minimum edit distance:
	## joining on the name and the minimum value should do it
	dat_stringmatch %>%
	  merge(dat_mindist, by = c("name", "dist")) %>%
	  select(name, opponent) %>%
	  xtable() %>%
	  print(type = "html")
	## Lookup table: for each opponent spelling, the name at minimum distance
	## (stored as opponent_name)
	dat_opponent_name <- dat_stringmatch %>%
	  merge(dat_mindist, by = c("name", "dist")) %>%
	  select(opponent_name = name, opponent)

	## Attach opponent_name to every row of dat via the opponent column,
	## then keep only the listed columns
	dat_result <- dat %>%
	  merge(dat_opponent_name, by = "opponent") %>%
	  select(tournament, class, name, room_old, opponent_name, result)
	## Loading required package: lme4
	## Loading required package: Matrix
	## Collapse dat_result into one row per (winner, loser) pair.
	## Each (name, opponent_name) pair is first tallied from name's side and
	## then re-keyed so that the side with at least one win is the winner.
	## The test is `wins > 0` rather than `wins == 1`: with `== 1`, a pair in
	## which name won two or more bouts would get opponent_name as both
	## winner and loser.
	dat_winlose <- dat_result %>%
	  group_by(name, opponent_name) %>%
	  summarise(wins = sum(result == "W"), loses = sum(result == "L")) %>%
	  mutate(
	    winner = ifelse(wins > 0, name, opponent_name),
	    loser = ifelse(wins == 0, name, opponent_name)
	  ) %>%
	  ## group_by() replaces the existing grouping by default, so the
	  ## deprecated `add = FALSE` argument is not passed.
	  group_by(winner, loser) %>%
	  summarise(wins = sum(wins), loses = 0)

	## Fit the Bradley-Terry (BT) model on the per-pair win counts
	sumoBT <- BTm(
	  outcome = cbind(wins, loses),
	  player1 = winner, player2 = loser,
	  data = dat_winlose
	)

	## Abilities estimated by the BT model, sorted by descending ability;
	## the row names of the ability table are copied into a name column
	sumoBTdf <- BTabilities(sumoBT) %>%
	  as.data.frame() %>%
	  mutate(name = row.names(.)) %>%
	  arrange(desc(ability))

	## Tally the number of wins per winner
	dat_winlose_result <- dat_winlose %>%
	  group_by(winner) %>%
	  summarise(wins = sum(wins)) %>%
	  mutate(name = winner) %>%
	  select(-winner)

	## Compare the strength estimated by the BT model with the number of wins
	sumoBTdf %>%
	  merge(dat_winlose_result, by = "name") %>%
	  arrange(desc(ability)) %>%
	  xtable() %>%
	  print(type = "html")
	## Without normalize = "NO", the output is a distance normalized by the
	## number of characters.
	stringMatch("hoge", "huge")
	library("readr")
	library("dplyr")
	library("xtable")
	## Read the table without treating the first row as a header
	dat <- read_csv("winLoseDataTable.dat.no_juryou", col_names = FALSE)

	## Give the columns names
	names(dat) <- c("tournament", "class", "name", "room_old", "opponent", "result")

	## For this experiment, use only the rows of tournament "H24-1"
	## (the Heisei 24 New Year tournament, per the original note)
	dat <- filter(dat, tournament == "H24-1")

	## Check the contents
	dat %>%
	  head() %>%
	  xtable() %>%
	  print(type = "html")
	## Name side: show the first rows whose name column equals "日馬富士",
	## rendered as an HTML table
	dat %>%
	  filter(name == "日馬富士") %>%
	  head() %>%
	  xtable() %>%
	  print(type = "html")
	## Distinct elements of the name column
	dat_name <- unique(select(dat, name))
	## Distinct elements of the opponent column
	dat_opponent <- unique(select(dat, opponent))

	## Size of each table (only dat_name is inspected here)
	dim(dat_name)