Created
October 2, 2016 09:32
-
-
Save piyo7/192e401f06aa2f791da5960c3967ef6b to your computer and use it in GitHub Desktop.
pixivの機械学習モデルからアイドルのキャラクター性を計算してみたよ ζ*'ヮ')ζ ref: http://qiita.com/piyo7/items/d380028080086970a813
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ grep 矢澤にこ fasttext-model.vec
矢澤にこ 0.019783 0.19108 -0.022026 0.084974 -0.22116 0.40731 0.050229 0.08452 -0.32226 0.21247 -0.18102 -0.16565 -0.51189 -0.57782 0.25588 -0.68081 -0.78397 0.082184 -0.13755 0.27661 -0.18925 0.69547 -0.39642 0.42797 -0.35558 -0.12432 -0.25115 0.2353 -0.64518 0.07407 0.059794 0.031658 0.55932 0.29246 0.28443 0.21839 -0.63347 0.29398 -0.22737 -0.29317 0.25269 -0.31109 0.10771 -0.56458 -0.038022 0.013576 0.47242 -0.036706 0.62488 -0.65502 -0.45005 -0.20644 0.20615 0.62102 -0.28411 0.4585 -0.032914 -0.41461 0.10216 0.128 -0.27425 0.14086 0.46892 0.026439 -0.22837 -0.23224 0.11419 0.31121 0.10832 -0.96888 -0.30923 0.028069 0.072835 -0.14563 -0.40337 0.55399 -0.21664 0.31468 -0.098204 0.072447 0.29686 0.047919 -0.0831 -0.38392 0.094459 -0.22222 -0.43531 -0.55599 -0.04801 -0.18968 0.67759 0.69869 0.3708 0.80129 -0.79446 -0.16282 -0.26733 -0.41572 0.057653 0.25807 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ scala
Welcome to Scala 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_101).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.Writer | |
import scala.io.Source | |
/**
 * Loan-pattern helper: lends a resource to a function and closes it afterwards.
 */
object Using {

  /**
   * Applies `process` to `resource`, then closes the resource through the implicit
   * `closer`. The close runs whether `process` returns normally or throws.
   *
   * @param resource the value to hand to `process` and to close afterwards
   * @param process  the computation to run against the resource
   * @param closer   knows how to close a value of type `A`
   * @return whatever `process` returns
   */
  def apply[A, B](resource: A)(process: A => B)(implicit closer: Closer[A]): B =
    try process(resource)
    finally closer.close(resource)
}
/**
 * Describes how to close a value of type `A`.
 *
 * Contravariant in `A`, so a `Closer[Source]` can be used wherever a closer for a
 * subtype of `Source` is required.
 *
 * @param close the action that closes the given value
 */
case class Closer[-A](close: A => Unit)

/** Default [[Closer]] instances, found through the companion's implicit scope. */
object Closer {
  // Explicit result types keep implicit resolution independent of definition order
  // (Scala 2) and are mandatory for implicit definitions in Scala 3.

  /** Closes a `scala.io.Source` by calling its `close()`. */
  implicit val sourceCloser: Closer[Source] = Closer[Source](_.close())

  /** Closes a `java.io.Writer` by calling its `close()`. */
  implicit val writerCloser: Closer[Writer] = Closer[Writer](_.close())
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.io.Source | |
/**
 * A named embedding vector.
 *
 * @param name   the token this vector belongs to
 * @param vector the token's components
 */
case class Word(
  name: String,
  vector: Seq[Double]
)
/**
 * Small toolkit for doing arithmetic on word vectors read from `fasttext-model.vec`
 * and printing nearest neighbours by cosine similarity.
 */
object VecCal {

  /**
   * Token -> [[Word]] lookup built from `fasttext-model.vec` (read as UTF-8).
   *
   * The first line of the file is skipped (presumably the count/dimension header of
   * the `.vec` format - confirm against the model file). Every other line is split
   * on single spaces: the first field is the token, the rest are parsed as doubles.
   * `drop(1)` is used instead of `tail` so an empty file yields an empty map rather
   * than an exception. The map is materialised inside the `Using` block, before the
   * source is closed.
   */
  val dict: Map[String, Word] =
    Using(Source.fromFile("fasttext-model.vec", "UTF-8")) { source =>
      source.getLines().drop(1).map { line =>
        val data = line.split(" ")
        (data.head, Word(data.head, data.tail.map(_.toDouble).toVector))
      }.toMap
    }

  // Character groups resolved against `dict`; names missing from the model are
  // reported by `toWords` and left out of the group.
  val mus: Seq[Word] = toWords("小泉花陽", "星空凛", "西木野真姫", "高坂穂乃果", "園田海未", "南ことり", "絢瀬絵里", "東條希", "矢澤にこ")
  val aqours: Seq[Word] = toWords("黒澤ルビィ", "国木田花丸", "津島善子", "高海千歌", "渡辺曜", "桜内梨子", "黒澤ダイヤ", "松浦果南", "小原鞠莉")
  val prod765: Seq[Word] = toWords("天海春香", "如月千早", "萩原雪歩", "高槻やよい", "秋月律子", "水瀬伊織", "三浦あずさ", "双海亜美", "双海真美", "菊地真", "星井美希", "我那覇響", "四条貴音", "音無小鳥")
  val prod346: Seq[Word] = toWords("島村卯月", "渋谷凛", "本田未央", "新田美波", "アナスタシア", "神崎蘭子", "三村かな子", "双葉杏", "緒方智絵里", "城ヶ崎莉嘉", "諸星きらり", "赤城みりあ", "前川みく", "多田李衣菜", "千川ちひろ")

  /**
   * Looks up each name in [[dict]].
   *
   * @param names tokens to resolve
   * @return the words that were found, in the order given; a name that is not in
   *         the dictionary is printed to stdout and omitted
   */
  def toWords(names: String*): Seq[Word] =
    names.flatMap { name =>
      val word = dict.get(name)
      if (word.isEmpty) println("unknown: " + name)
      word
    }

  /**
   * Re-centres a group of words by subtracting the group's mean vector from every
   * member's vector.
   *
   * @param words the group; all vectors must have the same size
   * @return words with the same names and mean-subtracted vectors; an empty group
   *         yields an empty result instead of failing in `reduce`
   */
  def role(words: Seq[Word]): Seq[Word] =
    if (words.isEmpty) Seq.empty
    else {
      val average = words.map(_.vector).reduce(_ + _).map(_ / words.size)
      words.map(word => Word(word.name, word.vector - average))
    }

  /**
   * Prints one pipe-delimited row per source word, listing the `n` targets with the
   * highest cosine similarity to it, best first.
   *
   * Columns are padded with spaces based on each name's character count.
   *
   * @param sources words to find neighbours for
   * @param targets candidate neighbours
   * @param n       how many neighbours to show per source
   */
  def printSimilar(sources: Seq[Word], targets: Seq[Word], n: Int = 3): Unit = {
    val results = for (source <- sources) yield {
      (source.name, targets.map(target => (target.name, source.vector.cos(target.vector))).sortBy(-_._2).take(n))
    }
    // Width of each column = longest name appearing in that column across all rows.
    val sizes = results.map(result => result._1.length +: result._2.map(_._1.length)).transpose.map(_.max)
    for ((source, targetsDistance) <- results) {
      println(
        "| " + source + (" " * (sizes.head - source.length)) + " | " +
          targetsDistance.zip(sizes.tail).map { case ((target, distance), size) =>
            target + (" " * (size - target.length)) + f"($distance%.4f)"
          }.mkString("", " | ", " |"))
    }
  }

  /** Element-wise arithmetic and cosine similarity on `Seq[Double]`. */
  implicit class RichSeqDouble(val a: Seq[Double]) extends AnyVal {

    /**
     * Combines `a` and `b` position by position with `op`.
     * Asserts that both sequences have the same size.
     */
    def elementWise(op: (Double, Double) => Double)(b: Seq[Double]): Seq[Double] = {
      assert(a.size == b.size)
      a.zip(b).map(pair => op(pair._1, pair._2))
    }

    // Declared with an explicit parameter instead of `elementWise(...) _`, so no
    // function object is allocated per call and the result types are explicit.

    /** Element-wise sum. */
    def +(b: Seq[Double]): Seq[Double] = elementWise(_ + _)(b)

    /** Element-wise difference. */
    def -(b: Seq[Double]): Seq[Double] = elementWise(_ - _)(b)

    /** Element-wise product. */
    def *(b: Seq[Double]): Seq[Double] = elementWise(_ * _)(b)

    /**
     * Cosine similarity between `a` and `b`: their dot product divided by the
     * product of their Euclidean norms.
     */
    def cos(b: Seq[Double]): Double = {
      (a * b).sum / math.sqrt((a * a).sum * (b * b).sum)
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment