Skip to content

Instantly share code, notes, and snippets.

@piyo7
Created October 2, 2016 09:32
Show Gist options
  • Save piyo7/192e401f06aa2f791da5960c3967ef6b to your computer and use it in GitHub Desktop.
Save piyo7/192e401f06aa2f791da5960c3967ef6b to your computer and use it in GitHub Desktop.
pixivの機械学習モデルからアイドルのキャラクター性を計算してみたよ ζ*'ヮ')ζ ref: http://qiita.com/piyo7/items/d380028080086970a813
$ grep 矢澤にこ fasttext-model.vec
矢澤にこ 0.019783 0.19108 -0.022026 0.084974 -0.22116 0.40731 0.050229 0.08452 -0.32226 0.21247 -0.18102 -0.16565 -0.51189 -0.57782 0.25588 -0.68081 -0.78397 0.082184 -0.13755 0.27661 -0.18925 0.69547 -0.39642 0.42797 -0.35558 -0.12432 -0.25115 0.2353 -0.64518 0.07407 0.059794 0.031658 0.55932 0.29246 0.28443 0.21839 -0.63347 0.29398 -0.22737 -0.29317 0.25269 -0.31109 0.10771 -0.56458 -0.038022 0.013576 0.47242 -0.036706 0.62488 -0.65502 -0.45005 -0.20644 0.20615 0.62102 -0.28411 0.4585 -0.032914 -0.41461 0.10216 0.128 -0.27425 0.14086 0.46892 0.026439 -0.22837 -0.23224 0.11419 0.31121 0.10832 -0.96888 -0.30923 0.028069 0.072835 -0.14563 -0.40337 0.55399 -0.21664 0.31468 -0.098204 0.072447 0.29686 0.047919 -0.0831 -0.38392 0.094459 -0.22222 -0.43531 -0.55599 -0.04801 -0.18968 0.67759 0.69869 0.3708 0.80129 -0.79446 -0.16282 -0.26733 -0.41572 0.057653 0.25807
$ scala
Welcome to Scala 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_101).
import java.io.Writer
import scala.io.Source
/** Loan-pattern helper: runs a function against a resource and always releases the resource.
  *
  * How a resource is released is supplied by an implicit [[Closer]] for the resource type.
  */
object Using {

  /** Applies `process` to `resource`, then closes `resource` through `closer`.
    *
    * If `process` throws, the resource is still closed and the exception from `process` is
    * rethrown. A non-fatal exception raised while closing in that situation is attached to
    * the original one as a suppressed exception instead of replacing it, so the root cause
    * is not lost. If `process` succeeds, an exception raised by closing propagates normally.
    *
    * @param resource the resource to operate on and release afterwards
    * @param process  the computation to run against the resource
    * @param closer   how to release a resource of type `A`
    * @tparam A the resource type
    * @tparam B the result type of `process`
    * @return whatever `process` returns
    */
  def apply[A, B](resource: A)(process: A => B)(implicit closer: Closer[A]): B = {
    val result =
      try process(resource)
      catch {
        case primary: Throwable =>
          // Still release the resource, but never let a close failure mask the real error.
          try closer.close(resource)
          catch {
            case scala.util.control.NonFatal(secondary) => primary.addSuppressed(secondary)
          }
          throw primary
      }
    // Success path: a failure to close is a real error and is allowed to propagate.
    closer.close(resource)
    result
  }
}
/** Type class describing how to release a resource of type `A`.
  *
  * The type parameter is contravariant, so a `Closer` for a supertype can be used wherever a
  * `Closer` for one of its subtypes is required.
  *
  * @param close the action that releases the given resource
  * @tparam A the resource type this closer can release
  */
case class Closer[-A](close: A => Unit)

/** Implicit [[Closer]] instances, found automatically through the companion's implicit scope. */
object Closer {
  // Implicit definitions carry explicit types: inferred types on implicits make resolution
  // depend on definition order in Scala 2 and are not accepted by Scala 3.

  /** Closes a `scala.io.Source` by calling its `close()` method. */
  implicit val sourceCloser: Closer[Source] = Closer[Source](_.close())

  /** Closes a `java.io.Writer` by calling its `close()` method. */
  implicit val writerCloser: Closer[Writer] = Closer[Writer](_.close())
}
import scala.io.Source
/** A named numeric vector.
  *
  * @param name   the label identifying this vector
  * @param vector the numeric components associated with `name`
  */
case class Word(name: String, vector: Seq[Double])
/** Vector calculations over the entries of `fasttext-model.vec`.
  *
  * Initializing this object reads the whole model file from the current working directory.
  */
object VecCal {

  /** All entries of the model file, keyed by name.
    *
    * The first line of the file is skipped. Every other line is split on single spaces: the
    * first token is the name and the remaining tokens are parsed as doubles. Lines without a
    * name token (for example blank lines) are ignored. The map is built before the source is
    * closed, and an empty file yields an empty map.
    */
  val dict: Map[String, Word] =
    Using(Source.fromFile("fasttext-model.vec", "UTF-8")) { source =>
      source.getLines().drop(1).flatMap { line =>
        line.split(" ").toList match {
          case name :: values if name.nonEmpty =>
            Some(name -> Word(name, values.map(_.toDouble).toVector))
          case _ =>
            None
        }
      }.toMap
    }

  // Predefined groups of names, resolved against `dict` when the object is initialized.
  // Names missing from the model are reported on stdout and left out of the group.
  val mus: Seq[Word] = toWords("小泉花陽", "星空凛", "西木野真姫", "高坂穂乃果", "園田海未", "南ことり", "絢瀬絵里", "東條希", "矢澤にこ")
  val aqours: Seq[Word] = toWords("黒澤ルビィ", "国木田花丸", "津島善子", "高海千歌", "渡辺曜", "桜内梨子", "黒澤ダイヤ", "松浦果南", "小原鞠莉")
  val prod765: Seq[Word] = toWords("天海春香", "如月千早", "萩原雪歩", "高槻やよい", "秋月律子", "水瀬伊織", "三浦あずさ", "双海亜美", "双海真美", "菊地真", "星井美希", "我那覇響", "四条貴音", "音無小鳥")
  val prod346: Seq[Word] = toWords("島村卯月", "渋谷凛", "本田未央", "新田美波", "アナスタシア", "神崎蘭子", "三村かな子", "双葉杏", "緒方智絵里", "城ヶ崎莉嘉", "諸星きらり", "赤城みりあ", "前川みく", "多田李衣菜", "千川ちひろ")

  /** Looks up each name in [[dict]], keeping the input order.
    *
    * A name that is not in the dictionary is reported with a line on stdout and skipped.
    *
    * @param names the names to resolve
    * @return the entries that were found
    */
  def toWords(names: String*): Seq[Word] =
    names.flatMap { name =>
      val word = dict.get(name)
      if (word.isEmpty) println(s"unknown: $name")
      word
    }

  /** Centers a group of words by subtracting the group's mean vector from every member.
    *
    * @param words the group to center; all vectors must have the same size
    * @return one word per input, same names and order, with the mean vector removed;
    *         empty when `words` is empty
    */
  def role(words: Seq[Word]): Seq[Word] =
    if (words.isEmpty) Seq.empty // reduce would throw on an empty group
    else {
      val average = words.map(_.vector).reduce(_ + _).map(_ / words.size)
      words.map(word => Word(word.name, word.vector - average))
    }

  /** Prints, for every source word, its `n` most cosine-similar target words as a table row.
    *
    * Each row has the form `| source | target(0.1234) | ... |`; columns are padded with
    * spaces to the longest name (by `String.length`) appearing in that column.
    *
    * @param sources the words to find neighbours for
    * @param targets the candidate neighbours
    * @param n       how many of the best targets to print per source
    */
  def printSimilar(sources: Seq[Word], targets: Seq[Word], n: Int = 3): Unit = {
    val results = sources.map { source =>
      val ranked = targets
        .map(target => (target.name, source.vector.cos(target.vector)))
        .sortBy(-_._2) // highest similarity first
        .take(n)
      (source.name, ranked)
    }
    // Column widths: column 0 holds source names, column i holds the i-th ranked target.
    val sizes = results
      .map { case (name, ranked) => name.length +: ranked.map(_._1.length) }
      .transpose
      .map(_.max)
    for ((source, targetsDistance) <- results) {
      val cells = targetsDistance.zip(sizes.tail).map { case ((target, distance), size) =>
        padRight(target, size) + f"($distance%.4f)"
      }
      println("| " + padRight(source, sizes.head) + " | " + cells.mkString("", " | ", " |"))
    }
  }

  /** Right-pads `text` with spaces up to `width` characters; longer text is returned as is. */
  private def padRight(text: String, width: Int): String =
    text + (" " * (width - text.length))

  /** Element-wise arithmetic and cosine similarity for `Seq[Double]`. */
  implicit class RichSeqDouble(val a: Seq[Double]) extends AnyVal {

    /** Combines `a` and `b` position by position with `op`.
      *
      * Both sequences are asserted to have the same size.
      */
    def elementWise(op: (Double, Double) => Double)(b: Seq[Double]): Seq[Double] = {
      assert(a.size == b.size)
      a.zip(b).map { case (x, y) => op(x, y) }
    }

    /** Element-wise sum. */
    def +(b: Seq[Double]): Seq[Double] = elementWise(_ + _)(b)

    /** Element-wise difference. */
    def -(b: Seq[Double]): Seq[Double] = elementWise(_ - _)(b)

    /** Element-wise product. */
    def *(b: Seq[Double]): Seq[Double] = elementWise(_ * _)(b)

    /** Cosine similarity: the dot product divided by the product of the two magnitudes.
      *
      * The result is `NaN` when either vector has zero magnitude.
      */
    def cos(b: Seq[Double]): Double =
      (a * b).sum / math.sqrt((a * a).sum * (b * b).sum)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment