Skip to content

Instantly share code, notes, and snippets.

@treper
Last active December 20, 2015 12:19
Show Gist options
  • Save treper/6130443 to your computer and use it in GitHub Desktop.
Save treper/6130443 to your computer and use it in GitHub Desktop.
import scala.collection.mutable.ArrayBuffer
// Matches one bracketed triple of unsigned integers, e.g. "[12,3,7]".
val name = """\[\d+,\d+,\d+\]""".r
// Same triple shape, but captures the first two numbers; the third is matched and ignored.
val name2 = """\[(\d+),(\d+),\d+]""".r

/**
 * Turns one tab-separated input line into a list of ordered item-pair keys.
 *
 * The second tab-separated field is scanned for every `[a,b,c]` triple. From each
 * triple the first number is kept as the item id and the second as the value used
 * to order a pair (called `idfnum` in this code; its meaning is not visible here).
 * Every unordered pair of triples is then visited exactly once, and for each pair
 * whose ordering values differ a key "x-y" is appended, where x is the id with the
 * smaller ordering value and y the id with the larger one. Pairs with equal
 * ordering values produce nothing.
 *
 * @param line one raw input line; fields are separated by a tab character
 * @return the pair keys in visiting order; empty when the line has no second
 *         field or that field holds fewer than two triples
 * @throws NumberFormatException if a captured number does not fit in an Int
 */
def parseLine(line: String): ArrayBuffer[String] = {
  val result = ArrayBuffer[String]()
  val fields = line.split("\t")
  // An empty line or a line without a second field yields no pairs instead of
  // failing with ArrayIndexOutOfBoundsException.
  if (fields.length > 1) {
    val itemIdfArray = name.findAllIn(fields(1)).collect {
      case name2(item, idfnum) => (item.toInt, idfnum.toInt)
    }.toIndexedSeq
    // j starts at i + 1 so that no element is paired with itself and no pair is
    // visited twice; a revisit would append the same key again and inflate counts.
    for (i <- itemIdfArray.indices; j <- i + 1 until itemIdfArray.length) {
      val (firstItem, firstIdf) = itemIdfArray(i)
      val (secondItem, secondIdf) = itemIdfArray(j)
      if (firstIdf < secondIdf) {
        result += firstItem.toString + "-" + secondItem.toString
      } else if (firstIdf > secondIdf) {
        result += secondItem.toString + "-" + firstItem.toString
      }
    }
  }
  result
}
// Load the input file as a collection of text lines.
// `sc` is not defined in this script; presumably the SparkContext provided by spark-shell.
val file = sc.textFile("/home/mps/md5_labels.txt")
// Expand every line into its pair keys, tag each key with a count of 1,
// then add up the counts per key.
val comatrix = {
  val pairKeys = file.flatMap(line => parseLine(line))
  pairKeys.map(pairKey => (pairKey, 1)).reduceByKey(_ + _)
}
// Write the (key, count) records out as text.
comatrix.saveAsTextFile("/home/mps/comatrix")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment