Skip to content

Instantly share code, notes, and snippets.

@treper
Last active August 29, 2015 14:02
Show Gist options
  • Save treper/3dbd74f4d1a161191b87 to your computer and use it in GitHub Desktop.
Save treper/3dbd74f4d1a161191b87 to your computer and use it in GitHub Desktop.
Tag neighbor count; using PageRank may be more appropriate.
import scala.util.parsing.json._
import org.json4s._
import org.json4s.native.JsonMethods._
import scala.collection.mutable.ArrayBuffer
import java.io._
/**
 * Expands one transaction line into its tag co-occurrence pairs.
 *
 * The line is split on single spaces and tokens of length 1 or less are
 * dropped (which also discards the empty tokens produced by repeated spaces).
 * Each pair of remaining token positions (i < j) is emitted exactly once as
 * "small large", with the numerically smaller tag first, so the same two tags
 * always produce the same key regardless of their order within the line.
 * A tag that occurs twice in the line still yields an "x x" pair.
 *
 * @param line space-separated tag ids; every kept token must parse as a Long
 * @return one "tagA tagB" string per pair of positions; empty when fewer than
 *         two tokens remain after filtering
 * @throws NumberFormatException if a kept token is not a valid Long
 */
def parseTagTransaction(line: String): ArrayBuffer[String] = {
  val tags = line.split(" ").filter(_.length > 1)
  val result = ArrayBuffer[String]()
  // j starts at i + 1 so each position pair is visited once; starting at 1
  // would emit self-pairs (i == j) and every pair in both orders.
  for (i <- 0 until tags.length - 1; j <- i + 1 until tags.length) {
    val one = tags(i)
    val two = tags(j)
    // Canonical order: numerically smaller id first.
    if (one.toLong < two.toLong) {
      result += s"$one $two"
    } else {
      result += s"$two $one"
    }
  }
  result
}
// Raw transaction lines; each line is expanded into tag pairs by parseTagTransaction.
val file = sc.textFile("hdfs://finger-test2:54310/home/TagHierarchy/editor_tag_transaction")
//comatrix.saveAsTextFile("hdfs://finger-test2:54310/home/TagHierarchy/editor_tag_comatrix")
// For every tag, count how many distinct pair keys it appears in, then sort the
// collected (tag, count) array by count, highest first.
// NOTE(review): ids are parsed with toInt here but with toLong elsewhere in this
// script -- confirm that all ids fit in an Int.
val neighborCnt = {
  // One entry per distinct "tagA tagB" key; the occurrence count itself is not used below.
  val pairCounts = file
    .flatMap(parseTagTransaction)
    .map(pairKey => (pairKey, 1))
    .reduceByKey(_ + _)
  // Each distinct pair contributes one neighbor to each of its two tags.
  val perTag = pairCounts.flatMap { case (pairKey, _) =>
    val ids = pairKey.split(" ")
    Seq((ids(0).toInt, 1), (ids(1).toInt, 1))
  }
  perTag.reduceByKey(_ + _).collect().sortBy(_._2).reverse
}
// Label dictionary: each line is split on tabs; field 0 is read as a Long id
// and field 1 as the label text.
val labeldict = sc.textFile("hdfs://finger-test2:54310/home/TagHierarchy/label.dict")
val pairs = labeldict.map { line =>
  val fields = line.split("\t")
  (fields(0).toLong, fields(1))
}
// Collected to the driver so it can be used as a plain lookup table.
val label_name_map = pairs.collect().toMap
// Keep only tags that have a label and render them as "id<TAB>label<TAB>count".
var out = neighborCnt.collect {
  case (tag, cnt) if label_name_map.contains(tag.toLong) =>
    s"$tag\t${label_name_map(tag.toLong)}\t$cnt"
}
// Write one output row per line to a local file. The try/finally guarantees
// the writer is closed (and its buffer flushed) even if writing a row throws.
val writer = new PrintWriter(new File("/home/mps/software/TagHierarchy/tag_out"))
try {
  out.foreach(row => writer.write(row + "\n"))
} finally {
  writer.close()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment