Last active
August 29, 2015 14:02
-
-
Save treper/3dbd74f4d1a161191b87 to your computer and use it in GitHub Desktop.
Tag neighbor count; using PageRank may be more appropriate.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.util.parsing.json._ | |
import org.json4s._ | |
import org.json4s.native.JsonMethods._ | |
import scala.collection.mutable.ArrayBuffer | |
import java.io._ | |
/**
 * Expands one transaction line into the unordered tag pairs that co-occur in it.
 *
 * The line is split on single spaces and every token of length <= 1 is dropped.
 * That removes the empty strings produced by consecutive spaces, and it also
 * removes any one-character tag (NOTE(review): confirm that single-digit tag ids
 * are really meant to be ignored).
 *
 * Each combination of two surviving tokens at different positions is emitted
 * exactly once, formatted as "smaller larger" (numeric comparison), so the same
 * two tags always produce the same key regardless of their order in the line.
 * Two tokens with the same numeric value are skipped: a tag is not paired with
 * itself.
 *
 * @param line one transaction; space-separated numeric tag ids
 * @return the canonical pair keys, in order of first appearance; empty when
 *         fewer than two tokens survive the filter
 * @throws NumberFormatException if a surviving token is not a valid Long
 */
def parseTagTransaction(line: String): ArrayBuffer[String] = {
  val tagList = line.split(" ").filter(_.length > 1)
  val result = ArrayBuffer[String]()
  for {
    i <- 0 until tagList.length - 1
    // Start after i so that no position is paired with itself or visited twice.
    j <- i + 1 until tagList.length
  } {
    val one = tagList(i)
    val two = tagList(j)
    val oneId = one.toLong
    val twoId = two.toLong
    // Put the numerically smaller tag first so (a, b) and (b, a) share one key.
    if (oneId < twoId) {
      result += one + " " + two
    } else if (oneId > twoId) {
      result += two + " " + one
    }
  }
  result
}
// Input transactions: each line is expanded into tag-pair keys by parseTagTransaction.
val file = sc.textFile("hdfs://finger-test2:54310/home/TagHierarchy/editor_tag_transaction")
//comatrix.saveAsTextFile("hdfs://finger-test2:54310/home/TagHierarchy/editor_tag_comatrix")
// neighborCnt: Array of (tag, count), where count is the number of distinct pair
// keys the tag appears in, ordered by count descending.
// Tags are parsed with toInt here, so an id outside the Int range throws
// NumberFormatException.
val neighborCnt = {
  // Collapse repeated pair keys; the summed occurrence count is not used below,
  // only the set of distinct keys matters.
  val pairCounts = file.flatMap(parseTagTransaction).map(item => (item, 1)).reduceByKey((a, b) => a + b)
  // Every distinct pair key contributes one to each of its two tags.
  val perTag = pairCounts.flatMap { case (pairKey, _) =>
    val tags = pairKey.split(" ")
    Seq((tags(0).toInt, 1), (tags(1).toInt, 1))
  }
  // Sort ascending and then reverse, which keeps the original ordering of ties.
  perTag.reduceByKey((a, b) => a + b).collect().sortBy(_._2).reverse
}
// Label dictionary: tab-separated lines whose first field is parsed as a Long tag
// id and whose second field is used as the tag's name. A line without a second
// field, or with a non-numeric first field, fails the job when it is collected.
val labeldict = sc.textFile("hdfs://finger-test2:54310/home/TagHierarchy/label.dict")
val pairs = labeldict.map { line =>
  val fields = line.split("\t")
  (fields(0).toLong, fields(1))
}
val label_name_map = pairs.collect().toMap
// One "tag<TAB>name<TAB>count" line per counted tag that has a dictionary entry;
// tags missing from the dictionary are dropped. The order of neighborCnt is kept.
val out = neighborCnt.collect {
  case (tag, cnt) if label_name_map.contains(tag.toLong) =>
    tag + "\t" + label_name_map(tag.toLong) + "\t" + cnt
}
val writer = new PrintWriter(new File("/home/mps/software/TagHierarchy/tag_out"))
// Close the file even if a write fails, so the handle is never leaked.
try {
  out.foreach(m => writer.write(m + "\n"))
} finally {
  writer.close()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment