This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.util.parsing.json._ | |
import org.json4s._ | |
import org.json4s.native.JsonMethods._ | |
import scala.collection.mutable.ArrayBuffer | |
def parseLine(line:String):ArrayBuffer[String]={ | |
val jsonstr = line.split("\t")(1) | |
val a=JSON.parseFull(jsonstr) | |
val result=ArrayBuffer[String]() | |
if(a!=None){ | |
val itemIdfArray = ArrayBuffer[Pair[Int,Int]]() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load the graph from `fname` through `sc`. The third argument ignores its input and always
// yields 1.0F (presumably a constant edge attribute — confirm against GraphLoader.textFile).
// The result is repartitioned with the requested vertex/edge partition counts and cached.
// Trailing dots keep the chain pasteable into an interactive shell.
val g = GraphLoader.textFile(sc, fname, _ => 1.0F).
  withPartitioner(numVPart, numEPart).
  cache()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build the graph from the text file `fname`; every input handed to the third argument maps to
// the constant 1.0F (NOTE(review): looks like a uniform edge weight — verify with the loader API).
// Partitioning is set from numVPart / numEPart before the graph is cached for reuse.
// Trailing dots keep the chain pasteable into an interactive shell.
val g = GraphLoader.textFile(sc, fname, _ => 1.0F).
  withPartitioner(numVPart, numEPart).
  cache()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.mutable.ArrayBuffer | |
// Matches one bracketed triple of comma-separated unsigned integers, e.g. "[12,3,45]".
val name: scala.util.matching.Regex = """\[\d+,\d+,\d+\]""".r
// Same triple shape, but captures the first two integers as groups 1 and 2;
// the third integer is matched and discarded. Usable as a two-argument extractor.
val name2: scala.util.matching.Regex = """\[(\d+),(\d+),\d+]""".r
def parseLine(line: String): ArrayBuffer[String] = { | |
val jsonstr = line.split("\t")(1) | |
val result = ArrayBuffer[String]() | |
val m = name.findAllIn(jsonstr) | |
val itemIdfArray = ArrayBuffer[Pair[Int, Int]]() | |
m.foreach(a => { val name2(item, idfnum) = a; val p: Pair[Int, Int] = Pair(item.toInt, idfnum.toInt); itemIdfArray += p; }) | |
if (itemIdfArray.length > 1) { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <Eigen/Dense> | |
#include <Eigen/Array> | |
using namespace Eigen; | |
using namespace std; | |
void sigmoid(MatrixXf& input, MatrixXf& output) | |
{ | |
output = (1+ (input.array().exp()).array()).array().inverse(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build (key, label name) pairs from tab-separated lines and store them sorted by key.
//   field 0: label id, parsed as Long and looked up in label_name_map
//   field 1: sort key, parsed as Long (presumably a tag count, going by the output path — confirm)
// Lines whose label id is not in label_name_map are dropped.
// Each line is split once and the fields reused, instead of re-splitting it three times.
// Trailing dots keep the chain pasteable into an interactive shell.
val tags = file.
  map(_.split("\t")).
  filter(fields => label_name_map.contains(fields(0).toLong)).
  map(fields => fields(1).toLong -> label_name_map(fields(0).toLong)).
  sortByKey(false) // false = descending key order
tags.saveAsTextFile("hdfs://finger-test2:54310/home/TagHierarchy/tag_count_sorted")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
DBSCAN(D, eps, MinPts) | |
C = 0 | |
for each unvisited point P in dataset D | |
mark P as visited | |
NeighborPts = regionQuery(P, eps) | |
if sizeof(NeighborPts) < MinPts | |
mark P as NOISE | |
else | |
C = next cluster |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spark.util.Vector | |
val word_vec_size=150 | |
/**
 * Parses one space-separated line into a [[Vector]].
 *
 * The first token is skipped (presumably the word the vector belongs to — confirm against the
 * input format) and up to `word_vec_size` of the following tokens are converted to `Double`.
 * `slice` clamps to the tokens that exist, so a short line yields a shorter vector rather
 * than an error.
 *
 * @param line one input line whose fields are separated by single spaces; consecutive spaces
 *             produce empty tokens, which fail numeric conversion
 * @return a vector holding the parsed components
 * @throws NumberFormatException if any selected token is not a valid double
 */
def parseVector(line: String): Vector = {
  val components = line.split(' ').slice(1, word_vec_size + 1).map(_.toDouble)
  // The last expression is the result; no explicit `return` is needed.
  new Vector(components)
}
def closestPoint(p: Vector, centers: Array[Vector]): Int = { | |
var index = 0 | |
var bestIndex = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spark.util.Vector | |
import scala.math.sqrt | |
def cosineDist(a:Vector,b:Vector):Double = { | |
if(a.length==b.length){ | |
(a dot b)/(sqrt(a.squaredDist(Vector.zeros(a.length))*b.squaredDist(Vector.zeros(b.length)))) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert a data set from Mahout's format to scikit-learn's.
# Mahout descriptor example: -d N 3 C 2 N C 4 N C 8 N 2 C 19 N L
# scikit-learn:
import sys | |
import argparse | |
import numpy | |
from sklearn.cross_validation import cross_val_score | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.ensemble import ExtraTreesClassifier | |
from sklearn.tree import DecisionTreeClassifier |
OlderNewer