Skip to content

Instantly share code, notes, and snippets.

public static void trainActual(Iterable<Pair<Text, VectorWritable>> inputIterable, String outBase,
Map<String, Integer> clusterNamesToIds) throws IOException {
Map<String, Centroid> actualClusters = Maps.newHashMap();
computeActualClusters(inputIterable, actualClusters);
OnlineLogisticRegression learningAlgorithm =
new OnlineLogisticRegression(NUM_CLASSES, NUM_FEATURES_ACTUAL, new L1());
for (Pair<Text, VectorWritable> pair : inputIterable) {
Vector actualCentroid = pair.getSecond().get();
@dfilimon
dfilimon / SyntaxTreeNodeVisitor.java
Created January 26, 2013 14:48
Visitor pattern with reflection
package syntax;
import syntax.tree.SyntaxTreeNode;
import syntax.tree.arithmetic.*;
import syntax.tree.comparison.EqualityNode;
import syntax.tree.comparison.IsVoidNode;
import syntax.tree.comparison.LessOrEqualThanNode;
import syntax.tree.comparison.LessThanNode;
import syntax.tree.conditional.CaseNode;
import syntax.tree.conditional.ConditionNode;
@dfilimon
dfilimon / numprimes.R
Created February 3, 2013 22:52
Number of primes less than or equal to n vs. n
# Plot, for n up to 1,000,000, how many primes are less than or equal to n.
library('gmp')

# One row per candidate integer; `isprime` holds the code returned by
# gmp's isprime() for that integer.
primes <- data.frame(1:1000000)
names(primes) <- 'nums'
primes$isprime <- isprime(primes$nums)

# Keep only the integers whose code is 2.
# NOTE(review): per the gmp documentation 2 should mean "certainly prime"
# (1 being "probably prime") -- confirm against the installed gmp version.
pn <- primes[primes$isprime == 2, 'nums']

# ecdf(pn) evaluates to the *fraction* of the selected primes that are <= n,
# i.e. a value in [0, 1]. Plotting `e` directly would therefore show a
# proportion under an axis labelled as a count. Multiplying by length(pn)
# turns the fraction back into the number of primes <= n; round() removes
# floating-point noise so the plotted values are whole numbers.
e <- ecdf(pn)
plot(pn, round(e(pn) * length(pn)), type='s',
     xlab='n', ylab='Number of primes less or equal to n')
@dfilimon
dfilimon / BallKMeans.java
Created March 29, 2013 11:07
ConcurrentModificationException from Multinomial
// Re-weight everything according to the minimum distance to a seed.
// For every index yielded by seedSelector, a candidate weight is computed as
// nextSeed's weight multiplied by the distance between nextSeed and the
// datapoint at that index; the candidate replaces the stored weight only when
// it is smaller than the weight seedSelector currently reports for the index.
// NOTE(review): seedSelector.set(...) is invoked while seedSelector itself is
// being iterated. Whether that is safe depends on how the iterator of
// seedSelector's type reacts to modification -- confirm; it may raise a
// ConcurrentModificationException.
for (int currSeedIndex : seedSelector) {
WeightedVector curr = datapoints.get(currSeedIndex);
double newWeight = nextSeed.getWeight() * distanceMeasure.distance(nextSeed, curr);
if (newWeight < seedSelector.getWeight(currSeedIndex)) {
// Weights are only ever lowered here, never raised.
seedSelector.set(currSeedIndex, newWeight);
}
}
@dfilimon
dfilimon / TestSequentialAccessSparseVector.java
Created April 12, 2013 16:26
Vector iteration in Mahout.
@Test
public void testVectorIteration() {
Vector vector = new SequentialAccessSparseVector(100);
vector.set(0, 1);
vector.set(2, 2);
vector.set(4, 3);
vector.set(6, 4);
Iterator<Vector.Element> vectorIterator = vector.iterateNonZero();
Vector.Element element = null;
int i = 0;
@dfilimon
dfilimon / VectorBinaryAssign.java
Created April 23, 2013 15:22
VectorBinaryAssign
package org.apache.mahout.math;
import com.google.common.base.Preconditions;
import org.apache.mahout.math.function.DoubleDoubleFunction;
import org.apache.mahout.math.set.OpenIntHashSet;
import java.util.Iterator;
public abstract class VectorBinaryAssign {
private static final VectorBinaryAssign operations[] = new VectorBinaryAssign[] {