Last active
June 23, 2019 21:44
-
-
Save dustalov/2c1340b972fd28f8cad30e45adf24178 to your computer and use it in GitHub Desktop.
Watset (Java) Performance Measurement
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env groovy | |
import org.apache.commons.math3.stat.descriptive.moment.Mean | |
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation | |
import org.jgrapht.graph.SimpleWeightedGraph | |
import org.jgrapht.util.SupplierUtil | |
import org.nlpub.watset.graph.ChineseWhispers | |
import org.nlpub.watset.graph.NodeWeighting | |
import org.nlpub.watset.graph.MaxMax | |
import org.nlpub.watset.eval.Measurer | |
import org.nlpub.watset.graph.Watset | |
import java.nio.file.Paths | |
import java.util.concurrent.ForkJoinPool | |
import java.util.logging.Level | |
import java.util.logging.LogManager | |
import java.util.logging.Logger | |
/* | |
* Copyright 2018 Dmitry Ustalov | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
* | |
*/ | |
// Pin the default locale to ROOT before anything is formatted or printed.
Locale.setDefault(Locale.ROOT)

// Command-line interface: -s ("silent") and -p ("parallel").
def cli = new CliBuilder()
cli.usage = 'collocation.groovy [-s] [-p]'
cli.s 'silent'
cli.p 'parallel'

// Terminate with status 1 when parsing yields nothing usable.
def options = cli.parse(args)
if (!options) {
    System.exit(1)
}
logger = Logger.getLogger('Collocation')

// -s: set every handler of the root logger to WARNING.
if (options.s) {
    for (handler in LogManager.getLogManager().getLogger('').getHandlers()) {
        handler.setLevel(Level.WARNING)
    }
}

if (!options.p) {
    // This is a very important bit that effectively disables stream parallelism.
    // The property is set before the common pool is first touched, and the
    // assertion verifies that the setting was picked up.
    System.setProperty('java.util.concurrent.ForkJoinPool.common.parallelism', '1')
    assert ForkJoinPool.commonPool().getParallelism() == 1
} else {
    // -p: leave the common pool alone and only report its parallelism.
    def parallelism = ForkJoinPool.commonPool().getParallelism()
    logger.info(String.format('Parallelism level is %d.', parallelism))
}

// The collocation file is the first positional argument; exit with status 2 without it.
def positional = options.arguments()
if (!positional) {
    logger.warning('No collocation file provided.')
    System.exit(2)
}
// Builder that accumulates the weighted graph described by the collocation file.
builder = SimpleWeightedGraph.createBuilder(SupplierUtil.createDefaultWeightedEdgeSupplier())

// Each line is split on tabs into at most four fields: fields 0 and 1 are
// integer vertex identifiers and field 3 is parsed as the edge weight.
Paths.get(options.arguments()[0]).withReader { reader ->
    reader.eachLine { String line ->
        String[] fields = line.split('\t', 4)
        int left = fields[0] as int
        int right = fields[1] as int

        // Always pass the smaller identifier first.
        int low = Math.min(left, right)
        int high = Math.max(left, right)

        builder.addVertices(low, high)
        builder.addEdge(low, high, fields[3] as float)
    }
}

graph = builder.build()

// Largest vertex degree in the graph; 0 when the graph has no vertices.
degree = graph.vertexSet().inject(0) { int best, vertex ->
    Math.max(best, graph.degreeOf(vertex))
}
// Builds a Chinese Whispers provider; every call gets its own Random seeded with 1337.
def chineseWhispers = {
    ChineseWhispers.provider(NodeWeighting.top(), ChineseWhispers.ITERATIONS, new Random(1337))
}

// Algorithms under measurement, keyed by the name printed in the report.
// A Groovy map literal is a LinkedHashMap, so this order is the iteration order.
algorithms = [
    'cw'            : chineseWhispers(),
    'maxmax'        : MaxMax.provider(),
    'watset-top-top': Watset.provider(chineseWhispers(), chineseWhispers()),
]
// Header row: one tab-separated label per column of the rows printed below.
// The labels are joined instead of being fed through a format string, so a
// label can never be silently dropped by a placeholder/argument mismatch.
System.out.print(['algorithm', 'nodes', 'edges', 'degree', 'clusters', 'mean', 'stddev'].join('\t') + '\n')

// One row per algorithm: graph size, maximum degree, mean of the cluster
// counts, and mean and standard deviation of the durations reported by the
// Measurer (units are whatever Measurer uses; not visible in this script).
algorithms.each { algorithmName, algorithm ->
    def measurer = new Measurer(algorithm, graph)
    measurer.run()

    double[] durations = measurer.getDurations()
    // Cluster counts are converted to doubles so that Mean can evaluate them.
    double[] clusters = Arrays.stream(measurer.getClusters()).asDoubleStream().toArray()

    System.out.printf('%s\t%d\t%d\t%d\t%.2f\t%.2f\t%.2f\n',
        algorithmName,
        graph.vertexSet().size(),
        graph.edgeSet().size(),
        degree,
        new Mean().evaluate(clusters),
        new Mean().evaluate(durations),
        new StandardDeviation().evaluate(durations)
    )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -e
# Runs collocation.groovy on every Leipzig corpus listed below, once without
# and once with the -p flag, and saves each run's output through tee.

# -e is repeated here so it also applies when the script is started as
# `bash script.sh`; pipefail makes a failing groovy run abort the script
# instead of being masked by the exit status of tee.
set -eo pipefail

export JAVA_OPTS='-Xms64G -Xmx64G'
export CLASSPATH="$HOME/watset-java/target/watset.jar"

LEIPZIG="$HOME/leipzig"

for corpus in {eng_news_2016,deu_news_2015,rus_news_2010}_{10K,30K,100K,300K,1M}; do
    input="$LEIPZIG/$corpus/$corpus-co_s.txt"
    groovy collocation.groovy "$input" | tee "collocation-$corpus.txt"
    groovy collocation.groovy -p "$input" | tee "collocation-parallel-$corpus.txt"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env groovy | |
import org.apache.commons.math3.stat.descriptive.moment.Mean | |
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation | |
import org.jgrapht.Graph | |
import org.jgrapht.generate.CompleteGraphGenerator | |
import org.jgrapht.generate.GnmRandomGraphGenerator | |
import org.jgrapht.generate.ScaleFreeGraphGenerator | |
import org.jgrapht.generate.StarGraphGenerator | |
import org.jgrapht.graph.SimpleWeightedGraph | |
import org.jgrapht.util.SupplierUtil | |
import org.nlpub.watset.graph.ChineseWhispers | |
import org.nlpub.watset.graph.NodeWeighting | |
import org.nlpub.watset.graph.MaxMax | |
import org.nlpub.watset.eval.Measurer | |
import org.nlpub.watset.graph.Watset | |
import java.util.concurrent.ForkJoinPool | |
import java.util.logging.Level | |
import java.util.logging.LogManager | |
import java.util.logging.Logger | |
/* | |
* Copyright 2018 Dmitry Ustalov | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
* | |
*/ | |
// Pin the default locale to ROOT before anything is formatted or printed.
Locale.setDefault(Locale.ROOT)

// Command-line interface: -p ("parallel") and -s ("silent").
def cli = new CliBuilder()
cli.usage = 'performance.groovy [-p] [-s]'
cli.p 'parallel'
cli.s 'silent'

// Terminate with status 1 when parsing yields nothing usable.
def options = cli.parse(args)
if (!options) {
    System.exit(1)
}
boolean sequential = !options.p
if (sequential) {
    // This is a very important bit that effectively disables stream parallelism.
    // The property is set before the common pool is first touched, and the
    // assertion verifies that the setting was picked up.
    System.setProperty('java.util.concurrent.ForkJoinPool.common.parallelism', '1')
    assert ForkJoinPool.commonPool().getParallelism() == 1
}

logger = Logger.getLogger('Performance')

// -s: set every handler of the root logger to WARNING.
if (options.s) {
    for (handler in LogManager.getLogManager().getLogger('').getHandlers()) {
        handler.setLevel(Level.WARNING)
    }
}
// Factories that, given a size, construct the JGraphT generator for one graph
// family. The random-based generators get a fresh Random seeded with 1337 on
// every call.
starGraph = { int size ->
    return new StarGraphGenerator(size)
}

scaleFreeGraph = { int size ->
    return new ScaleFreeGraphGenerator(size, new Random(1337))
}

erdosRenyiGraph = { int size ->
    // The second argument is three times the size; the trailing false flags
    // presumably disable loops and multiple edges — confirm against JGraphT.
    int tripled = 3 * size
    return new GnmRandomGraphGenerator(size, tripled, new Random(1337), false, false)
}

completeGraph = { int size ->
    return new CompleteGraphGenerator(size)
}
/**
 * Creates a new SimpleWeightedGraph with integer-supplied vertices and
 * default-supplied edges, and fills it using the generator obtained by
 * calling {@code generator(n)}.
 *
 * @param n         the size handed to the generator factory
 * @param generator a callable that returns a graph generator for the size
 * @return the populated graph
 */
static def generate(n, generator) {
    def target = new SimpleWeightedGraph(
        SupplierUtil.createIntegerSupplier(),
        SupplierUtil.createDefaultEdgeSupplier())
    generator(n).generateGraph(target, new HashMap())
    return target
}
// Generated graphs mapped to their family name; insertion order is the report order.
graphs = new LinkedHashMap<Graph, String>()
// Maximum vertex degree of every generated graph (an Integer, not a String).
degrees = new HashMap<Graph, Integer>()

// Graphs are generated for 10^1 .. 10^maxExponent vertices; the exponent comes
// from the first positional argument and defaults to 3.
int maxExponent = options.arguments() ? options.arguments()[0] as int : 3

1.upto(maxExponent) { exponent ->
    logger.info(String.format('Generating graphs for 10^%d.', exponent))
    def size = 10**exponent

    graphs.put(generate(size, starGraph), 'star')
    logger.info('Star graph done.')

    graphs.put(generate(size, erdosRenyiGraph), 'erdos-renyi')
    logger.info('Erdős-Rényi graph done.')

    // Scale-free graphs are only generated below 10^6 vertices.
    if (exponent < 6) {
        graphs.put(generate(size, scaleFreeGraph), 'scale-free')
        logger.info('Scale-free graph done.')
    }

    // Complete graphs are only generated below 10^3 vertices.
    if (exponent < 3) {
        graphs.put(generate(size, completeGraph), 'complete')
        logger.info('Complete graph done.')
    }
}

// Record the largest vertex degree of each graph (0 for a graph without vertices).
graphs.keySet().each { graph ->
    degrees.put(graph, graph.vertexSet().stream().
        mapToInt({ graph.degreeOf(it) }).
        max().orElse(0))
}
// Builds a Chinese Whispers provider; every call gets its own Random seeded with 1337.
def chineseWhispers = {
    ChineseWhispers.provider(NodeWeighting.top(), ChineseWhispers.ITERATIONS, new Random(1337))
}

// Algorithms under measurement, keyed by the name printed in the report.
// A Groovy map literal is a LinkedHashMap, so this order is the iteration order.
algorithms = [
    'cw'            : chineseWhispers(),
    'maxmax'        : MaxMax.provider(),
    'watset-top-top': Watset.provider(chineseWhispers(), chineseWhispers()),
]
// Header row: one tab-separated label per column of the rows printed below.
// The labels are joined instead of being fed through a format string, so a
// label can never be silently dropped by a placeholder/argument mismatch.
System.out.print(['algorithm', 'graph', 'nodes', 'edges', 'degree', 'clusters', 'mean', 'stddev'].join('\t') + '\n')

// One row per (algorithm, graph) pair: graph size, maximum degree, mean of the
// cluster counts, and mean and standard deviation of the durations reported by
// the Measurer (units are whatever Measurer uses; not visible in this script).
algorithms.each { algorithmName, algorithm ->
    graphs.each { graph, graphName ->
        def measurer = new Measurer(algorithm, graph)
        measurer.run()

        double[] durations = measurer.getDurations()
        // Cluster counts are converted to doubles so that Mean can evaluate them.
        double[] clusters = Arrays.stream(measurer.getClusters()).asDoubleStream().toArray()

        System.out.printf('%s\t%s\t%d\t%d\t%d\t%.2f\t%.2f\t%.2f\n',
            algorithmName,
            graphName,
            graph.vertexSet().size(),
            graph.edgeSet().size(),
            degrees.get(graph),
            new Mean().evaluate(clusters),
            new Mean().evaluate(durations),
            new StandardDeviation().evaluate(durations)
        )
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment