koverholt · August 29, 2015 14:17
diff --git a/spark_wordcount.py b/spark_wordcount.py
 from pyspark import SparkContext
 from pyspark import SparkConf

 if __name__ == "__main__":
     # Word-count job: tally how often each word occurs in the input text and
     # save the resulting (count, word) pairs, highest count first.
     conf = SparkConf()
     # NOTE(review): "{hostname}" is passed through literally -- nothing in
     # this script formats it. Presumably a placeholder for the Spark master's
     # host name; confirm and substitute before running.
     conf.setMaster("spark://{hostname}:7077")
     conf.setAppName("WordCount")
     sc = SparkContext(conf=conf)

     # The pipeline is kept inside the guard because it depends on `sc`, which
     # is only created when this file is executed as a script.
     # Named `lines` rather than `file` to avoid shadowing the Python 2 builtin.
     lines = sc.textFile("/mnt/gluster/pg2591.txt")
     counts = (
         lines
         # Normalize each line: drop double quotes, single quotes and colons;
         # turn '.', ',' and ';' into spaces; lowercase the result.
         .map(lambda line: line.replace('"', '').replace("'", '')
              .replace('.', ' ').replace(',', ' ')
              .replace(':', '').replace(';', ' ').lower())
         # Break every normalized line into whitespace-separated words.
         .flatMap(lambda line: line.split())
         # Pair each word with 1 and add the ones up per word.
         .map(lambda word: (word, 1))
         .reduceByKey(lambda a, b: a + b)
         # Swap to (count, word) so the count becomes the sort key.
         .map(lambda pair: (pair[1], pair[0]))
         # False -> descending order by key (the count).
         .sortByKey(False)
     )
     counts.saveAsTextFile("/mnt/gluster/wordcount")
	from pyspark import SparkContext
	from pyspark import SparkConf

	if __name__ == "__main__":
		# Word-count job: tally how often each word occurs in the input text
		# and save the resulting (count, word) pairs, highest count first.
		conf = SparkConf()
		# NOTE(review): "{hostname}" is passed through literally -- nothing in
		# this script formats it. Presumably a placeholder for the Spark
		# master's host name; confirm and substitute before running.
		conf.setMaster("spark://{hostname}:7077")
		conf.setAppName("WordCount")
		sc = SparkContext(conf=conf)

		# The pipeline is kept inside the guard because it depends on `sc`,
		# which is only created when this file is executed as a script.
		# Named `lines` rather than `file` to avoid shadowing the Python 2
		# builtin.
		lines = sc.textFile("/mnt/gluster/pg2591.txt")
		counts = (
			lines
			# Normalize each line: drop double quotes, single quotes and
			# colons; turn '.', ',' and ';' into spaces; lowercase the result.
			.map(lambda line: line.replace('"', '').replace("'", '')
				.replace('.', ' ').replace(',', ' ')
				.replace(':', '').replace(';', ' ').lower())
			# Break every normalized line into whitespace-separated words.
			.flatMap(lambda line: line.split())
			# Pair each word with 1 and add the ones up per word.
			.map(lambda word: (word, 1))
			.reduceByKey(lambda a, b: a + b)
			# Swap to (count, word) so the count becomes the sort key.
			.map(lambda pair: (pair[1], pair[0]))
			# False -> descending order by key (the count).
			.sortByKey(False)
		)
		counts.saveAsTextFile("/mnt/gluster/wordcount")
No results found