An example showing how to use Tencent COS (Cloud Object Storage) with PySpark.
import os

from pyspark import SparkContext

# First, download hadoop-cos-x.x.x-shaded.jar from https://github.com/tencentyun/hadoop-cos
# and hand it to the pyspark shell via --jars.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars ./hadoop-cos-2.8.5-shaded.jar pyspark-shell'

sc = SparkContext(appName="wordCount")

# Basic COS configuration; see https://cloud.tencent.com/document/product/436/6884 for more options.
sc._jsc.hadoopConfiguration().set("fs.cosn.userinfo.secretId", "ak")    # your SecretId
sc._jsc.hadoopConfiguration().set("fs.cosn.userinfo.secretKey", "sk")   # your SecretKey
sc._jsc.hadoopConfiguration().set("fs.cosn.bucket.region", "ap-guangzhou")  # bucket region
sc._jsc.hadoopConfiguration().set("fs.cosn.impl", "org.apache.hadoop.fs.CosFileSystem")  # register the cosn:// scheme

# Create an RDD from an object stored in COS.
text_file = sc.textFile("cosn://ap_name/filename")

# Word count.
counts = text_file.flatMap(lambda line: line.split(" ")) \
    .map(lambda word: (word, 1)) \
    .reduceByKey(lambda a, b: a + b)

# Save the result.
counts.saveAsTextFile("anywhere")

if __name__ == '__main__':
    pass
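The output path in the final step does not have to be local: once the cosn:// scheme is registered, results can be written straight back to COS. A minimal sketch, using a hypothetical bucket name and key prefix:

# Hypothetical output location (replace with your own bucket and key prefix).
counts.saveAsTextFile("cosn://your-bucket-name/wordcount-output")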
The Hadoop COS documentation at https://github.com/apache/hadoop/blob/trunk/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md might also help.
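Following the configuration keys described there, here is a rough sketch (not from the original gist) of passing the same settings through SparkConf with Spark's spark.hadoop.* prefix instead of mutating the Hadoop configuration on a live context; the jar path, credentials, region, and bucket name are placeholders:

# Sketch: same COS setup via SparkConf (spark.hadoop.* keys are forwarded to the Hadoop configuration).
from pyspark import SparkConf, SparkContext

conf = (SparkConf()
        .setAppName("wordCount")
        .set("spark.jars", "./hadoop-cos-2.8.5-shaded.jar")            # assumed local jar path
        .set("spark.hadoop.fs.cosn.userinfo.secretId", "ak")           # your SecretId
        .set("spark.hadoop.fs.cosn.userinfo.secretKey", "sk")          # your SecretKey
        .set("spark.hadoop.fs.cosn.bucket.region", "ap-guangzhou")     # bucket region
        .set("spark.hadoop.fs.cosn.impl", "org.apache.hadoop.fs.CosFileSystem"))
sc = SparkContext(conf=conf)
text_file = sc.textFile("cosn://ap_name/filename")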