Spark 1.3.0 PyCharm local development environment example
import os
import sys
import traceback
import json

# Path to the Spark installation folder
os.environ['SPARK_HOME'] = "/Users/jilu/Downloads/spark-1.3.0-bin-hadoop2.4"

# Append pyspark and the bundled py4j archive to the Python path
sys.path.append("/Users/jilu/Downloads/spark-1.3.0-bin-hadoop2.4/python/")
sys.path.append("/Users/jilu/Downloads/spark-1.3.0-bin-hadoop2.4/python/lib/py4j-0.8.2.1-src.zip")

# Try to import the needed Spark modules
try:
    from pyspark import SparkContext
    from pyspark import SparkConf
    from pyspark.sql import SQLContext, Row
    print("Successfully imported Spark Modules")
except ImportError:
    print("Cannot import Spark Modules: {}".format(traceback.format_exc()))
    sys.exit(1)

# Configure the Spark environment: run locally under the app name "myApp"
conf = SparkConf().setAppName("myApp").setMaster("local")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

# Read the gzipped input file; each line holds tab-separated fields,
# with a JSON document in the third field
lines = sc.textFile("/Users/jilu/Downloads/2015011900_0.gz")
rows = lines.map(lambda l: json.loads(l.split("\t")[2]))

# Print 10 randomly sampled parsed records (sampled without replacement)
for x in rows.takeSample(False, 10):
    print(x)
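
# Optional follow-up (not part of the original gist): the script creates a
# SQLContext and imports Row but never uses them. A minimal sketch, assuming
# each record parsed by json.loads is a flat dict of simple values, of how
# the parsed records could be turned into a DataFrame and queried. The temp
# table name "events" is arbitrary and chosen only for illustration.
df = sqlContext.createDataFrame(rows.map(lambda d: Row(**d)))
df.printSchema()

df.registerTempTable("events")
print(sqlContext.sql("SELECT COUNT(*) FROM events").collect())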