ayee · December 21, 2015 23:03
diff --git a/pyspark-split-dataframe-column-literal.py b/pyspark-split-dataframe-column-literal.py
 from pyspark.sql.functions import split
 # Three sample rows: an integer event id and a "name:rank" string.
 sample_rows = [[1, 'Foo:10'], [2, 'Bar:11'], [3, 'Car:12']]
 df = sc.parallelize(sample_rows).toDF(['Event', 'eventtype'])

 # Split eventtype on ':' once; element 0 becomes Thing, element 1 Ranking.
 parts = split(df.eventtype, ':')
 df = df.withColumn('Thing', parts[0]).withColumn('Ranking', parts[1])
 df.collect()

 # Output of collect() as recorded with the original snippet
 # (both new columns hold strings):
 # [Row(Event=1, eventtype=u'Foo:10', Thing=u'Foo', Ranking=u'10'),
 #  Row(Event=2, eventtype=u'Bar:11', Thing=u'Bar', Ranking=u'11'),
 #  Row(Event=3, eventtype=u'Car:12', Thing=u'Car', Ranking=u'12')]
	from pyspark.sql.functions import split
	# Build a two-column DataFrame (Event, eventtype) from three literal rows.
	source_rdd = sc.parallelize([[1, 'Foo:10'], [2, 'Bar:11'], [3, 'Car:12']])
	df = source_rdd.toDF(['Event', 'eventtype'])

	# eventtype looks like "<thing>:<ranking>"; split it on ':' and pull out
	# the two pieces as separate column expressions.
	tokens = split(df.eventtype, ':')
	thing_col, ranking_col = tokens[0], tokens[1]

	df = df.withColumn('Thing', thing_col)
	df = df.withColumn('Ranking', ranking_col)
	df.collect()

	# Result of collect() as recorded with the original snippet:
	# [Row(Event=1, eventtype=u'Foo:10', Thing=u'Foo', Ranking=u'10'),
	#  Row(Event=2, eventtype=u'Bar:11', Thing=u'Bar', Ranking=u'11'),
	#  Row(Event=3, eventtype=u'Car:12', Thing=u'Car', Ranking=u'12')]
No results found