Last active
December 19, 2017 02:42
-
-
Save solidpple/28478a3a7502a32eabf308965aa4f1bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# How to convert an ArrayType() column into multiple columns.
#
# NOTE(review): `hc` and `sc` are not defined in this snippet. They are
# presumably a HiveContext/SQLContext and a SparkContext provided by the
# surrounding session (e.g. the pyspark shell) -- confirm before running.
df = hc.createDataFrame(
    sc.parallelize([["a", [1, 2, 3]], ["b", [2, 3, 4]]]),
    ["key", "value"],
)
df.printSchema()
df.show()
# Expected output:
#
# root
#  |-- key: string (nullable = true)
#  |-- value: array (nullable = true)
#  |    |-- element: long (containsNull = true)
#
# +---+-------+
# |key|  value|
# +---+-------+
# |  a|[1,2,3]|
# |  b|[2,3,4]|
# +---+-------+

# Individual array elements can be accessed with Python's [] indexing on the
# column; each indexed expression becomes its own output column.
df.select("key", df.value[0], df.value[1], df.value[2]).show()
# Expected output:
#
# +---+--------+--------+--------+
# |key|value[0]|value[1]|value[2]|
# +---+--------+--------+--------+
# |  a|       1|       2|       3|
# |  b|       2|       3|       4|
# +---+--------+--------+--------+
# To get a StructType() column instead, wrap the indexed array elements in a
# struct and give each field an explicit name via alias().
#
# NOTE(review): `psf` is not imported in this snippet. It is presumably
# `pyspark.sql.functions` (i.e. `from pyspark.sql import functions as psf`)
# made available by the surrounding session -- confirm before running.
df2 = df.select(
    "key",
    psf.struct(
        df.value[0].alias("value1"),
        df.value[1].alias("value2"),
        df.value[2].alias("value3"),
    ).alias("value"),
)
df2.printSchema()
df2.show()
# Expected output:
#
# root
#  |-- key: string (nullable = true)
#  |-- value: struct (nullable = false)
#  |    |-- value1: long (nullable = true)
#  |    |-- value2: long (nullable = true)
#  |    |-- value3: long (nullable = true)
#
# +---+-------+
# |key|  value|
# +---+-------+
# |  a|[1,2,3]|
# |  b|[2,3,4]|
# +---+-------+
# The fields of the struct column can be expanded into separate top-level
# columns with the `*` wildcard. This relies on `df2`, the DataFrame with the
# struct column named "value" built earlier in this script.
df2.select("key", "value.*").show()
# Expected output:
#
# +---+------+------+------+
# |key|value1|value2|value3|
# +---+------+------+------+
# |  a|     1|     2|     3|
# |  b|     2|     3|     4|
# +---+------+------+------+
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment