Skip to content

Instantly share code, notes, and snippets.

@emptymalei
Last active April 20, 2022 15:15
Show Gist options
  • Save emptymalei/07ba6716d0e2d815ebb64adce25dee72 to your computer and use it in GitHub Desktop.
Save emptymalei/07ba6716d0e2d815ebb64adce25dee72 to your computer and use it in GitHub Desktop.
Beware of python mutable objects in pyspark
Display the source blob
Display the rendered blob
Raw
{"cells":[{"cell_type":"markdown","source":["# Strange Typing in Pyspark"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"48e7fc3d-5fc4-4b09-913a-8bdfa82a6eb3"}}},{"cell_type":"code","source":["import pyspark.sql.functions as F\nimport pyspark.sql.types as T"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"2b3c2117-2e0b-4cc5-815b-dd4a72bb5c91"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"<div class=\"ansiout\"></div>","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \"Source Code Pro\", \"Menlo\", monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n</style>\n<div class=\"ansiout\"></div>"]}}],"execution_count":0},{"cell_type":"code","source":["columns = [\"language\",\"a\", \"b\"]\ndata = [\n (\"Java\", [0], [0]), (\"Python\", [0], [0]), (\"Scala\", [1], [2]), (\"Java\", [1], [1]), (\"Python\", [-1], [3]), (\"Java\", [-1], [3]), (\"Scala\", [-1], [3])\n]"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"73bbcd50-8db5-491c-bec7-573234a6f5c7"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"<div class=\"ansiout\"></div>","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \"Source Code Pro\", \"Menlo\", monospace;;\n font-size: 13px;\n 
color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n</style>\n<div class=\"ansiout\"></div>"]}}],"execution_count":0},{"cell_type":"code","source":["sdf = spark.createDataFrame(data).toDF(*columns)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"c2e69fc1-918a-4b30-8347-cc9c4a8b0659"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[{"name":"sdf","typeStr":"pyspark.sql.dataframe.DataFrame","schema":{"fields":[{"metadata":{},"name":"language","nullable":true,"type":"string"},{"metadata":{},"name":"a","nullable":true,"type":{"containsNull":true,"elementType":"long","type":"array"}},{"metadata":{},"name":"b","nullable":true,"type":{"containsNull":true,"elementType":"long","type":"array"}}],"type":"struct"},"tableIdentifier":null}],"data":"<div class=\"ansiout\"></div>","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \"Source Code Pro\", \"Menlo\", monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n</style>\n<div 
class=\"ansiout\"></div>"]}}],"execution_count":0},{"cell_type":"code","source":["display(sdf)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"1dda0b6a-d098-4679-a95a-efccd2e8ddaa"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["Java",[0],[0]],["Python",[0],[0]],["Scala",[1],[2]],["Java",[1],[1]],["Python",[-1],[3]],["Java",[-1],[3]],["Scala",[-1],[3]]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"language","type":"\"string\"","metadata":"{}"},{"name":"a","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"b","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: 
white'><tr><th>language</th><th>a</th><th>b</th></tr></thead><tbody><tr><td>Java</td><td>List(0)</td><td>List(0)</td></tr><tr><td>Python</td><td>List(0)</td><td>List(0)</td></tr><tr><td>Scala</td><td>List(1)</td><td>List(2)</td></tr><tr><td>Java</td><td>List(1)</td><td>List(1)</td></tr><tr><td>Python</td><td>List(-1)</td><td>List(3)</td></tr><tr><td>Java</td><td>List(-1)</td><td>List(3)</td></tr><tr><td>Scala</td><td>List(-1)</td><td>List(3)</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"code","source":["sdf_1 = sdf.groupby(\"language\", \"b\").agg((F.collect_list(F.struct(\"b\"))).alias(\"combined\"))\nsdf_2 = sdf.groupby(\"language\", \"b\").agg(F.max(\"b\").alias(\"combined\"))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"af2f2643-45a9-4704-a7f6-dd7928928e46"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[{"name":"sdf_1","typeStr":"pyspark.sql.dataframe.DataFrame","schema":{"fields":[{"metadata":{},"name":"language","nullable":true,"type":"string"},{"metadata":{},"name":"b","nullable":true,"type":{"containsNull":true,"elementType":"long","type":"array"}},{"metadata":{},"name":"combined","nullable":true,"type":{"containsNull":false,"elementType":{"fields":[{"metadata":{},"name":"b","nullable":true,"type":{"containsNull":true,"elementType":"long","type":"array"}}],"type":"struct"},"type":"array"}}],"type":"struct"},"tableIdentifier":null},{"name":"sdf_2","typeStr":"pyspark.sql.dataframe.DataFrame","schema":{"fields":[{"metadata":{},"name":"language","nullable":true,"type":"string"},{"metadata":{},"name":"b","nullable":true,"type":{"containsNull":true,"elementType":"long","type":"array"}},{"metadata":{},"name":"combined","nullable":true,"type":{"containsNull":true,"elementType":"long","type":"array"}}],"type":"struct"},"tableIdentifier":null}],"data":"<div 
class=\"ansiout\"></div>","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \"Source Code Pro\", \"Menlo\", monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n</style>\n<div class=\"ansiout\"></div>"]}}],"execution_count":0},{"cell_type":"code","source":["display(\n sdf_1\n)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"552f1a41-996f-4b89-b10f-fbb8b0386c25"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["Java",[0],[[[0]]]],["Python",[0],[[[0]]]],["Scala",[2],[[[2]]]],["Java",[1],[[[1]]]],["Python",[3],[[[3]]]],["Java",[3],[[[3]]]],["Scala",[3],[[[3]]]]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"language","type":"\"string\"","metadata":"{}"},{"name":"b","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"combined","type":"{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"b\",\"type\":{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]},\"containsNull\":false}","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n 
border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>language</th><th>b</th><th>combined</th></tr></thead><tbody><tr><td>Java</td><td>List(0)</td><td>List(List(List(0)))</td></tr><tr><td>Python</td><td>List(0)</td><td>List(List(List(0)))</td></tr><tr><td>Scala</td><td>List(2)</td><td>List(List(List(2)))</td></tr><tr><td>Java</td><td>List(1)</td><td>List(List(List(1)))</td></tr><tr><td>Python</td><td>List(3)</td><td>List(List(List(3)))</td></tr><tr><td>Java</td><td>List(3)</td><td>List(List(List(3)))</td></tr><tr><td>Scala</td><td>List(3)</td><td>List(List(List(3)))</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"code","source":["display(sdf_2)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"6b700461-b4fe-4943-b06b-435fdb33dbe5"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["Java",[0],[0]],["Java",[1],[1]],["Java",[3],[3]],["Python",[0],[0]],["Python",[3],[3]],["Scala",[2],[2]],["Scala",[3],[3]]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"language","type":"\"string\"","metadata":"{}"},{"name":"b","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"combined","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/h
tml":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>language</th><th>b</th><th>combined</th></tr></thead><tbody><tr><td>Java</td><td>List(0)</td><td>List(0)</td></tr><tr><td>Java</td><td>List(1)</td><td>List(1)</td></tr><tr><td>Java</td><td>List(3)</td><td>List(3)</td></tr><tr><td>Python</td><td>List(0)</td><td>List(0)</td></tr><tr><td>Python</td><td>List(3)</td><td>List(3)</td></tr><tr><td>Scala</td><td>List(2)</td><td>List(2)</td></tr><tr><td>Scala</td><td>List(3)</td><td>List(3)</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"markdown","source":["## When is it mutable"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"bfa3f5dc-d558-4a3b-b532-015cc81a4a91"}}},{"cell_type":"code","source":["sch = T.ArrayType(T.IntegerType())\n\n@F.udf(returnType=sch)\ndef add_one(data):\n b = data[\"b\"]\n b[0] = b[0] + 1\n \n return b"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"6005d751-a356-49f9-896f-b78bb678d988"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"<div class=\"ansiout\"></div>","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"html","arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \"Source Code Pro\", \"Menlo\", monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n }\n</style>\n<div 
class=\"ansiout\"></div>"]}}],"execution_count":0},{"cell_type":"markdown","source":["In a dataframe with agg results, we observe weird outputs"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"2d8f7184-c57c-4919-84d3-37c965bfdf38"}}},{"cell_type":"code","source":["sch = T.ArrayType(T.IntegerType())\n\ndisplay(sdf_1.withColumn(\n \"b_add_one\",\n add_one(\n F.struct(\"language\", \"b\")\n )\n))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"4be7b836-2c61-4c6a-aeab-80bfbedf8198"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["Java",[0],[[[0]]],[2]],["Python",[0],[[[0]]],[2]],["Scala",[2],[[[2]]],[3]],["Java",[1],[[[1]]],[2]],["Python",[3],[[[3]]],[6]],["Java",[3],[[[3]]],[6]],["Scala",[3],[[[3]]],[6]]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"language","type":"\"string\"","metadata":"{}"},{"name":"b","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"combined","type":"{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"b\",\"type\":{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]},\"containsNull\":false}","metadata":"{}"},{"name":"b_add_one","type":"{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true}","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n 
overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>language</th><th>b</th><th>combined</th><th>b_add_one</th></tr></thead><tbody><tr><td>Java</td><td>List(0)</td><td>List(List(List(0)))</td><td>List(2)</td></tr><tr><td>Python</td><td>List(0)</td><td>List(List(List(0)))</td><td>List(2)</td></tr><tr><td>Scala</td><td>List(2)</td><td>List(List(List(2)))</td><td>List(3)</td></tr><tr><td>Java</td><td>List(1)</td><td>List(List(List(1)))</td><td>List(2)</td></tr><tr><td>Python</td><td>List(3)</td><td>List(List(List(3)))</td><td>List(6)</td></tr><tr><td>Java</td><td>List(3)</td><td>List(List(List(3)))</td><td>List(6)</td></tr><tr><td>Scala</td><td>List(3)</td><td>List(List(List(3)))</td><td>List(6)</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"code","source":["display(sdf_2.withColumn(\n \"b_add_one\",\n add_one(\n F.struct(\"language\", \"b\")\n 
)\n))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"d9337ea5-4cf0-4ce8-b87e-e31ee5a5debf"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["Java",[0],[0],[2]],["Java",[1],[1],[2]],["Java",[3],[3],[6]],["Python",[0],[0],[2]],["Python",[3],[3],[6]],["Scala",[2],[2],[3]],["Scala",[3],[3],[6]]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"language","type":"\"string\"","metadata":"{}"},{"name":"b","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"combined","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"b_add_one","type":"{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true}","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: 
white'><tr><th>language</th><th>b</th><th>combined</th><th>b_add_one</th></tr></thead><tbody><tr><td>Java</td><td>List(0)</td><td>List(0)</td><td>List(2)</td></tr><tr><td>Java</td><td>List(1)</td><td>List(1)</td><td>List(2)</td></tr><tr><td>Java</td><td>List(3)</td><td>List(3)</td><td>List(6)</td></tr><tr><td>Python</td><td>List(0)</td><td>List(0)</td><td>List(2)</td></tr><tr><td>Python</td><td>List(3)</td><td>List(3)</td><td>List(6)</td></tr><tr><td>Scala</td><td>List(2)</td><td>List(2)</td><td>List(3)</td></tr><tr><td>Scala</td><td>List(3)</td><td>List(3)</td><td>List(6)</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"markdown","source":["dataframe without aggregations works fine"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"c5eec991-9dc8-409b-ac19-3bc553f4ce33"}}},{"cell_type":"code","source":["display(sdf.withColumn(\n \"b_add_one\",\n add_one(\n F.struct(\"language\", \"b\")\n )\n))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"4bf84462-3b9d-40bb-bee5-3def5284ffd5"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["Java",[0],[0],[1]],["Python",[0],[0],[1]],["Scala",[1],[2],[3]],["Java",[1],[1],[2]],["Python",[-1],[3],[4]],["Java",[-1],[3],[4]],["Scala",[-1],[3],[4]]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"language","type":"\"string\"","metadata":"{}"},{"name":"a","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"b","type":"{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true}","metadata":"{}"},{"name":"b_add_one","type":"{\"type\":\"array\",\"elementType
\":\"integer\",\"containsNull\":true}","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>language</th><th>a</th><th>b</th><th>b_add_one</th></tr></thead><tbody><tr><td>Java</td><td>List(0)</td><td>List(0)</td><td>List(1)</td></tr><tr><td>Python</td><td>List(0)</td><td>List(0)</td><td>List(1)</td></tr><tr><td>Scala</td><td>List(1)</td><td>List(2)</td><td>List(3)</td></tr><tr><td>Java</td><td>List(1)</td><td>List(1)</td><td>List(2)</td></tr><tr><td>Python</td><td>List(-1)</td><td>List(3)</td><td>List(4)</td></tr><tr><td>Java</td><td>List(-1)</td><td>List(3)</td><td>List(4)</td></tr><tr><td>Scala</td><td>List(-1)</td><td>List(3)</td><td>List(4)</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"code","source":[""],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"be90f9df-d992-4b3c-b154-5ff546394ea4"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"data":"","errorSummary":"","metadata":{},"errorTraceType":null,"type":"ipynbError","arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .ansiout {\n display: block;\n unicode-bidi: embed;\n white-space: pre-wrap;\n word-wrap: break-word;\n word-break: break-all;\n font-family: \"Source Code Pro\", \"Menlo\", monospace;;\n font-size: 13px;\n color: #555;\n margin-left: 4px;\n line-height: 19px;\n 
}\n</style>"]}}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"pyspark-strange-udf-behavior","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":21118282}},"nbformat":4,"nbformat_minor":0}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment