Created
October 7, 2022 10:46
-
-
Save lordlinus/d7d7f66988fbb85fea031c8d466396c3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"cells":[{"cell_type":"code","source":["from graphframes import *\nimport pyspark.sql.functions as fn\nfrom pyspark.sql.functions import udf\nfrom pyspark.sql import Row\nfrom pyspark.sql.types import StructType, StructField, FloatType, StringType, ArrayType,IntegerType\nsc.setCheckpointDir(\"/dbfs/checkpoint\")"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"0373b7fc-d4d3-4c3c-a6d2-cc78c0cc33aa"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["# data = spark.read.option(\"header\",\"true\").csv(\"dbfs:/FileStore/sampleHashData.csv\")\n# v = data.withColumnRenamed(\"_beforeDataHash\",\"id\").select(\"id\",\"changeSequence\")\n# e = data.withColumnRenamed(\"_beforeDataHash\",\"src\").withColumnRenamed(\"_rowHash\",\"dst\").select(\"src\",\"dst\",\"operation\")\n# g = GraphFrame(v, e)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"f73c34e9-5e17-4ae1-92ed-71dd229d247a"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["v = sqlContext.createDataFrame([\n (\"a\", 34),\n (\"b\", 36),\n (\"c\", 37),\n (\"x\", 30),\n (\"h\", 22),\n (\"y\", 234),\n (\"z\", 50),\n (\"d\", 23),\n (\"e\", 23),\n (\"f\", 30),\n (\"g\", 32),\n], [\"id\", \"changeSequence\"])\n\ne = sqlContext.createDataFrame([\n (\"a\", \"b\", \"UPDATE\"),\n (\"b\", \"c\", \"UPDATE\"),\n (\"g\", \"h\", \"UPDATE\"),\n (\"x\", \"y\", \"UPDATE\"),\n (\"y\", \"z\", \"UPDATE\"),\n (\"c\", \"d\", \"UPDATE\"),\n (\"d\", \"e\", \"UPDATE\"),\n (\"e\", \"f\", \"UPDATE\"),\n (\"f\", \"g\", \"UPDATE\"),\n], [\"src\", \"dst\", \"operation\"])\ng = GraphFrame(v, e)\n\n\n# Final result\n# ('a','h')\n# ('x','z')"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"77117a0b-a331-432d-a8a2-704622f09c95"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["# find the max length of the connected components to 
create motif pattern\nmax_lenght_graph = g.connectedComponents().groupBy('component').count().agg({\"count\":\"max\"}).collect()[0][0]"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"905daf3e-3712-42b0-96f6-bcbe5cb11e11"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"datasetInfos":[],"data":"/databricks/spark/python/pyspark/sql/dataframe.py:129: UserWarning: DataFrame constructor is internal. Do not directly use it.\n warnings.warn(\"DataFrame constructor is internal. Do not directly use it.\")\n","removedWidgets":[],"addedWidgets":{},"metadata":{},"type":"ansi","arguments":{}}},"output_type":"display_data","data":{"text/plain":["/databricks/spark/python/pyspark/sql/dataframe.py:129: UserWarning: DataFrame constructor is internal. Do not directly use it.\n warnings.warn(\"DataFrame constructor is internal. Do not directly use it.\")\n"]}}],"execution_count":0},{"cell_type":"code","source":["# Create motif of variable length and append all these to a dataframe\ndef create_motif(length):\n motif_path = \"(a)-[]->\"\n for i in range(1, length):\n motif_path += f\"(v{i-1});(v{i-1})-[]->\"\n motif_path += f\"(v{length-1})\"\n return motif_path\n\ndef variable_length_motif(depth:int, graph):\n for length in range(1, depth + 1):\n motif_path = create_motif(length)\n if length == 1:\n base_motif = graph.find(motif_path)\n else:\n current_motif = graph.find(motif_path)\n base_motif = base_motif.unionByName(current_motif,allowMissingColumns=True)\n return base_motif"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"4af9a732-b969-4bf6-94f1-ee78f1cf5b51"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["df = 
variable_length_motif(max_lenght_graph-1,g)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"bdfc022a-08ed-438a-808b-bb5c62ded896"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["display(df)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"65a3a130-c34d-4619-a1a6-82748dbd3b8c"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[[["a",34],["b",36],null,null,null,null,null,null],[["b",36],["c",37],null,null,null,null,null,null],[["g",32],["h",22],null,null,null,null,null,null],[["x",30],["y",234],null,null,null,null,null,null],[["y",234],["z",50],null,null,null,null,null,null],[["c",37],["d",23],null,null,null,null,null,null],[["d",23],["e",23],null,null,null,null,null,null],[["f",30],["g",32],null,null,null,null,null,null],[["e",23],["f",30],null,null,null,null,null,null],[["a",34],["b",36],["c",37],null,null,null,null,null],[["f",30],["g",32],["h",22],null,null,null,null,null],[["x",30],["y",234],["z",50],null,null,null,null,null],[["b",36],["c",37],["d",23],null,null,null,null,null],[["c",37],["d",23],["e",23],null,null,null,null,null],[["e",23],["f",30],["g",32],null,null,null,null,null],[["d",23],["e",23],["f",30],null,null,null,null,null],[["e",23],["f",30],["g",32],["h",22],null,null,null,null],[["a",34],["b",36],["c",37],["d",23],null,null,null,null],[["b",36],["c",37],["d",23],["e",23],null,null,null,null],[["d",23],["e",23],["f",30],["g",32],null,null,null,null],[["c",37],["d",23],["e",23],["f",30],null,null,null,null],[["d",23],["e",23],["f",30],["g",32],["h",22],null,null,null],[["a",34],["b",36],["c",37],["d",23],["e",23],null,null,null],[["c",37],["d",23],["e",23],["f",30],["g",32],null,null,null],[["b",36],["c",37],["d",23],["e",23],["f",30],null,null,null],[["c",37],["d",23],["e",23],["f",30],["g",32],["h",22],null,null],[["b",36],["c"
,37],["d",23],["e",23],["f",30],["g",32],null,null],[["a",34],["b",36],["c",37],["d",23],["e",23],["f",30],null,null],[["b",36],["c",37],["d",23],["e",23],["f",30],["g",32],["h",22],null],[["a",34],["b",36],["c",37],["d",23],["e",23],["f",30],["g",32],null],[["a",34],["b",36],["c",37],["d",23],["e",23],["f",30],["g",32],["h",22]]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"a","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"v0","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"v1","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"v2","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"v3","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"v4","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"v5","type":"{\"type\":\"struct\",\"fields\":[{
\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"v6","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>a</th><th>v0</th><th>v1</th><th>v2</th><th>v3</th><th>v4</th><th>v5</th><th>v6</th></tr></thead><tbody><tr><td>List(a, 34)</td><td>List(b, 36)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(b, 36)</td><td>List(c, 37)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(g, 32)</td><td>List(h, 22)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(x, 30)</td><td>List(y, 234)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(y, 234)</td><td>List(z, 50)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(c, 37)</td><td>List(d, 23)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(d, 23)</td><td>List(e, 23)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(f, 30)</td><td>List(g, 
32)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(e, 23)</td><td>List(f, 30)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(a, 34)</td><td>List(b, 36)</td><td>List(c, 37)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(f, 30)</td><td>List(g, 32)</td><td>List(h, 22)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(x, 30)</td><td>List(y, 234)</td><td>List(z, 50)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>null</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>List(h, 22)</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(a, 34)</td><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>null</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>List(h, 
22)</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(a, 34)</td><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>null</td><td>null</td><td>null</td></tr><tr><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>List(h, 22)</td><td>null</td><td>null</td></tr><tr><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>null</td><td>null</td></tr><tr><td>List(a, 34)</td><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>null</td><td>null</td></tr><tr><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>List(h, 22)</td><td>null</td></tr><tr><td>List(a, 34)</td><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>null</td></tr><tr><td>List(a, 34)</td><td>List(b, 36)</td><td>List(c, 37)</td><td>List(d, 23)</td><td>List(e, 23)</td><td>List(f, 30)</td><td>List(g, 32)</td><td>List(h, 22)</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"code","source":["# Get all the ids from the vertices\ncols = df.columns\nids = [x+'.id' for x in cols]\ndf2 = df.withColumn('concatenated_cols',fn.concat_ws('-',*ids)).groupBy('a').agg(fn.concat_ws(\",\", 
fn.collect_list(\"concatenated_cols\")).alias(\"concatenated_cols\"))"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"f885de1e-1f10-4854-ab76-af1ef60f4d73"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["display(df2)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"ab5ff283-3a3c-4741-89a5-41b08d4d7a62"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[[["x",30],"x-y,x-y-z"],[["b",36],"b-c-d-e-f-g,b-c-d-e-f-g-h,b-c-d,b-c,b-c-d-e-f,b-c-d-e"],[["a",34],"a-b-c-d-e-f,a-b-c-d-e-f-g-h,a-b-c-d,a-b-c,a-b,a-b-c-d-e,a-b-c-d-e-f-g"],[["g",32],"g-h"],[["f",30],"f-g-h,f-g"],[["e",23],"e-f-g-h,e-f,e-f-g"],[["d",23],"d-e-f-g-h,d-e-f,d-e-f-g,d-e"],[["c",37],"c-d-e-f-g-h,c-d,c-d-e-f,c-d-e-f-g,c-d-e"],[["y",234],"y-z"]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"a","type":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"changeSequence\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","metadata":"{}"},{"name":"concatenated_cols","type":"\"string\"","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead 
style='background-color: white'><tr><th>a</th><th>concatenated_cols</th></tr></thead><tbody><tr><td>List(x, 30)</td><td>x-y,x-y-z</td></tr><tr><td>List(b, 36)</td><td>b-c-d-e-f-g,b-c-d-e-f-g-h,b-c-d,b-c,b-c-d-e-f,b-c-d-e</td></tr><tr><td>List(a, 34)</td><td>a-b-c-d-e-f,a-b-c-d-e-f-g-h,a-b-c-d,a-b-c,a-b,a-b-c-d-e,a-b-c-d-e-f-g</td></tr><tr><td>List(g, 32)</td><td>g-h</td></tr><tr><td>List(f, 30)</td><td>f-g-h,f-g</td></tr><tr><td>List(e, 23)</td><td>e-f-g-h,e-f,e-f-g</td></tr><tr><td>List(d, 23)</td><td>d-e-f-g-h,d-e-f,d-e-f-g,d-e</td></tr><tr><td>List(c, 37)</td><td>c-d-e-f-g-h,c-d,c-d-e-f,c-d-e-f-g,c-d-e</td></tr><tr><td>List(y, 234)</td><td>y-z</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"code","source":["def extract_link(v):\n link = max([(len(y.split(\"-\")),y.split(\"-\")) for y in v.split(\",\")])[1]\n return (link,link[0],link[-1],len(link))\n\nschema_added = StructType([\n StructField(\"link\", ArrayType(StringType()), False),\n StructField(\"start\", StringType(), False),\n StructField(\"end\", StringType(), False),\n StructField(\"len\", IntegerType(), False)\n])\nextract_link_udf = udf(extract_link,schema_added)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"e5d883ec-c702-4f97-baaa-5e6c66be5524"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["df3= 
df2.select(extract_link_udf('concatenated_cols').alias('cols')).select('cols.link','cols.start','cols.end','cols.len')"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"46937ff0-5f8b-487f-bf33-daaaa95f9d38"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["display(df3)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"c313b417-d82c-443e-83b8-9382854914d6"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[[["g","h"],"g","h",2],[["d","e","f","g","h"],"d","h",5],[["f","g","h"],"f","h",3],[["c","d","e","f","g","h"],"c","h",6],[["b","c","d","e","f","g","h"],"b","h",7],[["a","b","c","d","e","f","g","h"],"a","h",8],[["y","z"],"y","z",2],[["e","f","g","h"],"e","h",4],[["x","y","z"],"x","z",3]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"link","type":"{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true}","metadata":"{}"},{"name":"start","type":"\"string\"","metadata":"{}"},{"name":"end","type":"\"string\"","metadata":"{}"},{"name":"len","type":"\"integer\"","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: 
white'><tr><th>link</th><th>start</th><th>end</th><th>len</th></tr></thead><tbody><tr><td>List(g, h)</td><td>g</td><td>h</td><td>2</td></tr><tr><td>List(d, e, f, g, h)</td><td>d</td><td>h</td><td>5</td></tr><tr><td>List(f, g, h)</td><td>f</td><td>h</td><td>3</td></tr><tr><td>List(c, d, e, f, g, h)</td><td>c</td><td>h</td><td>6</td></tr><tr><td>List(b, c, d, e, f, g, h)</td><td>b</td><td>h</td><td>7</td></tr><tr><td>List(a, b, c, d, e, f, g, h)</td><td>a</td><td>h</td><td>8</td></tr><tr><td>List(y, z)</td><td>y</td><td>z</td><td>2</td></tr><tr><td>List(e, f, g, h)</td><td>e</td><td>h</td><td>4</td></tr><tr><td>List(x, y, z)</td><td>x</td><td>z</td><td>3</td></tr></tbody></table></div>"]}}],"execution_count":0},{"cell_type":"code","source":["df4 = df3.groupBy('end').agg(fn.max('len').alias(\"_len\")).withColumnRenamed('end','_end')\ndf5 = df3.join( df4, (df3.end == df4._end) & (df3.len == df4._len), 'inner').select('start','end','link','len')"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"9b44781b-b5a5-4a12-8e6e-dd152e35531e"}},"outputs":[],"execution_count":0},{"cell_type":"code","source":["display(df5)"],"metadata":{"application/vnd.databricks.v1+cell":{"title":"","showTitle":false,"inputWidgets":{},"nuid":"76c5388e-fd95-43b9-a979-1da9950553a8"}},"outputs":[{"output_type":"display_data","metadata":{"application/vnd.databricks.v1+output":{"overflow":false,"datasetInfos":[],"data":[["a","h",["a","b","c","d","e","f","g","h"],8],["x","z",["x","y","z"],3]],"plotOptions":{"displayType":"table","customPlotOptions":{},"pivotColumns":null,"pivotAggregation":null,"xColumns":null,"yColumns":null},"columnCustomDisplayInfos":{},"aggType":"","isJsonSchema":true,"removedWidgets":[],"aggSchema":[],"schema":[{"name":"start","type":"\"string\"","metadata":"{}"},{"name":"end","type":"\"string\"","metadata":"{}"},{"name":"link","type":"{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true}","metadata":"{}"},{"na
me":"len","type":"\"integer\"","metadata":"{}"}],"aggError":"","aggData":[],"addedWidgets":{},"metadata":{},"dbfsResultPath":null,"type":"table","aggOverflow":false,"aggSeriesLimitReached":false,"arguments":{}}},"output_type":"display_data","data":{"text/html":["<style scoped>\n .table-result-container {\n max-height: 300px;\n overflow: auto;\n }\n table, th, td {\n border: 1px solid black;\n border-collapse: collapse;\n }\n th, td {\n padding: 5px;\n }\n th {\n text-align: left;\n }\n</style><div class='table-result-container'><table class='table-result'><thead style='background-color: white'><tr><th>start</th><th>end</th><th>link</th><th>len</th></tr></thead><tbody><tr><td>a</td><td>h</td><td>List(a, b, c, d, e, f, g, h)</td><td>8</td></tr><tr><td>x</td><td>z</td><td>List(x, y, z)</td><td>3</td></tr></tbody></table></div>"]}}],"execution_count":0}],"metadata":{"application/vnd.databricks.v1+notebook":{"notebookName":"graphx","dashboards":[],"notebookMetadata":{"pythonIndentUnit":4},"language":"python","widgets":{},"notebookOrigID":3351652596622951}},"nbformat":4,"nbformat_minor":0} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment