@jeanmidevacc
Created August 2, 2024 18:26
from pyspark.sql import SparkSession, Window
import pyspark.sql.functions as F

# PySpark DataFrame containing the items to be indexed (identified by "itemid").
dfs_items = ...

# 0 for a first run; for an incremental process, set this to the max
# "itemid_indexed" produced by the previous run, so new indices continue from it.
previous_max_itemid_indexed = 0

# Window ordered by itemid. Note: with no partitionBy, Spark collects all rows
# into a single partition to compute the row numbers.
windowspec = Window.orderBy(F.col("itemid"))

# Assign a contiguous 1-based row number to each item, then shift it by the
# previous run's max index.
dfs_items = dfs_items.withColumn("itemid_indexed", F.row_number().over(windowspec))
dfs_items = dfs_items.withColumn("itemid_indexed", F.col("itemid_indexed") + previous_max_itemid_indexed)
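The window-function approach above boils down to: sort by `itemid`, rank from 1, and add the previous run's max index. A minimal pure-Python sketch of that same logic, without a Spark session (the item ids and the `index_items` helper are made up for illustration):

```python
def index_items(itemids, previous_max_indexed=0):
    """Return {itemid: index}, with indices continuing after previous_max_indexed."""
    ordered = sorted(itemids)  # mirrors Window.orderBy("itemid")
    # enumerate(..., start=1) plays the role of F.row_number()
    return {itemid: rank + previous_max_indexed
            for rank, itemid in enumerate(ordered, start=1)}

# First (full) run: indices start at 1.
first_run = index_items(["itm_c", "itm_a", "itm_b"])
# → {"itm_a": 1, "itm_b": 2, "itm_c": 3}

# Incremental run: new items continue after the previous max index (3 here).
second_run = index_items(["itm_e", "itm_d"], previous_max_indexed=3)
# → {"itm_d": 4, "itm_e": 5}
```

One caveat this sketch shares with the Spark version: the indices are only stable across runs if previously indexed items are excluded from later runs, since `row_number` re-ranks whatever rows it is given.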