kshirsagarsiddharth · June 23, 2021 16:35
diff --git a/combine.py b/combine.py
 ratings.createOrReplaceTempView('ratings')
 books.createOrReplaceTempView('books')

 combined_dataframe = spark.sql("""
             SELECT user_id,r.isbn,book_rating,book_title
             FROM ratings r INNER JOIN books b 
             ON r.isbn = b.isbn 
             """)
 # we only take those books for which the 15 or more users have rated and those books which have 50 or more rating

 combined_dataframe.createOrReplaceTempView('combined_dataframe')

 # we only take those books which have their total rating greater than 50
 combined_and_filtered = spark.sql("""SELECT isbn,
                                            user_id,
                                            book_rating,
                                            book_title,
                                            sum(book_rating) OVER (PARTITION BY isbn) as total_book_rating, 
                                            count(user_id) OVER (PARTITION BY user_id) as user_ratings_total
                                    FROM combined_dataframe
                    
                    """).filter((F.col("total_book_rating") > 50) & (F.col('user_ratings_total') > 15))
	ratings.createOrReplaceTempView('ratings')
	books.createOrReplaceTempView('books')

	combined_dataframe = spark.sql("""
	SELECT user_id,r.isbn,book_rating,book_title
	FROM ratings r INNER JOIN books b
	ON r.isbn = b.isbn
	""")
	# we only take those books for which the 15 or more users have rated and those books which have 50 or more rating

	combined_dataframe.createOrReplaceTempView('combined_dataframe')

	# we only take those books which have their total rating greater than 50
	combined_and_filtered = spark.sql("""SELECT isbn,
	user_id,
	book_rating,
	book_title,
	sum(book_rating) OVER (PARTITION BY isbn) as total_book_rating,
	count(user_id) OVER (PARTITION BY user_id) as user_ratings_total
	FROM combined_dataframe

	""").filter((F.col("total_book_rating") > 50) & (F.col('user_ratings_total') > 15))
No results found