@mmafrar
Created April 26, 2025 04:07
Big Data Processing - Case Study 3 (Spark)

import collections
from pyspark import SparkConf, SparkContext
# Set up the Spark configuration and context
cfg = SparkConf().setMaster("local").setAppName("ratings_histogram")
ctx = SparkContext(conf=cfg)
# Load the data from the specified file
lines = ctx.textFile("file:///Users/mmafrar/Partition/ml-100k/u.data")
print(type(lines)) # Print the type of 'lines' to verify it's an RDD
# Extract the ratings from each line
ratings = lines.map(lambda x: x.split()[2])
print(type(ratings)) # Print the type of 'ratings' to verify it's an RDD
# Count the occurrences of each rating
results = ratings.countByValue()
# Sort the results by rating
sortedResults = collections.OrderedDict(sorted(results.items()))
# Print the sorted results
for key, value in sortedResults.items():
    print("%s %i" % (key, value))
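The pipeline above can be illustrated in plain Python on a few sample lines. This is a sketch of what the RDD transformations compute, not Spark code; the sample rows are hypothetical but follow the MovieLens u.data layout (tab-separated: user id, item id, rating, timestamp):

```python
import collections

# Hypothetical sample rows in the MovieLens u.data layout:
# user_id \t item_id \t rating \t timestamp
sample_lines = [
    "196\t242\t3\t881250949",
    "186\t302\t3\t891717742",
    "22\t377\t1\t878887116",
    "244\t51\t2\t880606923",
]

# Equivalent of lines.map(lambda x: x.split()[2]):
# split each row on whitespace and keep the third field (the rating)
ratings = [line.split()[2] for line in sample_lines]

# Equivalent of ratings.countByValue():
# tally how many times each rating string occurs
results = collections.Counter(ratings)

# Sort by rating key, as collections.OrderedDict(sorted(...)) does
sorted_results = collections.OrderedDict(sorted(results.items()))
for key, value in sorted_results.items():
    print("%s %i" % (key, value))
```

The difference in the real script is only where the work happens: `countByValue()` is an RDD action that runs the tally on the Spark executors and returns an ordinary Python dict to the driver, which is why plain `sorted()` and `OrderedDict` can then be used on the result.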