Created
January 28, 2014 19:56
-
-
Save pitosalas/8675012 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note to Ken: Interesting, you assume that the records after the first 80,000 are test data.
# Also, you add the 'test data' into both the base and the test set.
# Neither one is wrong, but they are assumptions that go beyond the specification. No need to fix.
# Loads one ratings file and splits it, by line position, into base and test data.
#
# Reads the file at @base_path line by line. Each line is whitespace-split
# into user_id, movie_id, rating, time_stamp (the time stamp is not used).
# The first +base_count+ lines are passed to add_base_data together with
# @movie_base_hash; every later line is passed to add_movie together with
# @movie_test_hash and also to add_to_test_data_arr.
#
# NOTE(review): load_data calls `load_base_data`, not `load_base_dataz`.
# The trailing "z" here looks like a typo -- confirm before renaming.
# NOTE(review): the base branch uses add_base_data while the test branch
# uses add_movie; presumably intentional, verify against those helpers.
#
# @param base_count [Integer] number of leading lines treated as base data
#   (defaults to 80_000, the previously hard-coded split point)
def load_base_dataz(base_count = 80_000)
  # File.foreach closes the file once iteration finishes; the earlier
  # File.open(...).each_line form never closed the handle explicitly.
  File.foreach(@base_path).with_index do |line, index|
    user_id, movie_id, rating = line.split
    if index < base_count
      add_base_data @movie_base_hash, user_id, movie_id, rating
    else
      add_movie @movie_test_hash, user_id, movie_id, rating
      add_to_test_data_arr user_id, movie_id, rating
    end
  end
end
# Loads base data and test data from two separate files.
#
# Every line of both files is whitespace-split into
# user_id, movie_id, rating, time_stamp (the time stamp is not used).
# Lines from @base_path are passed to add_base_data with @movie_base_hash.
# Lines from @test_path are passed to add_movie with @movie_test_hash and
# also to add_to_test_data_arr.
def load_base_and_test_data
  # File.foreach closes each file once iteration finishes; the earlier
  # File.open(...).each_line form never closed the handles explicitly.

  # Base data first.
  File.foreach(@base_path) do |line|
    user_id, movie_id, rating = line.split
    add_base_data @movie_base_hash, user_id, movie_id, rating
  end

  # Then the test data.
  File.foreach(@test_path) do |line|
    user_id, movie_id, rating = line.split
    add_movie @movie_test_hash, user_id, movie_id, rating
    add_to_test_data_arr user_id, movie_id, rating
  end
end
# Chooses the loading strategy from @data_file.
#
# When @data_file is exactly 'u.data', load_base_data is called; for any
# other value, load_base_and_test_data is called.
#
# NOTE(review): the only similarly named method visible in this file is
# `load_base_dataz`; if no `load_base_data` exists elsewhere, this call
# raises NameError -- confirm which name is intended.
def load_data
  if @data_file == 'u.data'
    load_base_data
  else
    load_base_and_test_data
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I meant Note to Jacob :)