This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Write every rating in the Netflix data set to a single file.
Each record has the form (movie id, user id, rating, timestamp).
'''
import sys,os | |
import MySQLdb | |
import datetime | |
def timestamp(time): | |
#We are assuming that the start date is 1990 | |
old=datetime.date(1990,1,1) | |
newD=time.rstrip("\n").split("-") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Bulk-load the comma-separated ratings file into allRatings,
-- one input line per row.
LOAD DATA INFILE "/home/crazyabtliv/netflixAll2.txt"
    INTO TABLE allRatings
    FIELDS TERMINATED BY ","
    LINES TERMINATED BY "\n";
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some details of the dataset:
1. Number of movie titles: 17,770
2. Number of ratings: over 100 million
3. Number of users: 480,000
Sample lines from the file:
5468: | |
716091,5,2001-02-08 | |
1945809,3,2002-10-06 | |
2400678,3,2003-04-04 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- One row per rating: which user gave which movie what rating, and when.
CREATE TABLE allRatings (
    movieId   INT(20),
    userId    INT(20),
    rating    INT(20),
    timestamp DECIMAL(60,4)
);

-- Per-id summary: an average and a count.
-- NOTE(review): presumably `number` is the number of ratings behind
-- `average` -- confirm against the script that fills this table.
CREATE TABLE userdata (
    id      INT(30),
    average DECIMAL(50,5),
    number  INT(20)
);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys,os | |
import MySQLdb | |
def connect():
    """Open the local ``netflix`` MySQL database and return a cursor on it.

    Connects to ``localhost`` as user ``root``; no password is passed.

    Returns:
        A cursor created from the newly opened connection.
    """
    # NOTE(review): only the cursor is returned; the connection object itself
    # is not handed to the caller -- confirm the driver keeps it alive.
    connection = MySQLdb.connect(host="localhost", user="root", db="netflix")
    return connection.cursor()
def getAverage(cursor, id):
    """Return the average rating recorded for one movie.

    Args:
        cursor: DB-API cursor on the database that holds ``allRatings``.
        id: movieId to look up; an int or a numeric string both work.

    Returns:
        The row produced by ``cursor.fetchone()`` for the ``avg(rating)``
        query.
    """
    # Bind the id as a query parameter instead of concatenating it into the
    # SQL text: concatenation raised TypeError for int ids and left the
    # statement open to injection for string ids.
    cursor.execute(
        "select avg(rating) from allRatings where movieId=%s", (id,)
    )
    return cursor.fetchone()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys,os | |
import MySQLdb | |
def connect():
    """Return a cursor on the ``netflix`` MySQL database at ``localhost``.

    The connection is opened as user ``root`` with no password argument.

    Returns:
        A cursor obtained from the new connection.
    """
    # NOTE(review): the connection object is not returned, only its cursor --
    # confirm the driver keeps the connection alive for the cursor's lifetime.
    db_connection = MySQLdb.connect(host="localhost", user="root", db="netflix")
    db_cursor = db_connection.cursor()
    return db_cursor
def getAverage(cursor, id):
    """Return the average rating recorded for one movie.

    Args:
        cursor: DB-API cursor on the database that holds ``allRatings``.
        id: movieId to look up (int or numeric string).

    Returns:
        The row produced by ``cursor.fetchone()`` for the ``avg(rating)``
        query.
    """
    # Pass the id as a bound parameter so the driver escapes it; building the
    # statement with str(id) put the raw value straight into the SQL text.
    cursor.execute(
        "select avg(rating) from allRatings where movieId=%s", (id,)
    )
    return cursor.fetchone()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys,os | |
import MySQLdb | |
def connect():
    """Connect to the ``netflix`` database on ``localhost`` and return a cursor.

    Logs in as user ``root``; no password is supplied.

    Returns:
        A cursor created on the freshly opened connection.
    """
    # NOTE(review): callers receive the cursor only, never the connection --
    # confirm the driver keeps the connection alive.
    link = MySQLdb.connect(host="localhost", user="root", db="netflix")
    return link.cursor()
def getAverage(cursor, id):
    """Return the average rating given by one user.

    Args:
        cursor: DB-API cursor on the database that holds ``allRatings``.
        id: userId to look up (int or numeric string).

    Returns:
        The row produced by ``cursor.fetchone()`` for the ``avg(rating)``
        query.
    """
    # This variant filters on userId, not movieId. The id is bound as a
    # parameter so the driver escapes it rather than it being spliced into
    # the SQL text.
    cursor.execute(
        "select avg(rating) from allRatings where userId=%s", (id,)
    )
    return cursor.fetchone()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mysql> select * from userdata order by average desc limit 0,10; | |
+-------+---------+--------+ | |
| id | average | number | | |
+-------+---------+--------+ | |
| 15617 | 5.00000 | 1 | | |
| 12047 | 5.00000 | 1 | | |
| 9551 | 5.00000 | 26 | | |
| 29199 | 5.00000 | 6 | | |
| 18881 | 5.00000 | 13 | | |
| 38940 | 5.00000 | 3 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#This script reads all files in all directories of the folder taken from the openClassroom site and generates the dictionary, which we can then store in a file | |
folders = ["spam-train","spam-test","nonspam-train","nonspam-test"] | |
import os,sys | |
#We need a dictionary to store word occurrences: create a defaultdict, update the frequencies as we go, and write everything to a file in one pass.
from collections import * | |
dictionary = defaultdict(int) | |
fdict = open(sys.argv[2],'w') #File to write all the entries in the dictionary | |
for root,dirnames,filenames in os.walk(sys.argv[1]): | |
for d in dirnames: #For each directory | |
for f in os.listdir(d): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rohit@linux-u5pt:~/Twitter/stanfordTweets> head training.1600000.processed.noemoticon.csv | |
"0","1467810369","Mon Apr 06 22:19:45 PDT 2009","NO_QUERY","_TheSpecialOne_","@switchfoot http://twitpic.com/2y1zl - Awww, that's a bummer. You shoulda got David Carr of Third Day to do it. ;D" | |
"0","1467810672","Mon Apr 06 22:19:49 PDT 2009","NO_QUERY","scotthamilton","is upset that he can't update his Facebook by texting it... and might cry as a result School today also. Blah!" | |
"0","1467810917","Mon Apr 06 22:19:53 PDT 2009","NO_QUERY","mattycus","@Kenichan I dived many times for the ball. Managed to save 50% The rest go out of bounds" | |
"0","1467811184","Mon Apr 06 22:19:57 PDT 2009","NO_QUERY","ElleCTF","my whole body feels itchy and like its on fire " | |
"0","1467811193","Mon Apr 06 22:19:57 PDT 2009","NO_QUERY","Karoli","@nationwideclass no, it's not behaving at all. i'm mad. why am i here? because I can't see you all over there. " | |
"0","1467811372","Mon Apr 06 22:20:00 PDT 2009","NO_QUERY","joy_wolf","@Kwesidei not |
OlderNewer