This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
open_data.py | |
Created by Thomas Cabrol on 2012-01-27. | |
""" | |
import re | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
tax_data.py | |
Created by Thomas Cabrol on 2012-02-12. | |
""" | |
import re |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
get_tax_data.py | |
Created by Thomas Cabrol on 2012-02-15. | |
""" | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
extract_tax_data.py | |
Created by Thomas Cabrol on 2012-02-15. | |
""" | |
import codecs | |
import os |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
geocoder.py | |
Created by Thomas Cabrol on 2012-02-08. | |
""" | |
import codecs | |
import simplejson |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require("ggplot2") | |
# Loading Iris dataset | |
columns <- c("sepal_length", "sepal_width", "petal_length", "petal_width", "class") | |
iris <- read.table("/Users/thomas/Documents/data/datasets/iris/iris_learn.csv", sep=',', col.names=columns) | |
# Simple bar graphs wrapped inside a function | |
# Showing the mean value of a given variable | |
graph.mean <- function (variable) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
USE movie_lens ; | |
-- Create the table structure ; | |
DROP TABLE IF EXISTS movies_ratings ; | |
CREATE TABLE movies_ratings ( | |
user_id INT , | |
movie_id INT , | |
rating INT , | |
timestamp INT , |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
** Loading ratings dataset ; | |
filename source "Z:\data\datasets\movielens\ml-100k\u.data" ; | |
data Movies_Ratings ; | |
attrib | |
user_id informat=best8. | |
movie_id informat=best8. | |
rating informat=best8. ; | |
infile | |
source dlm='09'x dsd missover ; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Loading base data | |
movies_ratings = LOAD '/Users/thomas/Documents/data/datasets/movielens/ml-100k/u.data' USING PigStorage('\t') AS (user_id:int, movie_id:int, rating:int) ; | |
-- Starting by limiting the dataset to movies with at least 30 ratings ; | |
B = GROUP movies_ratings BY movie_id ; | |
C = FOREACH B GENERATE group AS movie_id, COUNT($1) AS count ; | |
D = FILTER C BY count >= 30 ; | |
E = FOREACH D GENERATE movie_id AS movie_ok ; | |
F = JOIN movies_ratings BY movie_id, E BY movie_ok ; | |
filtered = FOREACH F GENERATE user_id, movie_id, rating ; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
movie_recommender.py | |
Created by Thomas Cabrol on 2012-04-06. | |
Copyright (c) 2012 __MyCompanyName__. All rights reserved. | |
""" | |
import csv |