Skip to content

Instantly share code, notes, and snippets.

View marcelcaraciolo's full-sized avatar
💭
Coding !

Marcel Caraciolo marcelcaraciolo

💭
Coding !
View GitHub Profile
def cosine(dot_product, rating_norm_squared, rating2_norm_squared):
    '''
    Cosine-style similarity between two rating vectors A and B.

    Evaluates dot_product / (rating_norm_squared * rating2_norm_squared).
    The two norm arguments are multiplied exactly as given; no square
    root is taken here.
    NOTE(review): the parameter names suggest squared norms, while the
    formula dotProduct(A, B) / (norm(A) * norm(B)) needs plain norms --
    confirm what the caller actually passes.

    Returns a float, or 0.0 when the product of the two norm arguments
    is zero, so a zero-length vector never triggers a division error.
    '''
    norm_product = rating_norm_squared * rating2_norm_squared
    if not norm_product:
        # Degenerate vector(s): define the similarity as 0.0.
        return 0.0
    # float() forces true division even for integer inputs.
    return dot_product / float(norm_product)
def calculate_ranking(self, item_keys, values):
'''
Emit items with similarity in key for ranking:
19,0.4 70,1
19,0.6 21,2
21,0.6 19,2
21,0.9 70,1
70,0.4 19,1
70,0.9 21,1
from math import sqrt
def correlation(size, dot_product, rating_sum, \
rating2sum, rating_norm_squared, rating2_norm_squared):
'''
The correlation between two vectors A, B is
cov(A, B) / (stdDev(A) * stdDev(B))
'''
def pairwise_items(self, user_id, values):
'''
The output drops the user from the key entirely, instead it emits
the pair of items as the key:
19,21 2,1
19,70 2,4
21,70 1,4
19,21 1,2
def group_by_user_rating(self, key, line):
"""
Emit the user_id and group by their ratings (item and rating)
17 70,3
35 21,1
49 19,2
49 21,1
49 70,4
87 19,1
@marcelcaraciolo
marcelcaraciolo / callcommand.sh
Created August 14, 2012 18:01
Call moviesSimilarities
$ python moviesSimilarities.py ratings.csv > output.csv
@marcelcaraciolo
marcelcaraciolo / ratings.csv
Created August 14, 2012 17:57
ratings.csv
We can make this file beautiful and searchable if this error is corrected: It looks like row 7 should actually have 1 column, instead of 2 in line 6.
196|Kolya (1996)|3
186|L.A. Confidential (1997)|3
22|Heavyweights (1994)|1
244|Legends of the Fall (1994)|2
166|Jackie Brown (1997)|1
298|Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)|4
115|Hunt for Red October, The (1990)|2
253|Jungle Book, The (1994)|5
305|Grease (1978)|3
@marcelcaraciolo
marcelcaraciolo / graphs.py
Created June 18, 2012 15:11
exemplo_graphs
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pylab
from matplotlib.ticker import MaxNLocator
def plot_graphs(dataP, id_listP, titleP, labelsP,
dataR, id_listR, titleR, labelsR,
dataF, id_listF, titleF, labelsF, filesave=None):
from flask import jsonify
from flask import Flask
from correios import obter_rastreamento
# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
@app.route('/track/<trackid>', methods = ['GET'])
def api_tracking(trackid):
    '''
    GET /track/<trackid>: return tracking data for ``trackid`` as JSON.

    The ``trackid`` path segment is handed to ``obter_rastreamento`` and
    whatever that returns is serialised with ``jsonify``. The response
    status is set explicitly to 200.
    NOTE(review): the shape of the data returned by ``obter_rastreamento``
    is not visible here -- confirm it is JSON-serialisable in all cases.
    '''
    tracking_data = obter_rastreamento(trackid)
    response = jsonify(tracking_data)
    response.status_code = 200
    return response
import urllib
import sys
import re
import json
# Query URL template for the Correios "websro" service. The single %s
# placeholder fills the P_COD_UNI parameter (presumably the tracking code
# to look up -- verify against the code that formats this string).
URL = 'http://websro.correios.com.br/sro_bin/txect01$.QueryList?P_ITEMCODE=&P_LINGUA=001&P_TESTE=&P_TIPO=001&P_COD_UNI=%s'
def captura_html(url):
    '''
    Fetch ``url`` and return the raw response body.

    url: the address to open (any scheme the urllib opener supports).

    Returns the body exactly as read from the response object: a byte
    string (``str`` on Python 2, ``bytes`` on Python 3). No decoding is
    performed here.

    The response handle is always closed, even if reading fails, so the
    underlying socket/file is not leaked. Errors raised by ``urlopen`` or
    ``read`` propagate to the caller unchanged.
    '''
    # `urllib.urlopen` only exists on Python 2; on Python 3 it lives in
    # `urllib.request`. Resolve it locally so the function works on both.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()