Khalil micaleel

Movies Recommendation:

MovieLens - Movie Recommendation Data Sets http://www.grouplens.org/node/73
Yahoo! - Movie, Music, and Images Ratings Data Sets http://webscope.sandbox.yahoo.com/catalog.php?datatype=r
Jester - Movie Ratings Data Sets (Collaborative Filtering Dataset) http://www.ieor.berkeley.edu/~goldberg/jester-data/
Cornell University - Movie-review data for use in sentiment-analysis experiments http://www.cs.cornell.edu/people/pabo/movie-review-data/

Music Recommendation:

	ratings = pd.read_csv('ratings_small.csv') # loading data from csv
	"""
	ratings_small.csv has 4 columns - userId, movieId, ratings, and timestamp.
	It is the most generic data format for CF-related data.
	"""

	val_indx = get_cv_idxs(len(ratings)) # index for validation set
	wd = 2e-4 # weight decay
	n_factors = 50 # n_factors - dimension of embedding matrix (D)

	import time
	import datetime


	class Stopwatch:
		"""Holder for the state of a simple stopwatch.

		Only the constructor is visible in this snippet: it keeps the label
		and the silent flag and leaves both time marks unset.
		"""

		def __init__(self, message=None, silent=False, callback=None):
			"""Create an idle stopwatch.

			Args:
				message: Optional label kept on the instance. The original
					signature read ``message: None`` (an annotation, not a
					default); it now defaults to ``None``.
				silent: Flag stored unchanged on the instance.
				callback: Accepted so existing callers keep working.
			"""
			# NOTE(review): `callback` is accepted but not stored anywhere in
			# the visible part of this snippet -- confirm against the full class.
			self._start = None  # no start mark recorded yet
			self._stop = None  # no stop mark recorded yet
			self._silent = silent
			self._message = message

	from nltk.corpus import stopwords
	import string


	STOPWORDS = frozenset(stopwords.words('english'))

	def is_valid(token):
		"""Return True when *token* should be kept.

		A token is kept when it is purely alphabetic, is not a member of
		STOPWORDS, and is longer than one character.
		"""
		# The original body referenced the undefined names `word` and `w`,
		# which raised NameError on every call; the parameter is `token`.
		return token.isalpha() and token not in STOPWORDS and len(token) > 1

	def clean(text):

	num = int(input('Enter a positive number: '))

	# Ensure that the number is positive; otherwise, exit.
	if num < 0:
	print('You entered a negative number')
	exit()

	for i in range(1, num + 1):
	# Finding the factors of i
	factors = []

	"""Find duplicate BibTeX entries."""

	import sys
	import os
	from collections import Counter
	from pprint import pprint


	def extract_id(line):
	x = line.index('{')

	import unittest, os, os.path, sys, urllib
	import tornado.database
	import tornado.options
	from tornado.options import options
	from tornado.testing import AsyncHTTPTestCase

	# add application root to sys.path
	APP_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
	sys.path.append(os.path.join(APP_ROOT, '..'))


	def compute_edit_dist(df_explanations, perturb_scale=0.0, gold_std_col='rank_target_item_average_rating'):
	"""Computes the edit distance between the rankings from different approaches.

	Args:
	perturb_scale: noise level; higher values indicate more noise.
	gold_std_col: column with gold standard ranking.
	df_explanations: DataFrame of explanations for a single session.

	Returns:

	from IPython.core.display import HTML

	HTML("""
	<style>
	.output_png {
	display: table-cell;
	text-align: center;
	vertical-align: middle;
	}
	</style>

	"""Information Retrieval metrics

	Useful Resources:
	http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
	http://www.nii.ac.jp/TechReports/05-014E.pdf
	http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
	http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
	Learning to Rank for Information Retrieval (Tie-Yan Liu)
	"""
	import numpy as np