Skip to content

Instantly share code, notes, and snippets.

View micaleel's full-sized avatar
:octocat:
I may be slow to respond.

Khalil micaleel

:octocat:
I may be slow to respond.
View GitHub Profile
@micaleel
micaleel / pmf
Created November 13, 2018 17:12 — forked from groverpr/pmf
pmf neural net fastai
ratings = pd.read_csv('ratings_small.csv') # loading data from csv
"""
ratings_small.csv has 4 columns - userId, movieId, ratings, and timestamp
it is the most generic data format for CF-related data
"""
val_indx = get_cv_idxs(len(ratings)) # index for validation set
wd = 2e-4 # weight decay
n_factors = 50 # n_factors - dimension of embedding matrix (D)
@micaleel
micaleel / stopwatch.py
Last active October 19, 2018 16:15
Poor man's stopwatch profiler for code blocks
import time
import datetime
class Stopwatch:
def __init__(self, message: None, silent=False, callback=None):
self._start = None
self._stop = None
self._silent = silent
self._message = message
@micaleel
micaleel / preprocess_text.py
Created March 23, 2018 16:05
Preprocess text
from nltk.corpus import stopwords
import string
STOPWORDS = frozenset(stopwords.words('english'))
def is_valid(token):
    """Return True if ``token`` should be kept as a word.

    A token is kept when it consists solely of alphabetic characters, is
    not a member of ``STOPWORDS``, and is longer than one character.

    Args:
        token: The string token to test.

    Returns:
        bool: True when all three conditions hold, False otherwise.
    """
    # Every check applies to ``token``; the previous body referred to the
    # names ``word`` and ``w``, which are not defined in this snippet.
    return token.isalpha() and token not in STOPWORDS and len(token) > 1
def clean(text):
num = int(input('Enter a positive number: '))
# Ensure that the number is positive; otherwise, exit.
if num < 0:
print('You entered a negative number')
exit()
for i in range(1, num + 1):
# Finding the factors of i
factors = []
@micaleel
micaleel / duplicatebibs.py
Last active October 2, 2017 11:37
Lists duplicate BibTeX entries in a .bib file
"""Find duplicate BibTeX entries."""
import sys
import os
from collections import Counter
from pprint import pprint
def extract_id(line):
x = line.index('{')
@micaleel
micaleel / request_handler_test.py
Created April 28, 2017 16:24 — forked from didip/request_handler_test.py
Testing Tornado RequestHandlers
import unittest, os, os.path, sys, urllib
import tornado.database
import tornado.options
from tornado.options import options
from tornado.testing import AsyncHTTPTestCase
# add application root to sys.path
APP_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(os.path.join(APP_ROOT, '..'))
@micaleel
micaleel / gist:da3c3df40ea0e374a20f6d897d4a8c1c
Created February 27, 2017 16:51 — forked from entaroadun/gist:1653794
Recommendation and Ratings Public Data Sets For Machine Learning

Movies Recommendation:

Music Recommendation:

def compute_edit_dist(df_explanations, perturb_scale=0.0, gold_std_col='rank_target_item_average_rating'):
"""Computes the edit distance between the rankings from different approaches.
Args:
perturb_scale: noise level; higher values indicate more noise.
gold_std_col: column with gold standard ranking.
df_explanations: DataFrame of explanations for a single session.
Returns:
@micaleel
micaleel / centering-plots.py
Created January 6, 2017 21:47
Centering Plots in Jupyter Notebook
from IPython.core.display import HTML
HTML("""
<style>
.output_png {
display: table-cell;
text-align: center;
vertical-align: middle;
}
</style>
@micaleel
micaleel / rank_metrics.py
Created October 11, 2016 15:23 — forked from bwhite/rank_metrics.py
Ranking Metrics
"""Information Retrieval metrics
Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np