Skip to content

Instantly share code, notes, and snippets.

View micaleel's full-sized avatar
:octocat:
I may be slow to respond.

Khalil micaleel

:octocat:
I may be slow to respond.
View GitHub Profile
# coding=UTF-8
from __future__ import division
import nltk
from collections import Counter
# This is a simple tool for automatically adding hashtags to an article title
# Created by Shlomi Babluki
# Sep, 2013
# coding=UTF-8
from __future__ import division
import nltk
import re
import requests
# Add your Freebase API key here.
# If you don't have one, register at https://code.google.com/apis/console
FREEBASE_KEY = ""
# coding=UTF-8
from __future__ import division
import nltk
import re
import requests
# Add your Freebase API key here.
# If you don't have one, register at https://code.google.com/apis/console
FREEBASE_KEY = ""
@minrk
minrk / nbstripout
Last active March 12, 2025 18:41
git pre-commit hook for stripping output from IPython notebooks
#!/usr/bin/env python
"""strip outputs from an IPython Notebook
Opens a notebook, strips its output, and writes the outputless version to the original file.
Useful mainly as a git filter or pre-commit hook for users who don't want to track output in VCS.
This does mostly the same thing as the `Clear All Output` command in the notebook UI.
LICENSE: Public Domain
# Fit a 3-neighbour KNeighborsClassifier once per weighting scheme and collect
# one [weights-label, accuracy] pair per scheme in `results`.
# `train`, `test`, `features`, `np` and `KNeighborsClassifier` are expected to
# be defined earlier in the script.
results = []
for w in ['uniform', 'distance', lambda x: np.log(x)]:
    clf = KNeighborsClassifier(3, weights=w)
    # Rebind to a printable label; for the lambda this is its function repr.
    w = str(w)
    clf.fit(train[features], train['high_quality'])
    preds = clf.predict(test[features])
    # Fraction of test rows whose prediction equals the true 'high_quality'.
    # float() keeps this a true division even without `from __future__ import division`.
    accuracy = np.where(preds == test['high_quality'], 1, 0).sum() / float(len(test))
    # Single parenthesised argument: behaves the same under Python 2 and 3.
    print("Weights: %s, Accuracy: %3f" % (w, accuracy))
    results.append([w, accuracy])
@glamp
glamp / knn_wine.py
Last active December 20, 2015 03:48
import numpy as np
import pandas as pd
import pylab as pl
from sklearn.neighbors import KNeighborsClassifier

# Load the wine demo dataset and split it at random into two frames.
df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")
# Boolean mask, True where a uniform(0, 1) draw is <= 0.3 (about 30% of rows).
# NOTE(review): despite its name, rows where this mask is True go to `train`
# and the remaining rows go to `test` -- confirm this split is intended.
test_idx = np.random.uniform(0, 1, len(df)) <= 0.3
train = df[test_idx]
test = df[~test_idx]
@zacstewart
zacstewart / classifier.py
Last active September 19, 2024 23:56
Document Classification with scikit-learn
import os
import numpy
from pandas import DataFrame
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import KFold
from sklearn.metrics import confusion_matrix, f1_score
NEWLINE = '\n'
# coding=UTF-8
from __future__ import division
import re
# This is a naive text summarization algorithm
# Created by Shlomi Babluki
# April, 2013
class SummaryTool(object):
@damianavila
damianavila / remove_output.py
Created April 3, 2013 22:05
Remove output from IPython notebook from the command line (dev version 1.0)
"""
Usage: python remove_output.py notebook.ipynb [ > without_output.ipynb ]
Modified from remove_output by Minrk
"""
import sys
import io
import os
from IPython.nbformat.current import read, write
from dateutil.parser import parse
import pandas as pd

# monthly slaughter records since 1921
df = pd.read_csv("http://bit.ly/119792b")
# parse the date column (we could also use pd.to_datetime)
df['date'] = df['date'].apply(parse)
# sort the data frame by date; DataFrame.sort was removed from pandas,
# sort_values is its replacement
df = df.sort_values(['date'])
# create an index