I may be slow to respond.

Khalil micaleel

I may be slow to respond.

code + data = ツ (alt https://github.com/khalil707)

shlomibabluki / hashtagify.py

Last active September 19, 2024 23:54

	# coding=UTF-8
	from __future__ import division
	import nltk
	from collections import Counter

	# This is a simple tool for automatically adding hashtags to an article title
	# Created by Shlomi Babluki
	# Sep, 2013

shlomibabluki / facebook_ner.py

Created August 25, 2013 10:35

	# coding=UTF-8
	from __future__ import division
	import nltk
	import re
	import requests

	# Add your freebase key here
	# If you don't have one, register at https://code.google.com/apis/console
	FREEBASE_KEY = ""

shlomibabluki / freebase_ner.py

Created August 25, 2013 10:34

	# coding=UTF-8
	from __future__ import division
	import nltk
	import re
	import requests

	# Add your freebase key here
	# If you don't have one, register at https://code.google.com/apis/console
	FREEBASE_KEY = ""

minrk / nbstripout

Last active March 12, 2025 18:41

git pre-commit hook for stripping output from IPython notebooks

	#!/usr/bin/env python
	"""strip outputs from an IPython Notebook

	Opens a notebook, strips its output, and writes the outputless version to the original file.

	Useful mainly as a git filter or pre-commit hook for users who don't want to track output in VCS.

	This does mostly the same thing as the `Clear All Output` command in the notebook UI.

	LICENSE: Public Domain

glamp / knn_weights.py

Last active December 20, 2015 03:49

	# Compare the accuracy of a 3-nearest-neighbours classifier under different
	# neighbour-weighting schemes. `train`, `test`, `features`, `np` and
	# `KNeighborsClassifier` must already be defined when this snippet runs.
	results = []
	for w in ['uniform', 'distance', lambda x: np.log(x)]:
	    clf = KNeighborsClassifier(3, weights=w)
	    # Rebind to a printable label (for the lambda this is its repr).
	    w = str(w)
	    clf.fit(train[features], train['high_quality'])
	    preds = clf.predict(test[features])
	    # Fraction of test rows whose prediction equals the true label.
	    accuracy = np.where(preds==test['high_quality'], 1, 0).sum() / float(len(test))
	    # A single parenthesized argument prints the same on Python 2 and 3.
	    print("Weights: %s, Accuracy: %3f" % (w, accuracy))

	    results.append([w, accuracy])

glamp / knn_wine.py

Last active December 20, 2015 03:48

	import numpy as np
	import pandas as pd
	import pylab as pl
	from sklearn.neighbors import KNeighborsClassifier


	# Load the wine data set from a remote CSV file.
	df = pd.read_csv("https://s3.amazonaws.com/demo-datasets/wine.csv")

	# Randomly flag roughly 30% of the rows as the held-out test set.
	test_idx = np.random.uniform(0, 1, len(df)) <= 0.3
	# Flagged rows form the test set; all remaining rows are used for training.
	train = df[~test_idx]
	test = df[test_idx]

zacstewart / classifier.py

Last active September 19, 2024 23:56

Document Classification with scikit-learn

	import os
	import numpy
	from pandas import DataFrame
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.pipeline import Pipeline
	from sklearn.cross_validation import KFold
	from sklearn.metrics import confusion_matrix, f1_score

	NEWLINE = '\n'

shlomibabluki / summary_tool.py

Created April 27, 2013 15:36

	# coding=UTF-8
	from __future__ import division
	import re

	# This is a naive text summarization algorithm
	# Created by Shlomi Babluki
	# April, 2013


	class SummaryTool(object):

damianavila / remove_output.py

Created April 3, 2013 22:05

Remove output from IPython notebook from the command line (dev version 1.0)

	"""
	Usage: python remove_output.py notebook.ipynb [ > without_output.ipynb ]
	Modified from remove_output by Minrk

	"""
	import sys
	import io
	import os
	from IPython.nbformat.current import read, write

glamp / plot_example.py

Last active December 13, 2015 20:48

	from dateutil.parser import parse
	import pandas as pd

	# monthly slaughter records since 1921
	df = pd.read_csv("http://bit.ly/119792b")
	# parse the date strings into datetime objects (pd.to_datetime would also work)
	df['date'] = df['date'].apply(parse)
	# sort the data frame by date
	# (DataFrame.sort was removed from pandas; sort_values is its replacement)
	df = df.sort_values('date')
	# create an index