Skip to content

Instantly share code, notes, and snippets.

import numpy as np
from scipy.spatial.distance import cdist
def nearest_neighbor(samples, targets, samples_to_classify, metric='euclidean'):
    """Label each query point with the target of its nearest reference sample.

    Args:
        samples: Reference points, passed to ``cdist`` as its second
            argument (``cdist`` expects a 2-D array-like, one row per point).
        targets: Labels aligned with ``samples``; entry ``i`` is the label of
            row ``i``. May be an ndarray or any plain sequence (list, tuple).
        samples_to_classify: Query points, passed to ``cdist`` as its first
            argument (same layout as ``samples``).
        metric: Distance metric name or callable forwarded to ``cdist``.

    Returns:
        An ndarray holding one label per row of ``samples_to_classify``.
    """
    # Rows index the query points, columns index the reference samples.
    distances = cdist(samples_to_classify, samples, metric=metric)
    # argmin along axis 1 picks, per query, the column of the smallest
    # distance (the first such column on ties).
    nearest = np.argmin(distances, axis=1)
    # Coerce to an ndarray so the integer-array index also works when the
    # caller passes a plain list/tuple of labels; ndarrays pass through as-is.
    return np.asarray(targets)[nearest]
@darkseed
darkseed / getcolor.py
Last active February 25, 2017 10:25 — forked from zollinger/getcolor.py
import Image, ImageDraw
def get_colors(infile, outfile, numcolors=10, swatchsize=20, resize=150):
image = Image.open(infile)
image = image.resize((resize, resize))
result = image.convert('P', palette=Image.ADAPTIVE, colors=numcolors)
result.putalpha(0)
colors = result.getcolors(resize*resize)
@darkseed
darkseed / lm_example
Last active August 29, 2015 14:23 — forked from yoavg/lm_example
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# The unreasonable effectiveness of Character-level Language Models\n",
"## (and why RNNs are still cool)\n",
"\n",
"###[Yoav Goldberg](http://www.cs.biu.ac.il/~yogo)\n",
import sys
import numpy
from nltk.cluster import KMeansClusterer, GAAClusterer, euclidean_distance
import nltk.corpus
from nltk import decorators
import nltk.stem
stemmer_func = nltk.stem.EnglishStemmer().stem
stopwords = set(nltk.corpus.stopwords.words('english'))
  1. General Background and Overview
ADD JAR /opt/cloudera/parcels/CDH/lib/hive/lib/hive-contrib.jar;
DROP TABLE raw_log;
CREATE EXTERNAL TABLE raw_log(
IP STRING,
timestamp STRING,
URL STRING,
referrer STRING,
user_agent STRING)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
package ner
import edu.stanford.nlp.ie.crf.CRFClassifier
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import edu.stanford.nlp.ling.CoreAnnotations
import java.util.ArrayList
import java.util.HashMap
import java.util.Map
import scala.xml.XML
import com.twitter.algebird.{Aggregator, Semigroup}
import com.twitter.scalding._
import scala.util.Random
/**
* This job is a tutorial of sorts for scalding's Execution[T] abstraction.
* It is a simple implementation of Lloyd's algorithm for k-means on 2D data.
*
* http://en.wikipedia.org/wiki/K-means_clustering
import com.twitter.scalding._
import com.twitter.algebird.{ MinHasher, MinHasher32, MinHashSignature }
/**
* Computes similar items (with a string itemId), based on approximate
* Jaccard similarity, using LSH.
*
* Assumes an input data TSV file of the following format:
*
* itemId userId