Skip to content

Instantly share code, notes, and snippets.

@swayson
swayson / mongodb-generator-sample
Created March 3, 2015 07:19
MongoDB Find generator
# Set up a cursor over (at most) the first 50,000 documents of the `votes`
# collection in the `reddit` database on the default MongoDB server.
#
# MongoClient replaces pymongo.connection.Connection, which was deprecated in
# PyMongo 2.4 and removed in PyMongo 3.0; with no arguments both connect to
# the driver's default host and port.
from pymongo import MongoClient

m = MongoClient()
db = m.reddit
votes = db.votes
# skip(0) is a no-op offset, kept as the obvious place to page through results.
cursor = votes.find().skip(0).limit(50000)
# Single-argument print(...) emits the same text on Python 2 and Python 3;
# %s formats the cursor object itself, not the documents it will yield.
print("Setup cursor: %s" % cursor)
"""Kernel K-means"""
# Author: Mathieu Blondel <[email protected]>
# License: BSD 3 clause
import numpy as np
from sklearn.base import BaseEstimator, ClusterMixin
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.utils import check_random_state
@swayson
swayson / regex_url
Created October 27, 2014 17:15
Regular Expression for URL
(\b((https?://|www\.)|[a-z0-9\.-]+?\.(com|co\.uk|org|net|info|ca)(?=[/ \W\b]))[^ \t\r\n<>]*?(?=(([\'\xe2\x80\x9c".?!,:;]|&(amp|lt|gt|quot);)+?)?(\.\.+|[<>]|\s|$)))
@swayson
swayson / lsa_hack.r
Created February 26, 2014 06:56 — forked from rpietro/lsa_hack.r
Analyze Text Similarity with R: Latent Semantic Analysis and Multidimensional Scaling
# script stolen from http://goo.gl/YbQyAQ
# install.packages("tm")
# install.packages("ggplot2")
# install.packages("lsa")
# install.packages("scatterplot3d")
#install.packages("SnowballC")
#if !(require('SnowballC')) then install.packages("SnowballC")
library(tm)
library(ggplot2)
@swayson
swayson / prep_edx_downloader
Created January 31, 2014 13:43
Shell script for preparation and use of the edx-downloader python script.
# Prepare an isolated environment for the edx-downloader script.
# The steps are order-dependent: the environment must exist and be active
# before the pip installs, so that the packages land inside it.

# Create a virtual environment using conda
# (named "edx_downloader", with Python 2.7 and pip preinstalled)
conda create -n edx_downloader python=2.7 pip
# NOTE(review): bare `activate` looks like the Windows form of older conda
# releases; on Unix shells this was `source activate` — confirm for your setup.
activate edx_downloader
# Install dependencies:
pip install youtube-dl
pip install beautifulsoup4
# Get the script
# (clones into ./edx-downloader under the current directory)
git clone https://github.com/dmitrime/edx-downloader.git
@swayson
swayson / prep_coursera_dl
Created January 31, 2014 13:21
Shell script for preparation and use of the coursera-dl python script.
# Prepare an isolated environment for the coursera-dl script.
# The steps are order-dependent: the repository must be cloned and entered
# before requirements.txt can be found, and the environment must be active
# before pip runs so that the packages land inside it.

# Create a virtual environment using conda
# (named "coursera_dl", with Python 2.7 and pip preinstalled)
conda create -n coursera_dl python=2.7 pip
# Get the code
git clone https://github.com/coursera-dl/coursera.git
# No trailing backslash here: in a POSIX shell a backslash at end of line is a
# line continuation and would splice the next line onto this command.
cd coursera
# Install the dependencies (using virtual environment)
# NOTE(review): bare `activate` looks like the Windows form of older conda
# releases; on Unix shells this was `source activate` — confirm for your setup.
activate coursera_dl
pip install -r requirements.txt