Skip to content

Instantly share code, notes, and snippets.

View cheekybastard's full-sized avatar

cheekybastard

View GitHub Profile
# coding=UTF-8
from __future__ import division
import re
# This is a naive text summarization algorithm
# Created by Shlomi Babluki
# April, 2013
class SummaryTool(object):
# coding=UTF-8
import nltk
from nltk.corpus import brown
# This is a fast and simple noun phrase extractor (based on NLTK)
# Feel free to use it, just keep a link back to this post
# http://thetokenizer.com/2013/05/09/efficient-way-to-extract-the-main-topics-of-a-sentence/
# Created by Shlomi Babluki
# May, 2013
# http://preshing.com/20110831/penrose-tiling-explained
import math
import cmath
import cairo
#------ Configuration --------
# Pixel dimensions (width, height) of the cairo output image.
IMAGE_SIZE = (1000, 1000)
# NOTE(review): presumably the recursion depth of the Penrose triangle
# subdivision — higher values give a finer tiling at the cost of render
# time. Confirm against the subdivision loop in the rest of the script,
# which is not visible here.
NUM_SUBDIVISIONS = 8
#-----------------------------
#! /usr/bin/env python
# See http://preshing.com/20130115/view-your-filesystem-history-using-python
import optparse
import os
import fnmatch
import time
# Parse options
parser = optparse.OptionParser(usage='Usage: %prog [options] path [path2 ...]')
parser.add_option('-g', action='store', type='long', dest='secs', default=10,
# https://github.com/clutchio/clutch/blob/master/django_ext/http.py
#
# Copyright 2012 Twitter
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# http://pythonsweetness.tumblr.com/post/45227295342/fast-pypy-compatible-ordered-map-in-89-lines-of-python
"""
The great innovation here is that unlike a dictionary, skip lists keep their items permanently in order,
making it possible to efficiently walk the collection backwards and forwards, from start or end,
or from an arbitrary key, much more efficiently than would ever be possible with a dict.
I needed this class to implement efficient indexing of an in-memory collection,
where updating a single item would also update its position in a sorted list,
without the cost of having to re-build or re-sort that list on every update.
Inserting 938,075 string keys of ~8-10 bytes manages 75k inserts/second on PyPy on a Core 2 Duo,
# Rewritten code from /r2/r2/lib/db/_sorts.pyx
# http://amix.dk/blog/post/19588 # How Reddit ranking algorithms work
# http://amix.dk/blog/post/19574 # How Hacker News ranking algorithm works
# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
from math import sqrt
def _confidence(ups, downs):
n = ups + downs
/* =========================================================================
CurveZMQ - authentication and confidentiality for 0MQ
-------------------------------------------------------------------------
Copyright (c) 1991-2013 iMatix Corporation <www.imatix.com>
Copyright other contributors as noted in the AUTHORS file.
This is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
@cheekybastard
cheekybastard / reddit_scrape.py
Created March 17, 2013 05:19
reddit scraper
#!/usr/local/bin/python
'''
Date: July 4, 2012
Author: Justin Jessup
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Disclaimer:
All software provided as is. All software covered under the GPL license and free for public redistribution.
@cheekybastard
cheekybastard / default_settings.py
Created March 16, 2013 06:35
Asynchronous FTP file download handler for scrapy
#scrapy/settings/default_settings.py
'http': 'scrapy.core.downloader.handlers.http.HttpDownloadHandler',
'https': 'scrapy.core.downloader.handlers.http.HttpDownloadHandler',
's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
+ 'ftp': 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler',
}