Kirk Kimmel kimmel

25 followers · 23 following

USA
kimmel.github.com

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

kimmel / method_bench.pl

Created September 19, 2012 12:22

A Perl benchmark of foreach loops

	#!/usr/bin/perl

	use v5.16;
	use warnings;
	use autodie qw( :all );
	use utf8::all;
	use List::MoreUtils qw( uniq any );
	use Benchmark qw( cmpthese :hireswallclock );

	my %file_names = ();

kimmel / gist:3689276

Created September 10, 2012 06:42

simple text matching with index

	#!/usr/bin/perl

	use v5.14;
	use warnings;
	use utf8::all;
	use File::Slurp qw( read_file );

	my $pattern_list = do 'fw.pl';
	my @patterns = keys $pattern_list;
	my $content = read_file( 'dracula.txt' );

kimmel / gist:3689246

Created September 10, 2012 06:32

scan dracula for 4k patterns

	#!/usr/bin/perl

	use v5.14;
	use warnings;
	use utf8::all;
	use File::Slurp qw( read_file );

	my $pattern_list = do 'fw.pl';
	my @patterns = keys $pattern_list;
	my $content = read_file( 'dracula.txt' );

kimmel / gist:3688579

Created September 10, 2012 02:57

brute force all match patterns

	#!/usr/bin/perl

	use v5.14;
	use warnings;
	use utf8::all;
	use File::Slurp qw( read_file );

	...

	my @patterns = map {qr/\b$_\b/ixms} keys $pattern_list;

kimmel / gist:3688004

Created September 9, 2012 23:55

text normalization and token splitting

	#!/usr/bin/perl

	use v5.14;
	use warnings;
	use utf8::all;
	use List::Util qw( reduce );
	use List::MoreUtils qw( uniq any );
	use Path::Class::Rule;
	use File::Slurp qw( read_file );

kimmel / gist:3681026

Created September 8, 2012 23:28

decode_json() takes a binary encoded string

	#!/usr/bin/perl

	use v5.14;
	use warnings;
	use utf8::all;
	use Encode;
	use Data::Dumper;
	use JSON::XS qw( decode_json );

	my $wl = '{"creche":"crèche", "¥":"£", "₡":"волн"}';

kimmel / gist:3482317

Created August 26, 2012 18:23

perl regexp html parsing

	$html =~ m{
	>\s*$num\.</td>\s*<td>\s*<center>\s*<a\s+id=up_
	(\d+) # $1 -> id
	\s+href="vote\?for=\g1&dir=up&whence=[%a-e0-9]+">\s*<img\s+src="http://yc
	ombinator\.com/images/grayarrow\.gif"\s+border=\d+\s+vspace=\d+\s+hspace=
	\d+>\s*</a>\s*<span\s+id=down_\g1>\s*</span>\s*</center>\s*</td>\s*<td\s+
	class="title">\s*<a\s+href="
	([^"]+) # $2 -> uri
	">
	([^<]+) # $3 -> desc

kimmel / gist:3482230

Created August 26, 2012 18:10

python HTMLParser regexp

	locatestarttagend = re.compile(r"""
	<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
	(?:[\s/]* # optional whitespace before attribute name
	(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
	(?:\s*=+\s* # value indicator
	(?:'[^']*' # LITA-enclosed value
	|"[^"]*" # LIT-enclosed value
	|(?!['"])[^>\s]* # bare value
	)
	)?(?:\s|/(?!>))*

kimmel / gist:3482220

Created August 26, 2012 18:09

python Beautiful Soup regexp 2

	# Methods for supporting CSS selectors.

	tag_name_re = re.compile('^[a-z0-9]+$')

	# /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
	#   \---/  \---/\-------------/    \-------/
	#     |      |         |               |
	#     |      |         |           The value
	#     |      |    ~,|,^,$,* or =
	#     |  Attribute

kimmel / gist:3482211

Created August 26, 2012 18:08

python Beautiful Soup regexp 1

	from datetime import datetime
	import BeautifulSoup as soup
	import requests

	...

	r = requests.get(host + page)
	doc = soup.BeautifulSoup(r.content)
	titles = doc.table.findAll(True, {'class': 'title'})