Skip to content

Instantly share code, notes, and snippets.

View edsu's full-sized avatar

Ed Summers edsu

View GitHub Profile
@edsu
edsu / bots.py
Created June 23, 2011 20:16
see what your bot traffic is like
#!/usr/bin/env python
"""
Hack to look for user agent strings in a typical Apache-style log and
count up the number of requests by bots vs non-bots. The list of bot
user agents comes from http://www.user-agents.org/ but has had some
agents added to it, since the user-agents.org list is pretty out of date, e.g.
no Bing?!
"""
@edsu
edsu / pmc-licenses.py
Created August 2, 2011 01:43
prints out PubMedCentral IDs and their associated license URL (if one is found in the article XML)
#!/usr/bin/env python
"""
Script to go through all the OAI-PMH records in the PubMedCentral database and
print out a tab delimited list of record identifiers and a license url (if one
is included).
You'll need lxml installed to run this.
"""
@edsu
edsu / gen.py
Created August 19, 2011 02:51
a generator example
def words(filename):
    """Lazily yield the space-separated words of a text file.

    Each line of *filename* is split on single space characters, so
    consecutive spaces produce empty strings and the last word of every
    line keeps its trailing newline.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the generator is exhausted or closed, instead of being left for the
    garbage collector.
    """
    with open(filename) as source:
        for line in source:
            for word in line.split(' '):
                yield word
# Demo: print each word produced by the generator for file.txt.
for word in words('file.txt'):
    print(word)
@edsu
edsu / resources.py
Created August 30, 2011 02:54
tastypie resources module
from tastypie import fields
from tastypie.resources import ModelResource
from tastypie.authorization import DjangoAuthorization
from tastypie.authentication import ApiKeyAuthentication
from tasty.pie.models import Book, Author
class BookResource(ModelResource):
authors = fields.ToManyField('tasty.pie.api.resources.AuthorResource', 'authors', full=True)
class Meta:
@edsu
edsu / models.py
Created August 30, 2011 02:58
django models module
from django.db import models
class Book(models.Model):
    """A book, described only by a short title."""

    # Titles are capped at 15 characters.
    title = models.CharField(max_length=15)
class Author(models.Model):
    """An author, tied to one Book through a foreign key.

    ``related_name='authors'`` names the reverse accessor on Book.
    """

    name = models.CharField(max_length=255)
    book = models.ForeignKey(
        'Book',
        related_name='authors',
    )
class EssayAdmin(VersionAdmin, ManyToManyAdmin):
form = EssayForm
# helper to put LCCNs mentioned in an essay into the list display
def lccns(obj):
    """Return the LCCNs of an essay's subjects as comma-separated links.

    Builds one anchor per item in ``obj.subjects.all()``, each pointing at
    that title's LCCN page on chroniclingamerica.loc.gov, joins them with
    ", " and passes the resulting HTML through ``mark_safe``.
    """
    # The href attribute must be closed with a double quote; without it the
    # remainder of the tag is read as part of the URL and the link breaks.
    anchor = '<a href="http://chroniclingamerica.loc.gov/lccn/%s">%s</a>'
    links = [anchor % (title.lccn, title.lccn) for title in obj.subjects.all()]
    return mark_safe(', '.join(links))
class CampaignResource(ModelResource):
work = fields.ForeignKey(WorkResource, 'work')
class Meta:
queryset = models.Campaign.objects.all()
resource_name = 'campaign'
excludes = ['amazon_receiver', 'paypal_receiver']
def override_urls(self):
return [
@edsu
edsu / ex.py
Created September 26, 2011 19:17
microdata experiment
>>> import microdata
>>> import urllib
>>> html = urllib.urlopen('http://lx6.loc.gov').read()
>>> items = microdata.get_items(html)
>>> print len(items)
30
>>> print items[0].tweet_time
2011-02-15 06:00
>>> print items[0].json()
{
@edsu
edsu / test.rb
Created September 27, 2011 14:23
#!/usr/bin/env ruby
# Minimal script: load the bagit gem and instantiate a bag.
require 'rubygems'
require 'bagit'
# 'test-data' is presumably the path of the bag directory -- TODO confirm
bag = BagIt::Bag.new 'test-data'
ed@curry:~/Projects/bagit-ruby$ rake spec
(in /home/ed/Projects/bagit-ruby)
/usr/bin/ruby1.8 -S bundle exec rspec -fs --color spec/tag_info_spec.rb spec/manifest_spec.rb spec/validation_spec.rb spec/bagit_spec.rb spec/fetch_spec.rb
Tag Info Files
bagit.txt