Skip to content

Instantly share code, notes, and snippets.

@fish2000
Created September 30, 2010 05:21
Show Gist options
  • Save fish2000/604071 to your computer and use it in GitHub Desktop.
Save fish2000/604071 to your computer and use it in GitHub Desktop.
Ad-hoc Python interface to the ISBNdb.com web service API
#!/usr/bin/env python
# encoding: utf-8
"""
isbnwrangle.py
Created by FI$H 2000 on 2010-03-24.
Copyright (c) 2010 OST, LLC. All rights reserved.
"""
import sys, os, unittest, pyisbn, urllib2, simplejson
from django.core.management import setup_environ
import settings
setup_environ(settings)
from django.core.exceptions import ObjectDoesNotExist
from pprint import pprint
from decorator import decorator
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
from utils.monkeypatch import memoize, test
from ost2.forsale.models import FSItem, FSAmazonDataset, FSGetColorHash, FSUserProfile, FSISBNDBDataset
@decorator
def silent(f, *args, **kwargs):
    """Call ``f(*args, **kwargs)`` and return None if a lookup-style error occurs.

    Used on the ISBNDBEntry accessors so that a missing element or attribute
    in the parsed response yields None instead of an exception.

    Swallowed exceptions: AttributeError, IndexError, KeyError, TypeError and
    ValueError.  Anything else propagates to the caller.
    """
    try:
        return f(*args, **kwargs)
    # KeyError is included because subscripting a BeautifulSoup tag with an
    # attribute it does not carry (e.g. tag['isbn13']) raises KeyError, which
    # the original handler list let through.
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        return None
class ISBNDBEntry(object):
    """One ISBNdb.com ``books.xml`` lookup for a single ISBN.

    Constructing an instance performs the HTTP request immediately, keeps the
    raw response in ``data`` and the parsed tree in ``soup``, and sets
    ``valid`` to True when the response's ``booklist`` element reports more
    than zero results.

    The read-only properties at the bottom of the class pull individual
    values out of ``soup``.  Each accessor is wrapped in ``silent``, so the
    lookup errors that decorator swallows surface as None.
    """

    # Class-level defaults; every one except ``valid`` is overwritten
    # unconditionally in __init__.
    _isbn = None
    access_key = None
    url = None
    data = None
    soup = None
    valid = False

    def __init__(self, isbn):
        """Fetch and parse the ISBNdb record for ``isbn``.

        Errors raised by ``urllib2.urlopen`` or ``read`` are not caught and
        propagate to the caller.  A response that lacks a usable
        ``booklist``/``total_results`` value leaves ``valid`` False.
        """
        super(ISBNDBEntry, self).__init__()
        self._isbn = isbn
        # NOTE(review): looks like a redacted placeholder -- a real ISBNdb
        # access key presumably has to be supplied here.
        self.access_key = "????????"
        self.url = "http://isbndb.com/api/books.xml?access_key=%s&results=details&results=prices&index1=isbn&value1=%s" % (self.access_key, self._isbn)

        # Close the connection even if read() fails; the original never
        # closed the response object.
        response = urllib2.urlopen(self.url)
        try:
            self.data = response.read()
        finally:
            response.close()

        self.soup = BeautifulStoneSoup(self.data)

        # find() returns None when there is no <booklist> (subscripting it is
        # a TypeError), a tag without the attribute raises KeyError, and a
        # non-numeric value makes int() raise ValueError.  The original only
        # handled ValueError/IndexError and ran int() outside the try, so any
        # of these aborted the constructor instead of marking the entry
        # invalid.
        try:
            total_results = int(self.soup.find('booklist')['total_results'])
        except (TypeError, KeyError, ValueError, IndexError):
            self.valid = False
        else:
            self.valid = total_results > 0

    @silent
    def _get_isbn(self):
        """Return the ``isbn`` attribute of the ``bookdata`` element."""
        return self.soup.find('bookdata')['isbn']

    @silent
    def _get_isbn13(self):
        """Return the ``isbn13`` attribute of the ``bookdata`` element."""
        return self.soup.find('bookdata')['isbn13']

    @silent
    def _get_title(self):
        """Return the string content of the ``title`` element."""
        return self.soup.find('title').string

    @silent
    def _get_titlelong(self):
        """Return the string content of the ``titlelong`` element."""
        return self.soup.find('titlelong').string

    @silent
    def _get_authors(self):
        """Return the string content of the ``authorstext`` element."""
        return self.soup.find('authorstext').string

    @silent
    def _get_dewey(self):
        """Return the ``dewey_decimal_normalized`` attribute of ``details``."""
        return self.soup.find('details')['dewey_decimal_normalized']

    @silent
    def _get_bookid(self):
        """Return the ``book_id`` attribute of the ``bookdata`` element."""
        return self.soup.find('bookdata')['book_id']

    @silent
    def _get_publisherid(self):
        """Return the ``publisher_id`` attribute of ``publishertext``."""
        return self.soup.find('publishertext')['publisher_id']

    @silent
    def _get_publisher(self):
        """Return the string content of the ``publishertext`` element."""
        return self.soup.find('publishertext').string

    # Read-only public views over the accessors above.
    isbn = property(_get_isbn)
    isbn13 = property(_get_isbn13)
    title = property(_get_title)
    titlelong = property(_get_titlelong)
    publisher = property(_get_publisher)
    authors = property(_get_authors)
    dewey = property(_get_dewey)
    bookID = property(_get_bookid)
    publisherID = property(_get_publisherid)
@memoize
def get_isbndb_info(isbn):
    """Return an ISBNDBEntry built for ``isbn``.

    Wrapped in the project's ``memoize`` decorator -- presumably so repeated
    lookups of the same ISBN reuse one entry (and one HTTP request); confirm
    against utils.monkeypatch.
    """
    entry = ISBNDBEntry(isbn)
    return entry
def FSRefreshISBNdbDatasetForBook(item):
    """Copy ISBNdb data for ``item`` into its FSISBNDBDataset row.

    Looks up ``item.ISBN`` through get_isbndb_info().  When the entry is
    valid, the existing FSISBNDBDataset for that ISBN is loaded (or a new,
    unsaved one is built), every non-empty value from the entry is copied
    onto it, and the row is saved.  An invalid entry leaves the database
    untouched.

    Returns the ISBNDBEntry in both cases.
    """
    entry = get_isbndb_info(str(item.ISBN))
    if not entry.valid:
        return entry

    try:
        record = FSISBNDBDataset.objects.get(ISBN=item.ISBN)
    except ObjectDoesNotExist:
        record = FSISBNDBDataset(ISBN=item.ISBN)

    # Text fields are stored as unicode with surrounding whitespace, and then
    # any leading/trailing commas, removed.
    for field in ('title', 'titlelong', 'publisher', 'authors'):
        text = getattr(entry, field)
        if text:
            setattr(record, field, unicode(text).strip().strip(","))

    dewey = entry.dewey
    if dewey:
        record.dewey = dewey

    raw_xml = entry.data
    if raw_xml:
        record.xmldata = str(raw_xml)

    record.save()
    return entry
class isbnwrangleTests(unittest.TestCase):
    """Report-style checks that compare stored items against ISBNdb data.

    The methods print their findings rather than asserting.  Methods whose
    names start with ``_test`` do not match unittest's default ``test``
    prefix, so the default loader does not collect them.
    """

    def setUp(self):
        # Sample ISBNdb response; stored on the instance but not read by any
        # method in this class.
        self.isbnxml = """
<ISBNdb server_time="2010-03-24T05:28:44Z">
<BookList total_results="1" page_size="10" page_number="1" shown_results="1">
<BookData book_id="learning_from_las_vegas_a01" isbn="026272006X" isbn13="9780262720069">
<Title>Learning from Las Vegas</Title>
<TitleLong>Learning from Las Vegas: the forgotten symbolism of architectural form</TitleLong>
<AuthorsText>Robert Venturi, Denise Scott Brown, Steven Izenour</AuthorsText>
<PublisherText publisher_id="mit_press">Cambridge, Mass. : MIT Press, c1977</PublisherText>
<Details change_time="2004-08-02T12:08:08Z" price_time="2010-03-18T13:10:29Z" edition_info="pbk" language="eng" physical_description_text="xvii, 192 p. : ill. ; 24 cm." lcc_number="NA735" dewey_decimal_normalized="720.979313" dewey_decimal="720/.9793/13" />
</BookData>
</BookList>
</ISBNdb>
"""

    @test
    def _test_one_isbn(self):
        """Fetch one known ISBN and print the parsed fields."""
        entry = ISBNDBEntry('026272006X')
        print("Title: %s" % entry.title)
        print("Long Title: %s" % entry.titlelong)
        print("ISBN: %s" % entry.isbn)
        print("ISBN13: %s" % entry.isbn13)
        print("Authors: %s" % entry.authors)
        print("Dewey Number: %s" % entry.dewey)

    @test
    def test_isbndb(self):
        """Refresh ISBNdb data for items lacking it and report ISBN13 matches."""
        total = 0
        matched = 0
        for item in FSItem.objects.all():
            total += 1

            if FSISBNDBDataset.objects.isbn(item.ISBN):
                print("###\t %s\t\t(preexisting data)" % item.ISBN)
                continue

            entry = FSRefreshISBNdbDatasetForBook(item)
            if not entry.valid:
                print("###\t %s\t\t(INVALID ISBNdb DATA)\t\t%s" % (item.ISBN, item.title))
                continue

            if item.ISBN == entry.isbn13:
                matched += 1
                print("###\t %s\t\t(match)\t\t\t\t%s" % (item.ISBN, item.title))
            else:
                print("###\t %s\t\t(NO MATCH)\t\t\t%s" % (item.ISBN, item.title))

        print("###\t %s of %s items had ISBN13s matching ISBNDB values" % (matches_and_total(matched, total)) if False else "###\t %s of %s items had ISBN13s matching ISBNDB values" % (matched, total))

    @test
    def test_isbndb13(self):
        """Print each item's ISBN beside the ISBN13 that ISBNdb reports."""
        total = 0
        matched = 0
        for item in FSItem.objects.all():
            total += 1
            entry = get_isbndb_info(str(item.ISBN))
            remote_isbn13 = entry.isbn13
            same = item.ISBN == remote_isbn13
            verdict = 'MATCH' if same else 'no match'

            print(">>>\t Listed ISBN: \t\t\t%s" % item.ISBN)
            print(">>>\t ISBNDB ISBN13: \t\t%s\t (%s)" % (remote_isbn13, verdict))
            print("\n")

            if same:
                matched += 1

        print("###\t %s of %s items had ISBN13s matching ISBNDB values" % (matched, total))

    @test
    def _test_fixprices(self):
        """Fill in missing item prices from the Amazon dataset's list price."""
        for item in FSItem.objects.filter(price__isnull=True):
            try:
                amazon = FSAmazonDataset.objects.isbn(item.ISBN)
            except ObjectDoesNotExist:
                print("WTF: no FSAmazonDataset found for ISBN %s" % item.ISBN)
                continue

            if not amazon or item.price:
                continue

            # NOTE(review): the pasted source lost its indentation; the save
            # and the report line are assumed to sit inside this branch --
            # confirm against the original file.
            if amazon.listprice:
                # listprice is divided by 100 -- presumably cents to dollars.
                item.price = int(amazon.listprice/100)
                item.save()
                print("%s\t '%s' new price is $%s" % (item.ISBN, item.title, item.price))
# Run the test cases above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment