Created
September 30, 2010 05:21
-
-
Save fish2000/604071 to your computer and use it in GitHub Desktop.
ad-hoc python interface to ISBNdb.com web service API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# encoding: utf-8
"""
isbnwrangle.py
Created by FI$H 2000 on 2010-03-24.
Copyright (c) 2010 OST, LLC. All rights reserved.
"""
import sys, os, unittest, pyisbn, urllib2, simplejson | |
from django.core.management import setup_environ | |
import settings | |
setup_environ(settings) | |
from django.core.exceptions import ObjectDoesNotExist | |
from pprint import pprint | |
from decorator import decorator | |
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup | |
from utils.monkeypatch import memoize, test | |
from ost2.forsale.models import FSItem, FSAmazonDataset, FSGetColorHash, FSUserProfile, FSISBNDBDataset | |
@decorator
def silent(f, *args, **kwargs):
    """Call ``f(*args, **kwargs)`` and return its result, or ``None`` on a
    failed lookup.

    Intended for best-effort accessors that dig a value out of a parsed
    document: when the element or attribute being read is not there, the
    caller gets ``None`` instead of an exception.

    Returns ``None`` if ``f`` raises AttributeError, IndexError, KeyError,
    TypeError or ValueError.  Any other exception propagates unchanged.
    """
    try:
        return f(*args, **kwargs)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # KeyError covers dict-style lookups of an absent key (for example
        # subscripting a tag with an attribute it does not carry), which the
        # earlier exception list let escape.
        return None
class ISBNDBEntry(object):
    """One ISBNdb.com ``books.xml`` lookup for a single ISBN.

    Constructing an instance performs the HTTP request and parses the
    response.  The interesting fields are then available as read-only
    properties; every accessor is wrapped in ``silent`` so a field that
    cannot be read comes back as ``None``.
    """

    # Class-level defaults; all of them are overwritten per instance in
    # __init__.
    _isbn = None        # ISBN string the lookup was made for
    access_key = None   # ISBNdb API access key used for the request
    url = None          # full request URL
    data = None         # raw response body
    soup = None         # BeautifulStoneSoup parse of ``data``
    valid = False       # True when the response reports at least one result

    def __init__(self, isbn, access_key=None):
        """Fetch and parse the ISBNdb record for ``isbn``.

        ``access_key`` optionally overrides the built-in placeholder key.
        Errors raised by ``urllib2.urlopen`` propagate to the caller.
        """
        super(ISBNDBEntry, self).__init__()
        self._isbn = isbn
        if access_key is None:
            access_key = "????????"
        self.access_key = access_key
        self.url = "http://isbndb.com/api/books.xml?access_key=%s&results=details&results=prices&index1=isbn&value1=%s" % (self.access_key, self._isbn)

        # Close the connection even if reading the body fails.
        response = urllib2.urlopen(self.url)
        try:
            self.data = response.read()
        finally:
            response.close()
        self.soup = BeautifulStoneSoup(self.data)

        # A response with no booklist element, no total_results attribute,
        # or a non-numeric count is treated as "no results" rather than
        # being allowed to crash the constructor.
        booklist = self.soup.find('booklist')
        try:
            self.valid = int(booklist['total_results']) > 0
        except (TypeError, KeyError, ValueError, IndexError):
            self.valid = False

    @silent
    def _get_isbn(self):
        """``isbn`` attribute of the ``bookdata`` element."""
        return self.soup.find('bookdata')['isbn']

    @silent
    def _get_isbn13(self):
        """``isbn13`` attribute of the ``bookdata`` element."""
        return self.soup.find('bookdata')['isbn13']

    @silent
    def _get_title(self):
        """Text of the ``title`` element."""
        return self.soup.find('title').string

    @silent
    def _get_titlelong(self):
        """Text of the ``titlelong`` element."""
        return self.soup.find('titlelong').string

    @silent
    def _get_authors(self):
        """Text of the ``authorstext`` element."""
        return self.soup.find('authorstext').string

    @silent
    def _get_dewey(self):
        """``dewey_decimal_normalized`` attribute of the ``details`` element."""
        return self.soup.find('details')['dewey_decimal_normalized']

    @silent
    def _get_bookid(self):
        """``book_id`` attribute of the ``bookdata`` element."""
        return self.soup.find('bookdata')['book_id']

    @silent
    def _get_publisherid(self):
        """``publisher_id`` attribute of the ``publishertext`` element."""
        return self.soup.find('publishertext')['publisher_id']

    @silent
    def _get_publisher(self):
        """Text of the ``publishertext`` element."""
        return self.soup.find('publishertext').string

    # Read-only public views over the accessors above.
    isbn = property(_get_isbn)
    isbn13 = property(_get_isbn13)
    title = property(_get_title)
    titlelong = property(_get_titlelong)
    publisher = property(_get_publisher)
    authors = property(_get_authors)
    dewey = property(_get_dewey)
    bookID = property(_get_bookid)
    publisherID = property(_get_publisherid)
@memoize
def get_isbndb_info(isbn):
    """Return an ``ISBNDBEntry`` for ``isbn``.

    Wrapped in ``memoize`` from ``utils.monkeypatch`` -- presumably so that
    repeated requests for the same ISBN reuse the first lookup; confirm
    against that helper.
    """
    entry = ISBNDBEntry(isbn)
    return entry
def FSRefreshISBNdbDatasetForBook(item):
    """Copy ISBNdb data for ``item`` into its ``FSISBNDBDataset`` row.

    Looks up ``item.ISBN`` through ``get_isbndb_info``.  When the lookup is
    valid, the dataset keyed by that ISBN is fetched (or instantiated if
    none exists), every non-empty field from the lookup is copied onto it,
    and it is saved.  Nothing is written for an invalid lookup.

    Returns the lookup object in both cases so callers can inspect it.
    """
    iss = get_isbndb_info(str(item.ISBN))
    if not iss.valid:
        return iss

    try:
        dataset = FSISBNDBDataset.objects.get(ISBN=item.ISBN)
    except ObjectDoesNotExist:
        dataset = FSISBNDBDataset(ISBN=item.ISBN)

    # Text fields are stored stripped of surrounding whitespace and then of
    # leading/trailing commas.
    for field in ('title', 'titlelong', 'publisher', 'authors'):
        value = getattr(iss, field)
        if value:
            setattr(dataset, field, unicode(value).strip().strip(","))

    dewey = iss.dewey
    if dewey:
        dataset.dewey = dewey

    raw = iss.data
    if raw:
        dataset.xmldata = str(raw)

    dataset.save()
    return iss
class isbnwrangleTests(unittest.TestCase):
    """Ad-hoc reporting checks for the ISBNdb integration.

    These methods print reports instead of asserting; they read (and in
    places write) the project's database models and perform ISBNdb lookups.
    """

    def setUp(self):
        """Store a sample ISBNdb XML response on the test case."""
        # Not read by any method in this class; kept as a reference fixture.
        self.isbnxml = """
        <ISBNdb server_time="2010-03-24T05:28:44Z">
        <BookList total_results="1" page_size="10" page_number="1" shown_results="1">
        <BookData book_id="learning_from_las_vegas_a01" isbn="026272006X" isbn13="9780262720069">
        <Title>Learning from Las Vegas</Title>
        <TitleLong>Learning from Las Vegas: the forgotten symbolism of architectural form</TitleLong>
        <AuthorsText>Robert Venturi, Denise Scott Brown, Steven Izenour</AuthorsText>
        <PublisherText publisher_id="mit_press">Cambridge, Mass. : MIT Press, c1977</PublisherText>
        <Details change_time="2004-08-02T12:08:08Z" price_time="2010-03-18T13:10:29Z" edition_info="pbk" language="eng" physical_description_text="xvii, 192 p. : ill. ; 24 cm." lcc_number="NA735" dewey_decimal_normalized="720.979313" dewey_decimal="720/.9793/13" />
        </BookData>
        </BookList>
        </ISBNdb>
        """

    @test
    def _test_one_isbn(self):
        """Look up one known ISBN and print its main fields.

        The leading underscore keeps the default unittest loader from
        collecting this method.
        """
        entry = ISBNDBEntry('026272006X')
        print("Title: %s" % entry.title)
        print("Long Title: %s" % entry.titlelong)
        print("ISBN: %s" % entry.isbn)
        print("ISBN13: %s" % entry.isbn13)
        print("Authors: %s" % entry.authors)
        print("Dewey Number: %s" % entry.dewey)

    @test
    def test_isbndb(self):
        """Refresh ISBNdb data for items lacking it and report ISBN13 matches."""
        total = 0
        matched = 0
        for fi in FSItem.objects.all():
            total += 1

            # Items that already have a stored dataset are only reported.
            if FSISBNDBDataset.objects.isbn(fi.ISBN):
                print("###\t %s\t\t(preexisting data)" % fi.ISBN)
                continue

            iss = FSRefreshISBNdbDatasetForBook(fi)
            if not iss.valid:
                print("###\t %s\t\t(INVALID ISBNdb DATA)\t\t%s" % (fi.ISBN, fi.title))
                continue

            if fi.ISBN == iss.isbn13:
                matched += 1
                print("###\t %s\t\t(match)\t\t\t\t%s" % (fi.ISBN, fi.title))
            else:
                print("###\t %s\t\t(NO MATCH)\t\t\t%s" % (fi.ISBN, fi.title))

        print("###\t %s of %s items had ISBN13s matching ISBNDB values" % (matched, total))

    @test
    def test_isbndb13(self):
        """Print each item's ISBN next to the ISBN13 reported by ISBNdb."""
        total = 0
        matched = 0
        for fi in FSItem.objects.all():
            total += 1
            iss = get_isbndb_info(str(fi.ISBN))
            verdict = 'MATCH' if fi.ISBN == iss.isbn13 else 'no match'
            print(">>>\t Listed ISBN: \t\t\t%s" % fi.ISBN)
            print(">>>\t ISBNDB ISBN13: \t\t%s\t (%s)" % (iss.isbn13, verdict))
            print("\n")
            if fi.ISBN == iss.isbn13:
                matched += 1

        print("###\t %s of %s items had ISBN13s matching ISBNDB values" % (matched, total))

    @test
    def _test_fixprices(self):
        """Fill in missing item prices from the stored Amazon list price.

        The leading underscore keeps the default unittest loader from
        collecting this method.
        """
        for fi in FSItem.objects.filter(price__isnull=True):
            try:
                fsdata = FSAmazonDataset.objects.isbn(fi.ISBN)
            except ObjectDoesNotExist:
                print("WTF: no FSAmazonDataset found for ISBN %s" % fi.ISBN)
                continue

            if fsdata and not fi.price and fsdata.listprice:
                # presumably listprice is stored in cents -- confirm against
                # the FSAmazonDataset model
                fi.price = int(fsdata.listprice/100)
                fi.save()
                print("%s\t '%s' new price is $%s" % (fi.ISBN, fi.title, fi.price))
# Run the checks above through unittest when executed as a script.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment