Created
September 2, 2015 19:44
-
-
Save krysits/5aa70e5e080f2b75a1f3 to your computer and use it in GitHub Desktop.
Python script that queries the it-ebooks API (it-ebooks-api.info) for a search term and saves the IDs of the matching books to a text file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import urllib.request | |
import json | |
# Search term passed to getAllBooks(); also used as the base name of the output ".txt" file.
queryString = 'javascript' | |
# Base URL that the 'book/...' and 'search/...' request paths are appended to.
ur = 'http://it-ebooks-api.info/v1/' | |
def getFile(urla):
    """Fetch a URL and return its body parsed as JSON.

    Args:
        urla: URL to request; anything accepted by urllib.request.urlopen().

    Returns:
        The decoded JSON value (for a JSON object, a dict).

    Raises:
        urllib.error.URLError: if the request fails.
        UnicodeDecodeError: if the body is not valid UTF-8.
        ValueError: if the body is not valid JSON.
    """
    # read() is used instead of readall(): the latter is not available on the
    # response objects returned by current Python 3 versions.
    # The with-block closes the connection even if reading fails.
    with urllib.request.urlopen(urla) as response:
        rawBody = response.read()
    return json.loads(rawBody.decode('utf-8'))
def getBookByID(bookID):
    """Request the 'book/<bookID>' endpoint and return the parsed JSON reply.

    Args:
        bookID: identifier of the book; converted with str() before being
            appended to the URL.

    Returns:
        Whatever getFile() returns for the resulting URL.
    """
    bookUrl = ''.join((ur, 'book/', str(bookID)))
    bookData = getFile(bookUrl)
    return bookData
def searchByKeyword(queri, pageNr=1):
    """Request one page of search results for a keyword.

    Args:
        queri: search term; it is percent-encoded before being placed in
            the URL path.
        pageNr: result page to request (default 1).

    Returns:
        Whatever getFile() returns for the 'search/<queri>/page/<pageNr>' URL.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # safe='' also encodes '/', so a term containing a slash cannot change
    # the path structure of the request; spaces, '?', '#' etc. are escaped too.
    urla = ur + 'search/' + quote(queri, safe='') + '/page/' + str(pageNr)
    return getFile(urla)
def getAllBooks(queri, fetchPage=None):
    """Collect the IDs of all books matching a search term, across all pages.

    Pages are requested one after another, starting at page 1, until as many
    IDs have been collected as the first reply's "Total" field announces.

    Args:
        queri: search term forwarded to the page fetcher.
        fetchPage: optional callable ``fetchPage(queri, pageNr)`` returning one
            decoded result page (a dict with "Total" and, when there are
            results, "Books"). Defaults to searchByKeyword; mainly useful for
            testing or for plugging in a different transport.

    Returns:
        A list with the "ID" value of every book, in the order received.

    Raises:
        KeyError: if the first reply has no "Total" field or a book entry
            has no "ID" field.
    """
    if fetchPage is None:
        fetchPage = searchByKeyword

    rezults = []
    pageNr = 1
    bodi = fetchPage(queri, pageNr)
    totalBooks = int(bodi["Total"])

    # Paging is driven by the number of IDs collected rather than by a page
    # count derived from a fixed page size, so the final (possibly partial)
    # page is always fetched and no page is ever processed twice.
    while len(rezults) < totalBooks:
        books = bodi.get("Books") or []
        if not books:
            # The reply has no more entries although "Total" promised more;
            # stop instead of looping forever.
            break
        rezults.extend(buuk["ID"] for buuk in books)
        if len(rezults) < totalBooks:
            pageNr += 1
            bodi = fetchPage(queri, pageNr)
    return rezults
# run main: collect every book ID for the configured search term and write
# them, one per line, to "<queryString>.txt" in the current directory.
booksByCategory = getAllBooks(queryString)
# The with-block closes the file even if a write fails part-way through.
with open(queryString + ".txt", "w") as fo:
    fo.writelines(str(buukID) + "\n" for buukID in booksByCategory)
# A single book's record can be fetched with e.g. getBookByID('2279690981').
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment