Created
June 25, 2018 22:08
-
-
Save Krazybug/906033c42042909e67706a61d998f98f to your computer and use it in GitHub Desktop.
Calibre Downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import os | |
import time | |
# - handle formats by storing the hash of each file
# - catch exceptions
# - add a CLI
# - allow 3 modes: update metadata, update file, and append filename
# - a query for the search, e.g. http://localhost:8080/ajax/search?sort=id&sort_order=desc
# - buffer the files (chunked download)
# - start/stop index
# - store an index
# - id = timestamp + process id
# - cover and JSON with the same name
# - factor out shared code
# - JSON None
# - search engine
# - generate a clickable HTML page
# - add a size limit
# - password-protected access
# - debug mode
def get_file(url, path, id, format):
    """Download one book file from ``url`` and store it under ``path``.

    The file name is taken from the ``filename=`` parameter of the response's
    ``Content-Disposition`` header when one is usable; otherwise it falls
    back to ``<id>.<format>``. The body is streamed to disk in chunks.

    Args:
        url: Full URL of the file to download.
        path: Destination prefix. It is concatenated as-is with the file
            name, so it should end with a path separator.
        id: Book identifier (a string), used for the fallback file name.
        format: Format/extension, used for the fallback file name.

    Note:
        ``id`` and ``format`` shadow built-ins; the names are kept because
        they are part of the function's calling interface.
    """
    print(url)
    # Stream the response so a large file is never held in memory at once.
    with requests.get(url, stream=True) as r1:
        try:
            raw_name = r1.headers['Content-Disposition'].split('filename=')[1]
        except (KeyError, IndexError):
            # Header missing, or present without a filename= parameter.
            raw_name = ''
        # Keep only the value of the first filename parameter: anything after
        # a ';' (e.g. a following filename*=... parameter) is not part of it.
        raw_name = raw_name.split(';')[0].strip().strip('"')
        # The name is chosen by the remote server: keep only its last path
        # component so it cannot escape the destination directory.
        safe_name = os.path.basename(raw_name.replace('\\', '/'))
        if safe_name in ('', '.', '..'):
            f_name = path + id + "." + format
        else:
            f_name = path + safe_name
            print(f_name)
        directory = os.path.dirname(f_name)
        if directory:
            # os.makedirs('') raises, so only create a real directory part.
            os.makedirs(directory, exist_ok=True)
        with open(f_name, 'wb') as fd:
            for chunk in r1.iter_content(chunk_size=64 * 1024):
                fd.write(chunk)
def get_cover(url, path):
    """Fetch the image at ``url`` and write it to ``<path>cover.jpg``.

    Args:
        url: Full URL of the cover image.
        path: Destination prefix; it is concatenated as-is with
            ``cover.jpg``, so it should end with a path separator.
    """
    response = requests.get(url)
    cover_file = "".join((path, "cover.jpg"))
    print(cover_file)
    # Create the destination directory on first use.
    parent_dir = os.path.dirname(cover_file)
    os.makedirs(parent_dir, exist_ok=True)
    with open(cover_file, mode='wb') as out:
        out.write(response.content)
# --- Configuration ----------------------------------------------------------
# A format whose reported size exceeds this many bytes (or that reports no
# size at all) is skipped.
max_size = 1000 * 1024 * 1024
# Index of the first search result to process.
offset = 0
# Number of search results requested per page.
num = 25
# Base URL of the server and name of the library to mirror.
server = "http://188.96.212.209:8080"
library = 'Zeitschrift_XXX'
# Only these formats are downloaded.
my_formats = ['azw3', 'epub', 'pdf', 'mobi', 'doc', 'zip', 'txt', 'chm']

# --- Total number of matches ------------------------------------------------
# Request zero results: only the reported "total_num" is used from this reply.
url = server + '/ajax/search/' + library + '?num=0'
print(url)
r = requests.get(url)
total_num = int(r.json()["total_num"])

# --- Page through the results -----------------------------------------------
# 1-based position of the current book, used only for the progress display.
rank = offset + 1
while offset < total_num:
    print("offset=", str(offset))
    url = (server + '/ajax/search/' + library
           + '?num=' + str(num) + '&offset=' + str(offset))
    print(url)
    # Parse each JSON reply once instead of on every field access.
    page = requests.get(url).json()
    print("from: ", str(offset), " to: ", str(offset + int(page['num'])))

    # Fetch the metadata of every book on this page in a single request.
    books_s = ",".join(str(i) for i in page['book_ids'])
    url = server + '/ajax/books/' + library + '?ids=' + books_s
    r = requests.get(url)
    print(url)
    books = r.json()
    print(len(books))

    # TODO: clamp the displayed rank using the page bounds and total_num.
    for book_id, meta in books.items():
        print('-> range={}/{}'.format(str(rank), str(total_num)))
        book = {}
        book['formats'] = list(set(meta['formats']) & set(my_formats))
        book['title'] = meta['title']
        print('--> {}: {}'.format(book_id, book['title']))

        # Build a new list of accepted formats rather than removing items
        # from the list being iterated, which silently skips elements.
        kept = []
        for f in book['formats']:
            f_meta = meta['format_metadata'][f]
            if 'size' not in f_meta or max_size < int(f_meta['size']):
                print("format {} ignored for {}:'{}' too large)".format(f, book_id, book['title']))
            else:
                kept.append(f)
        book['formats'] = kept

        if not book['formats']:
            print("'{}' ignored: no more format available in {})".format(book['title'], (meta['formats'])))
        else:
            # Everything belonging to one book goes into import/<id>/.
            f_path = 'import/' + book_id + '/'

            for f in book['formats']:
                # The download URL is listed under 'main_format' or, failing
                # that, under 'other_formats'.
                if f in meta['main_format']:
                    url_path = meta['main_format'][f]
                else:
                    url_path = meta['other_formats'][f]
                print("--->", url_path)
                url = server + url_path
                get_file(url, f_path, book_id, f)

            url_path = meta['cover']
            url = server + url_path
            print("---->", url_path)
            get_cover(url, f_path)

            # Key order is kept stable so metadata.json is written with the
            # same layout on every run.
            book['id'] = book_id
            book['source'] = server + '/calibre/ajax/book/' + book_id
            for key in ('authors', 'uuid', 'identifiers', 'pubdate',
                        'publisher', 'languages', 'comments', 'series',
                        'tags'):
                book[key] = meta[key]
            print(book)

            filename = f_path + 'metadata.json'
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'w') as fd:
                json.dump(book, fd)

        rank = rank + 1
    offset = offset + num
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment