Last active
September 13, 2022 01:38
-
-
Save quininer/a412efaad8714fb6ed70 to your computer and use it in GitHub Desktop.
抓取u17漫画
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import asyncio | |
import json | |
from aiohttp import ClientSession | |
from lxml import etree | |
from argparse import ArgumentParser | |
from os import mkdir, listdir | |
from execjs import compile as compilejs | |
from base64 import b64decode | |
from PIL import Image | |
from io import BytesIO | |
async def name2comic(name: str, session) -> int:
    """Resolve a comic name to its numeric id via the u17 search page.

    Requests the search-result page for *name*, takes the first link matched
    by the result-list XPath and parses the id out of the link's file name
    (the part of the last path segment before the first dot).

    Args:
        name: Comic title to search for.
        session: HTTP client whose ``get(url)`` returns an async context
            manager yielding a response with an awaitable ``text()``
            (an aiohttp ``ClientSession`` in this script).

    Returns:
        The comic id parsed from the first search hit.

    Raises:
        IndexError: The search page contained no matching result link.
        ValueError: The link's file name is not an integer.
    """
    # Local import: the name is only needed here and the file's import
    # block does not provide it.
    from urllib.parse import quote

    print('[name]', name)
    api = 'https://so.u17.com/all/{name}/m0_p1.html'
    # Percent-encode the name so characters such as '/', '?' or '#' stay
    # inside the single path segment instead of altering the URL structure.
    async with session.get(api.format(name=quote(name, safe=''))) as res:
        page = etree.HTML(await res.text())
    links = page.xpath(
        '//*[@id="comiclist"]/div/div[3]/ul/li/div/div[2]/h3/strong/a'
    )
    if not links:
        # Same exception type the bare `[0]` lookup produced, but with a
        # message that says what actually went wrong.
        raise IndexError('no search result for comic name {!r}'.format(name))
    href = links[0].attrib['href']
    return int(href.split('/')[-1].split('.')[0])
async def comic2chapter(comic_id: int, session) -> list:
    """Fetch the chapter listing of a comic from the u17 ajax endpoint.

    Args:
        comic_id: Numeric id of the comic.
        session: HTTP client whose ``get(url)`` returns an async context
            manager yielding a response with an awaitable ``text()``.

    Returns:
        The decoded JSON payload. ``main`` slices it and reads
        ``chapter_id`` / ``chapter_name`` from each element, i.e. it is
        consumed as a list of chapter records.

    Raises:
        json.JSONDecodeError: The response body is not valid JSON.
    """
    print('[comic]', comic_id)
    api = 'https://www.u17.com/comic/ajax.php?mod=comic&act=get_chapters&comic_id={comic_id}'
    async with session.get(api.format(comic_id=comic_id)) as res:
        # Keep the body in its own name rather than rebinding `res`.
        body = await res.text()
    return json.loads(body)
async def chapter2image(chapter_id: int, session) -> dict:
    """Extract the image list embedded in a chapter page.

    Downloads the chapter's HTML, picks the last ``<script>`` element under
    ``/html/head`` that has inline text, evaluates that script in a
    JavaScript runtime (with a minimal ``$.evalJSON`` shim) and returns the
    value of ``image_config["image_list"]``.

    Args:
        chapter_id: Numeric id of the chapter.
        session: HTTP client whose ``get(url)`` returns an async context
            manager yielding a response with an awaitable ``text()``.

    Returns:
        Whatever the JS runtime yields for ``image_config["image_list"]``;
        ``main`` iterates its keys and reads ``['src']`` from each value.

    Raises:
        IndexError: The page head has no script element with inline text.
    """
    print('[chapter]', chapter_id)
    api = 'https://www.u17.com/chapter/{chapter_id}.html'
    async with session.get(api.format(chapter_id=chapter_id)) as res:
        document = etree.HTML(await res.text())
        inline_scripts = []
        for node in document.xpath('/html/head/script'):
            if node.text:
                inline_scripts.append(node.text)
        script = inline_scripts[-1]
    # The page script calls $.evalJSON, so provide just that one function.
    template = '''var $ = {{
evalJSON: JSON.parse
}};
{script}'''
    # NOTE(review): this executes JavaScript taken verbatim from the remote
    # page; it is only as trustworthy as the site serving it.
    js = compilejs(template.format(script=script))
    return js.eval('image_config["image_list"]')
async def getimage(image_url: bytes, path: str, session):
    """Download one image and write it to *path*.

    URLs containing ``news.u17i.com`` are skipped without any request
    (presumably non-comic filler images — confirm against the site).

    Args:
        image_url: Image URL as bytes (``main`` passes the base64-decoded
            ``src`` value).
        path: Destination file name; Pillow picks the output format from
            its extension.
        session: HTTP client whose ``get(url)`` returns an async context
            manager yielding a response with an awaitable ``read()``.

    Returns:
        None.
    """
    if b'news.u17i.com' in image_url:
        return
    print('[image]', image_url)
    async with session.get(image_url.decode()) as res:
        payload = await res.read()
    # Decode after the response is released; only the bytes are needed.
    image = Image.open(BytesIO(payload))
    # JPEG has no alpha channel or palette: saving e.g. an RGBA or P mode
    # PNG/GIF under a .jpg name raises OSError unless converted first.
    if path.lower().endswith(('.jpg', '.jpeg')) and image.mode not in ('L', 'RGB', 'CMYK'):
        image = image.convert('RGB')
    image.save(path)
async def main(name:str=None, comic:int=None, chapter:int=None):
    """Download all images of a comic into ``<comic id>/<chapter name>/``.

    Args:
        name: Comic title; used to look up the id when *comic* is not given.
        comic: Numeric comic id. Takes precedence over *name*.
        chapter: Optional chapter id to start from. When it is not found in
            the chapter list, downloading starts at the first chapter.

    Raises:
        SystemExit: The comic id could not be resolved from *name*.
    """
    # Local import: exist_ok directory creation replaces the listdir/mkdir
    # check without touching the file's import block.
    from os import makedirs

    async with ClientSession() as session:
        if not comic and name:
            try:
                comic = await name2comic(name, session)
            except Exception:
                # Narrower than a bare except, so Ctrl-C and task
                # cancellation are not reported as a lookup failure.
                raise SystemExit("[!] 获取 comic id 出错。")

        comic_dir = str(comic)
        makedirs(comic_dir, exist_ok=True)

        chapters = await comic2chapter(comic, session)

        # Find where to start; stays 0 when no chapter was requested or the
        # requested id is not present.
        start = 0
        if chapter:
            wanted = str(chapter)
            for position, entry in enumerate(chapters):
                if entry['chapter_id'] == wanted:
                    start = position
                    break

        for entry in chapters[start:]:
            chapter_dir = '{}/{}'.format(comic_dir, entry['chapter_name'])
            makedirs(chapter_dir, exist_ok=True)
            imgs = await chapter2image(int(entry['chapter_id']), session)
            # gather() accepts coroutines directly (asyncio.wait rejects
            # them on Python 3.11+) and copes with an empty chapter.
            # return_exceptions keeps one failed image from aborting the
            # rest of the chapter.
            outcomes = await asyncio.gather(
                *(
                    getimage(
                        b64decode(imgs[img]['src']),
                        '{}/{:0>3}.jpg'.format(chapter_dir, img),
                        session,
                    )
                    for img in imgs
                ),
                return_exceptions=True,
            )
            for img, outcome in zip(imgs, outcomes):
                if isinstance(outcome, BaseException):
                    print('[!] image', img, 'failed:', repr(outcome))
if __name__ == '__main__':
    # Command-line entry point: needs a comic name or a comic id; the
    # starting chapter is optional.
    cli = ArgumentParser()
    cli.add_argument('--name', help="漫画名")
    cli.add_argument('--comic', type=int, help="漫画ID")
    cli.add_argument('--chapter', type=int, help="从某章节开始下载")
    options = cli.parse_args()

    if options.name or options.comic:
        asyncio.run(main(options.name, options.comic, options.chapter))
    else:
        # Nothing to look up: show usage and stop.
        cli.print_help()
        exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment