Last active
December 26, 2021 15:04
-
-
Save vitaminac/ad16144e9f6afb49b042e4d0d2d28699 to your computer and use it in GitHub Desktop.
狐妖小红娘漫画下载
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import errno | |
import json | |
import os | |
import re | |
from base64 import b64decode | |
from re import compile | |
import aiohttp | |
import bs4 | |
import execjs | |
# Regex that pulls the base64-encoded, packed JavaScript payload out of a
# chapter page (the page embeds it as packed="...").
BASE64_PATTERN = compile("packed=\"([^\"]+)\"")
# JSON file persisting the list of chapter URLs already fully downloaded,
# so re-runs can resume where they left off.
SAVE_FILE = "saved.json"
# Mobile site hosting the chapter index and chapter pages.
HOST = "http://m.katui.net"
# Static/CDN prefix prepended to the relative image paths found per chapter.
STATIC_CONTENT_SERVER = "http://katui.700mh.com/"
async def fetch(session, url):
    """Fetch *url* through the given aiohttp *session* and return the body
    decoded as GBK text (the target site serves GBK-encoded pages)."""
    request_ctx = session.get(url)
    async with request_ctx as resp:
        body = await resp.text(encoding="GBK")
    return body
async def get_chapter_image_urls(session, chapter_url: str):
    """Return the list of relative image URLs for one chapter page.

    The site hides the image list inside a base64-encoded, eval-packed
    JavaScript blob embedded as packed="..."; this decodes the blob and
    evaluates it twice with execjs to recover the URL array.
    """
    html = await fetch(session, chapter_url)
    # NOTE(review): search() returns None when the page layout changes,
    # which makes the [1] subscript raise TypeError.
    base64 = BASE64_PATTERN.search(html)[1]
    decode = str(b64decode(base64), 'utf-8')
    # Skip a fixed 4-character prefix — presumably the leading "eval" of the
    # packer, so execjs evaluates the packed expression itself. TODO confirm
    # against a live page.
    js = decode[4:]
    arr = execjs.eval(js)
    # Skip a fixed 14-character prefix of the unpacked source — presumably an
    # assignment header before the URL array literal; TODO confirm — then
    # evaluate again to get the array as a Python list.
    js_get: str = arr[14:]
    return execjs.eval(js_get)
async def download_image(session, index, title, page, url):
    """Download one chapter image to "<index>-<title>/<page>.jpg".

    Does nothing when the target file already exists, so interrupted runs
    can be resumed cheaply.  '!' and '?' in the path are replaced with '_'
    because they are awkward or illegal in file names on some systems.
    """
    url = STATIC_CONTENT_SERVER + url
    filename = str(index) + "-" + title + "/" + str(page) + ".jpg"
    filename = re.sub('[!?]', '_', filename)
    if os.path.exists(filename):
        return
    # exist_ok=True is race-safe when several download tasks create the same
    # chapter directory concurrently (replaces the old errno.EEXIST guard).
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    async with session.get(url) as r:
        with open(filename, "wb") as f:
            f.write(await r.read())
async def collect_images(images, session, index, title, chapter_url, downloaded: list):
    """Resolve the image URLs of *chapter_url*, append one download
    coroutine per page to *images*, and mark the chapter as downloaded."""
    page_urls = await get_chapter_image_urls(session, chapter_url)
    for page_no, image_url in enumerate(page_urls):
        coro = download_image(session, index, title, page_no, image_url)
        images.append(coro)
    downloaded.append(chapter_url)
async def main():
    """Crawl the chapter index, download every not-yet-saved chapter's
    images concurrently, then persist the set of completed chapter URLs."""
    # Resume support: SAVE_FILE holds chapter URLs finished on earlier runs.
    # On the very first run the file does not exist yet — start empty
    # instead of crashing (the original unconditional open() raised
    # FileNotFoundError here).
    try:
        with open(SAVE_FILE, encoding="UTF-8") as f:
            downloaded = json.load(f)
    except FileNotFoundError:
        downloaded = []
    async with aiohttp.ClientSession(headers={
        # Mobile user agent so the site serves the mobile layout the
        # CSS selector below expects.
        "User-Agent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"
    }) as session:
        chapters = []
        images = []
        html = await fetch(session, "http://m.katui.net/manhua/1/")
        soup = bs4.BeautifulSoup(html, "html.parser")
        chapter_links = list(soup.select("#c2 > dl > dt > a"))
        # The index lists newest first; reverse so the enumerate() index
        # matches ascending chapter order.
        chapter_links.reverse()
        for index, chapter in enumerate(chapter_links):
            title = chapter.text
            chapter_url: str = HOST + chapter.attrs["href"]
            if chapter_url not in downloaded:
                chapters.append(collect_images(images, session,
                                               index, title, chapter_url, downloaded))
        # asyncio.gather instead of asyncio.wait: wait() with bare
        # coroutines is deprecated since 3.8 and a TypeError on 3.11+,
        # and gather propagates exceptions instead of hiding them.
        if chapters:
            await asyncio.gather(*chapters)
        if images:
            await asyncio.gather(*images)
    with open(SAVE_FILE, "w", encoding="UTF-8") as f:
        json.dump(downloaded, f)
if __name__ == '__main__':
    # asyncio.run() replaces the deprecated get_event_loop() /
    # run_until_complete() pattern and guarantees the loop is closed.
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment