Created
January 19, 2022 06:54
-
-
Save scarf005/27b777440bafd5d721b5cc9505525f50 to your computer and use it in GitHub Desktop.
some ugly little snippet to find toc from isbn13
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from asyncio import run
from pprint import pprint
from sys import argv
from urllib.parse import urljoin

from aiohttp import ClientSession as Session
from bs4 import BeautifulSoup as Soup
yes24 = "http://www.yes24.com" | |
async def query(session: Session, url: str) -> Soup:
    """Fetch *url* with *session* and return the response body as parsed HTML.

    Args:
        session: Open client session used to issue the GET request.
        url: Address of the page to download.

    Returns:
        The response text parsed with the ``"html.parser"`` backend.
    """
    async with session.get(url) as response:
        return Soup(await response.text(), "html.parser")
async def main(isbn: int):
    """Look up a book on YES24 by ISBN and pretty-print its table of contents.

    The ISBN is formatted into the site's search URL, the first search hit
    is followed to its product page, and the text of the table-of-contents
    textarea is split on ``"__"`` and printed with ``pprint``.

    Args:
        isbn: ISBN to search for. It is only interpolated into the query
            string, so the string taken from the command line works too.

    Raises:
        LookupError: If the search hit, its product link, or the
            table-of-contents markup is missing from the fetched pages.
    """
    search_url = f"{yes24}/product/search?query={isbn}"
    async with Session() as ses:
        results = await query(ses, search_url)

        # find() returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on None further down.
        first_hit = results.find("div", class_="itemUnit")
        if first_hit is None:
            raise LookupError(f"no search result for ISBN {isbn!r}")

        anchor = first_hit.find("a", class_="gd_name")
        if anchor is None or not anchor.get("href"):
            raise LookupError(f"search result for ISBN {isbn!r} has no product link")

        # urljoin resolves root-relative, relative and absolute hrefs
        # against the site root without producing a doubled slash.
        product_url = urljoin(f"{yes24}/", anchor["href"])
        book = await query(ses, product_url)

        toc_section = book.find("div", id="infoset_toc")
        if toc_section is None:
            raise LookupError(f"no table-of-contents section at {product_url}")

        # TODO: fix here
        toc_area = toc_section.find("textarea", class_="txtContentText")
        if toc_area is None:
            raise LookupError(f"no table-of-contents text at {product_url}")

        pprint(toc_area.text.split("__"))
if __name__ == "__main__":
    # Exit with a usage hint rather than an IndexError traceback when the
    # ISBN argument is missing.
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} ISBN13")
    run(main(argv[1]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment