Skip to content

Instantly share code, notes, and snippets.

@scarf005
Created January 19, 2022 06:54
Show Gist options
  • Save scarf005/27b777440bafd5d721b5cc9505525f50 to your computer and use it in GitHub Desktop.
Save scarf005/27b777440bafd5d721b5cc9505525f50 to your computer and use it in GitHub Desktop.
Some ugly little snippet to find a book's table of contents (TOC) from its ISBN-13.
#!/usr/bin/env python3
from aiohttp import ClientSession as Session
from asyncio import run
from bs4 import BeautifulSoup as Soup
from pprint import pprint
from sys import argv
# Base URL of the yes24.com site; the search URL and the product-page URL
# requested by this script are both built from it.
yes24 = "http://www.yes24.com"
async def query(session: Session, url: str) -> Soup:
    """Fetch *url* with *session* and return the response body parsed as HTML.

    Args:
        session: An open aiohttp client session used to issue the GET request.
        url: Address of the page to download.

    Returns:
        The response text parsed by BeautifulSoup with ``html.parser``.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            error status (400 or above).
    """
    async with session.get(url) as res:
        # Fail loudly on HTTP errors instead of parsing an error page and
        # tripping over a confusing ``None`` lookup further down the line.
        res.raise_for_status()
        html = await res.text()
    return Soup(html, "html.parser")
async def main(isbn: int):
    """Search the site for *isbn* and pretty-print the book's table of contents.

    The first search hit is followed to its product page; the text of the
    ``textarea.txtContentText`` element inside ``div#infoset_toc`` is split
    on ``"__"`` and pretty-printed.

    Args:
        isbn: The ISBN to search for. The value is only interpolated into
            the search URL, so the string taken from the command line works
            as well as an integer.

    Raises:
        LookupError: If the search page contains no usable result link, or
            the product page has no table-of-contents text.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import urljoin

    querystr = f"{yes24}/product/search?query={isbn}"
    async with Session() as ses:
        soup = await query(ses, querystr)

        # ``find`` returns None when nothing matches, so check each step
        # instead of failing later with an opaque AttributeError.
        books = soup.find("div", class_="itemUnit")
        anchor = books.find("a", class_="gd_name") if books is not None else None
        if anchor is None or not anchor.get("href"):
            raise LookupError(f"no search result found for ISBN {isbn!r}")

        # urljoin handles root-relative ("/Product/..."), relative and
        # absolute hrefs alike, so the product URL never gets a doubled
        # slash or a duplicated host.
        product = urljoin(f"{yes24}/", anchor["href"])
        book = await query(ses, product)

    toc_html = book.find("div", id="infoset_toc")
    # TODO: fix here
    toc_area = (
        toc_html.find("textarea", class_="txtContentText")
        if toc_html is not None
        else None
    )
    if toc_area is None:
        raise LookupError(f"no table of contents found at {product}")

    toc_text = toc_area.text
    pprint(toc_text.split("__"))
if __name__ == "__main__":
    # The ISBN is taken from the first command-line argument; show a usage
    # hint instead of an IndexError traceback when it is missing.
    if len(argv) < 2:
        raise SystemExit(f"usage: {argv[0]} ISBN13")
    run(main(argv[1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment