Created
October 14, 2025 03:43
-
-
Save Xnuvers007/863c3bb7dff87cf3b8683f00c481343c to your computer and use it in GitHub Desktop.
Webtoon scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import subprocess
import sys
from sys import platform  # kept for backward compatibility with the original module API

# Third-party dependencies: try to import them, auto-install on first failure,
# then import for real in the `finally` block so the names are always bound
# (or a clear ImportError is raised if installation failed).
try:
    import requests
    from bs4 import BeautifulSoup
    from PIL import Image
except (ModuleNotFoundError, ImportError):
    # Invoke pip via the *current* interpreter (sys.executable -m pip) rather
    # than a bare `pip`/`pip3` shell command, which may belong to a different
    # Python installation. `beautifulsoup4` is the canonical PyPI name of bs4.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "requests", "beautifulsoup4", "Pillow"],
        check=False,
    )
finally:
    import requests
    from bs4 import BeautifulSoup
    from PIL import Image
def print_author_info():
    """Print the tool's startup banner with author and contact details."""
    banner = [
        "=" * 60,
        "β¨ Webtoon Scraper β¨",
        "-" * 60,
        "Author : Xnuvers007",
        "GitHub : https://github.com/Xnuvers007",
        "Email : Gosah kepo",
        "Instagram: Gosah Kepo",
        "-" * 60,
        "This tool allows you to scrape Webtoon content",
        "with ease and download episodes as PDF files.",
        "=" * 60,
        "",  # trailing blank line, as in the original banner
    ]
    print("\n".join(banner))
class WebtoonScraper:
    """Scrape Webtoon genre listings, episode lists, and episode images.

    All methods are static and interactive: they read their inputs from
    stdin and print their results, matching the original CLI flow
    (``WebtoonScraper.main()`` etc. keeps working unchanged).
    """

    # Webtoon rejects requests that lack a browser-like User-Agent and a
    # same-site Referer, so every request shares these headers.
    HEADERS = {
        "User-Agent": "Mozilla/5.0",
        "Referer": "https://www.webtoons.com/",
    }

    # Network timeout in seconds so a stalled connection cannot hang the CLI.
    TIMEOUT = 30

    @staticmethod
    def main():
        """Prompt for a genre and list every series on that genre page."""
        genres = input("Masukkan genre webtoon (Example: action): ")
        url = f"https://www.webtoons.com/id/{genres}"
        response = requests.get(url, headers=WebtoonScraper.HEADERS,
                                timeout=WebtoonScraper.TIMEOUT)
        if response.status_code != 200:
            print(f"Gagal mengakses halaman. Status code: {response.status_code}")
            return
        soup = BeautifulSoup(response.text, "html.parser")
        items = soup.find_all("li")
        count_series = soup.find('div', class_='series_count')
        if count_series:
            print(f"π Jumlah Webtoon: {count_series.text.strip()}")
        else:
            print("β Tidak ditemukan jumlah webtoon.")
        for idx, item in enumerate(items, start=1):
            # Only <li> entries that carry the genre-title anchor are series.
            a_tag = item.find("a", class_="link _genre_title_a")
            if not a_tag:
                continue
            title_tag = item.find("strong", class_="title")
            author_tag = item.find("div", class_="author")
            view_tag = item.find("div", class_="view_count")
            img_tag = item.find("img")
            title = title_tag.get_text(strip=True) if title_tag else "N/A"
            author = author_tag.get_text(strip=True) if author_tag else "N/A"
            view = view_tag.get_text(strip=True) if view_tag else "N/A"
            # .get() avoids a KeyError on tags that lack the attribute.
            image = img_tag.get("src", "N/A") if img_tag else "N/A"
            link = a_tag.get("href", "N/A")
            print(f"{idx}. Judul : {title}")
            print(f" Penulis : {author}")
            print(f" View : {view}")
            print(f" Gambar : {image}")
            print(f" Link Webtoon: {link}")
            print("-" * 50)

    @staticmethod
    def episodeList():
        """Prompt for a series URL, print its episode links and pagination URLs."""
        get_link = input("Masukkan link webtoon (Example: https://www.webtoons.com/id/slice-of-life/wee/list?title_no=3085): ").strip()
        print("\n\n")
        response_eps = requests.get(get_link, headers=WebtoonScraper.HEADERS,
                                    timeout=WebtoonScraper.TIMEOUT)
        soup = BeautifulSoup(response_eps.text, "html.parser")
        unordered_list = soup.find("ul", id="_listUl")
        if unordered_list is None:
            # Not a series list page (or layout changed): fail gracefully
            # instead of raising AttributeError on None.
            print("Tidak ditemukan daftar episode pada halaman ini.")
            return
        for li in unordered_list.find_all("li"):
            a = li.find('a')
            if a and a.has_attr('href'):
                print(a['href'])
        print("\n\nπ Pagination Links:")
        # Strip any existing page parameter so we can re-append it per page.
        if "&page=" in get_link:
            base_url = get_link.split("&page=")[0]
        else:
            base_url = get_link
        page_div = soup.find("div", class_="paginate")
        printed_pages = set()  # dedupe: the pager repeats the current page
        if page_div:
            for link in page_div.find_all("a"):
                href = link.get("href")
                if href == "#":
                    # "#" marks the currently-selected page, i.e. page 1 here.
                    page_number = 1
                else:
                    split_parts = href.split("page=")
                    page_number = int(split_parts[1]) if len(split_parts) > 1 else 1
                if page_number not in printed_pages:
                    full_url = f"{base_url}&page={page_number}"
                    print(full_url)
                    printed_pages.add(page_number)

    @staticmethod
    def get_content():
        """Prompt for an episode viewer URL, download its images, then build a PDF."""
        get_link = input("Masukkan link episode webtoon (example: https://www.webtoons.com/id/slice-of-life/wee/episode-363/viewer?title_no=3085&episode_no=366): ").strip()
        folder_name = input("Masukkan nama folder penyimpanan (judul/episode): ").strip()
        response_eps = requests.get(get_link, headers=WebtoonScraper.HEADERS,
                                    timeout=WebtoonScraper.TIMEOUT)
        soup = BeautifulSoup(response_eps.text, "html.parser")
        image_list_div = soup.find("div", id="_imageList")
        if not image_list_div:
            print("β Tidak ditemukan konten gambar dalam halaman ini.")
            return
        image_tags = image_list_div.find_all("img", class_="_images")
        if not image_tags:
            print("β Tidak ada gambar ditemukan.")
            return
        os.makedirs(folder_name, exist_ok=True)
        print(f"\nπΌοΈ Ditemukan {len(image_tags)} gambar. Mulai mengunduh...\n")
        for idx, img in enumerate(image_tags, start=1):
            # The real image URL is lazy-loaded via data-url; fall back to src.
            img_url = img.get("data-url") or img.get("src")
            if not img_url:
                continue
            # Derive the extension from the URL path, ignoring query params.
            ext = img_url.split("?")[0].split(".")[-1]
            filename = f"{idx:03d}.{ext}"  # zero-padded so sorted() keeps order
            filepath = os.path.join(folder_name, filename)
            try:
                img_data = requests.get(img_url, headers=WebtoonScraper.HEADERS,
                                        timeout=WebtoonScraper.TIMEOUT).content
                with open(filepath, "wb") as f:
                    f.write(img_data)
                print(f"β Gambar {idx} berhasil disimpan: {filepath}")
            except Exception as e:
                # Best-effort download: report and continue with the next image.
                print(f"β Gagal menyimpan gambar {idx}: {e}")
        print(f"\nβ Semua gambar selesai diunduh ke folder: {folder_name}")
        WebtoonScraper.convert_to_pdf(folder_name)

    @staticmethod
    def convert_to_pdf(folder_name):
        """Merge all images in *folder_name* into one PDF named after the folder.

        Args:
            folder_name: Directory containing the downloaded episode images.
        """
        print(f"\nπ Menggabungkan gambar di folder '{folder_name}' menjadi PDF...")
        images = []
        try:
            for filename in sorted(os.listdir(folder_name)):
                if filename.lower().endswith((".png", ".jpg", ".jpeg")):
                    img_path = os.path.join(folder_name, filename)
                    # PDF pages must be RGB; convert drops any alpha channel.
                    images.append(Image.open(img_path).convert("RGB"))
            if not images:
                print("β Tidak ada gambar yang bisa digabungkan.")
                return
            pdf_path = os.path.join(folder_name, f"{folder_name}.pdf")
            try:
                images[0].save(pdf_path, save_all=True, append_images=images[1:])
                print(f"β PDF berhasil dibuat: {pdf_path}")
            except Exception as e:
                print(f"β Gagal membuat PDF: {e}")
        finally:
            # Close the file handles Pillow keeps open (resource-leak fix).
            for im in images:
                im.close()
if __name__ == "__main__":
    try:
        print_author_info()
        # Interactive flow: browse a genre, list a series' episodes, then
        # download one episode and bundle it into a PDF.
        WebtoonScraper.main()
        WebtoonScraper.episodeList()
        WebtoonScraper.get_content()
    except KeyboardInterrupt:
        # Ctrl+C: user cancelled; exit quietly with success status.
        print("\nβ Pengguna membatalkan operasi.")
        raise SystemExit(0)
    except Exception as e:
        print(f"β Terjadi kesalahan: {e}")
        print("β Mohon coba lagi.")
        # SystemExit instead of the site-module exit() builtin, which is not
        # guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment