Webtoon scraper

import os
from sys import platform

try:
    import requests
    from bs4 import BeautifulSoup
    from PIL import Image
except (ModuleNotFoundError, ImportError):
    # Dependencies are missing: install them, then retry the imports below.
    if platform == "win32":
        os.system("pip install requests beautifulsoup4 Pillow")
    else:
        os.system("pip3 install requests beautifulsoup4 Pillow")
finally:
    import requests
    from bs4 import BeautifulSoup
    from PIL import Image
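
# A more portable variant of the auto-install fallback above (a sketch, not
# part of the original flow): shelling out to "pip"/"pip3" can hit the wrong
# interpreter, whereas targeting sys.executable installs into the Python
# that is actually running this script:
#
#     import subprocess, sys
#     subprocess.check_call([sys.executable, "-m", "pip", "install",
#                            "requests", "beautifulsoup4", "Pillow"])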

def print_author_info():
    print("=" * 60)
    print("✨ Webtoon Scraper ✨")
    print("-" * 60)
    print("Author   : Xnuvers007")
    print("GitHub   : https://github.com/Xnuvers007")
    print("Email    : none of your business")
    print("Instagram: none of your business")
    print("-" * 60)
    print("This tool allows you to scrape Webtoon content")
    print("with ease and download episodes as PDF files.")
    print("=" * 60)
    print()


class WebtoonScraper:

    @staticmethod
    def main():
        genres = input("Enter a webtoon genre (example: action): ")
        url = f"https://www.webtoons.com/id/{genres}"
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": "https://www.webtoons.com/"
        }
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, "html.parser")
            items = soup.find_all("li")
            count_series = soup.find("div", class_="series_count")
            if count_series:
                print(f"📚 Number of webtoons: {count_series.text.strip()}")
            else:
                print("❌ Webtoon count not found.")
            for idx, item in enumerate(items, start=1):
                a_tag = item.find("a", class_="link _genre_title_a")
                if not a_tag:
                    continue
                title_tag = item.find("strong", class_="title")
                author_tag = item.find("div", class_="author")
                view_tag = item.find("div", class_="view_count")
                img_tag = item.find("img")
                title = title_tag.get_text(strip=True) if title_tag else "N/A"
                author = author_tag.get_text(strip=True) if author_tag else "N/A"
                view = view_tag.get_text(strip=True) if view_tag else "N/A"
                image = img_tag.get("src", "N/A") if img_tag else "N/A"
                link = a_tag.get("href", "N/A")
                print(f"{idx}. Title       : {title}")
                print(f"   Author      : {author}")
                print(f"   Views       : {view}")
                print(f"   Image       : {image}")
                print(f"   Webtoon link: {link}")
                print("-" * 50)
        else:
            print(f"Failed to fetch the page. Status code: {response.status_code}")

    @staticmethod
    def episodeList():
        get_link = input("Enter the webtoon list link (example: https://www.webtoons.com/id/slice-of-life/wee/list?title_no=3085): ").strip()
        print("\n\n")
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": "https://www.webtoons.com/"
        }
        response_eps = requests.get(get_link, headers=headers)
        soup = BeautifulSoup(response_eps.text, "html.parser")
        unordered_list = soup.find("ul", id="_listUl")
        if not unordered_list:
            print("❌ No episode list found on this page.")
            return
        for li in unordered_list.find_all("li"):
            a = li.find("a")
            if a and a.has_attr("href"):
                print(a["href"])
        print("\n\n📄 Pagination Links:")
        if "&page=" in get_link:
            base_url = get_link.split("&page=")[0]
        else:
            base_url = get_link
        page_div = soup.find("div", class_="paginate")
        printed_pages = set()
        if page_div:
            for link in page_div.find_all("a"):
                href = link.get("href")
                if not href or href == "#":
                    page_number = 1
                else:
                    split_parts = href.split("page=")
                    page_number = int(split_parts[1]) if len(split_parts) > 1 else 1
                if page_number not in printed_pages:
                    full_url = f"{base_url}&page={page_number}"
                    print(full_url)
                    printed_pages.add(page_number)

    @staticmethod
    def get_content():
        get_link = input("Enter the episode link (example: https://www.webtoons.com/id/slice-of-life/wee/episode-363/viewer?title_no=3085&episode_no=366): ").strip()
        folder_name = input("Enter the download folder name (title/episode): ").strip()
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": "https://www.webtoons.com/"
        }
        response_eps = requests.get(get_link, headers=headers)
        soup = BeautifulSoup(response_eps.text, "html.parser")
        image_list_div = soup.find("div", id="_imageList")
        if not image_list_div:
            print("❌ No image content found on this page.")
            return
        image_tags = image_list_div.find_all("img", class_="_images")
        if not image_tags:
            print("❌ No images found.")
            return
        if not os.path.exists(folder_name):
            os.makedirs(folder_name)
        print(f"\n🖼️ Found {len(image_tags)} images. Starting download...\n")
        for idx, img in enumerate(image_tags, start=1):
            # The real image URL lives in the data-url attribute, not src.
            img_url = img.get("data-url")
            if not img_url:
                continue
            ext = img_url.split("?")[0].split(".")[-1]
            filename = f"{idx:03d}.{ext}"
            filepath = os.path.join(folder_name, filename)
            try:
                img_data = requests.get(img_url, headers=headers).content
                with open(filepath, "wb") as f:
                    f.write(img_data)
                print(f"✅ Image {idx} saved: {filepath}")
            except Exception as e:
                print(f"❌ Failed to save image {idx}: {e}")
        print(f"\n✅ All images downloaded to folder: {folder_name}")
        WebtoonScraper.convert_to_pdf(folder_name)

    @staticmethod
    def convert_to_pdf(folder_name):
        print(f"\n📄 Merging the images in folder '{folder_name}' into a PDF...")
        images = []
        for filename in sorted(os.listdir(folder_name)):
            if filename.lower().endswith((".png", ".jpg", ".jpeg")):
                img_path = os.path.join(folder_name, filename)
                img = Image.open(img_path).convert("RGB")
                images.append(img)
        if images:
            # Use only the last path component so a nested folder name
            # ("title/episode") still yields a valid PDF file name.
            pdf_name = os.path.basename(os.path.normpath(folder_name))
            pdf_path = os.path.join(folder_name, f"{pdf_name}.pdf")
            try:
                images[0].save(pdf_path, save_all=True, append_images=images[1:])
                print(f"✅ PDF created: {pdf_path}")
            except Exception as e:
                print(f"❌ Failed to create PDF: {e}")
        else:
            print("❌ No images to merge.")


if __name__ == "__main__":
    try:
        print_author_info()
        WebtoonScraper.main()
        WebtoonScraper.episodeList()
        WebtoonScraper.get_content()
    except KeyboardInterrupt:
        print("\n❌ Operation cancelled by the user.")
        exit(0)
    except Exception as e:
        print(f"❌ An error occurred: {e}")
        print("❌ Please try again.")
        exit(1)
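
# Usage sketch: convert_to_pdf can be reused on its own to rebuild the PDF for
# a folder that was already downloaded, without re-running the interactive
# prompts. This assumes the script is saved as webtoon_scraper.py; the module
# and folder names below are hypothetical examples.
#
#     from webtoon_scraper import WebtoonScraper
#     WebtoonScraper.convert_to_pdf("wee-episode-363")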