Created
December 21, 2019 23:11
-
-
Save cjavad/9d1491075c30f57794d9e476d79973d5 to your computer and use it in GitHub Desktop.
Python script that downloads manga chapters from mangadex.org; it works as an executable and takes arguments. (Downloads to folders it creates.) REQUIRES SELENIUM and a matching CHROME + CHROMEDRIVER.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import requests | |
import os | |
import sys | |
import argparse | |
def check_if_valid_url(url):
    """Tell whether *url* looks like a link to a mangadex chapter.

    The check is purely textual: the string must contain "http" and
    "://mangadex.org/chapter/", and must not contain "gap".

    Returns True when all three conditions hold, False otherwise.
    """
    points_at_chapter = "http" in url and "://mangadex.org/chapter/" in url
    return points_at_chapter and "gap" not in url
# Using the sleep function to pause and wait for the webpage to load | |
from time import sleep | |
# Import selenium webdriver
from selenium import webdriver | |
# Importing multiple elements from the webdriver/common namespace | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.common.action_chains import ActionChains | |
from selenium.webdriver.chrome.options import Options | |
from selenium.common.exceptions import TimeoutException, WebDriverException | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions | |
# string result of type(driver) added as a const to use for checking | |
DRIVER_TYPE = "<class 'selenium.webdriver.chrome.webdriver.WebDriver'>" | |
def init_driver(path = "./chromedriver"):
    """Create a hidden (headless) Chrome webdriver.

    path: location of the chromedriver executable handed to selenium.

    Returns the webdriver.Chrome instance that was created.
    """
    window_size = "1920,1080"
    options = Options()
    for flag in ("--headless", "--window-size=%s" % window_size):
        options.add_argument(flag)
    return webdriver.Chrome(executable_path=path, options=options)
def nextchapter(driver):
    """Find the link to the next chapter on an already loaded chapter page.

    driver: a webdriver that has loaded a mangadex chapter page.

    Returns the "href" attribute of the element with class
    "chapter-link-right", or False when that lookup fails for any reason.
    """
    try:
        return driver.find_element_by_class_name("chapter-link-right").get_attribute("href")
    except Exception:
        # Deliberately best-effort: any lookup failure means "no next chapter".
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through
        # so the user can still abort a long download.
        return False
def getallpages(baselink, total_pages):
    """Build the image links for every page of a chapter.

    baselink: link to the first page image; its file name (the part after
        the last "/") is expected to contain the page number 1,
        e.g. ".../x1.jpg".
    total_pages: number of pages in the chapter (int or numeric string).

    Returns a list of total_pages links in which the page number runs from
    1 to total_pages. Only the last standalone "1" in the file name (a 1 not
    adjacent to other digits, optionally zero padded) is treated as the page
    number, so other digits in the name are left alone and zero padding is
    preserved. When the file name has no such number every entry is
    baselink unchanged.
    """
    import re  # local import: only this function needs it

    total = int(total_pages)
    directory, slash, filename = baselink.rpartition("/")
    candidates = list(re.finditer(r"(?<!\d)0*1(?!\d)", filename))
    if not candidates:
        return [baselink] * total
    page_number = candidates[-1]
    head = directory + slash + filename[:page_number.start()]
    tail = filename[page_number.end():]
    width = len(page_number.group())
    return [head + str(page).zfill(width) + tail for page in range(1, total + 1)]
def load_mangadex_chapter(driver, chapter_link, delay = 2):
    """Load a chapter page and collect everything that can be read from it.

    driver: webdriver used to open the page.
    chapter_link: link to the chapter; the fifth "/"-separated piece of the
        link is used as the chapter id.
    delay: seconds budgeted for the page load; half is slept up front and
        half is the timeout when waiting for the page image.

    Returns a dict shaped like this:
    {
        "title": manga title,
        "ctitle": chapter title,
        "total_pages": total pages,
        "pages": [jpg links],
        "next_chapter": link to next chapter
    }
    """
    image_xpath = "html/body/div[1]/div[2]/div[2]/div/img"
    driver.get(chapter_link)
    try:
        half_delay = delay/2
        sleep(half_delay)
        # The original author notes this "message" element is for the gap situation.
        notice = driver.find_element_by_class_name("message")
        ActionChains(driver).move_to_element(notice).click().perform()
        image_located = expected_conditions.presence_of_element_located((By.XPATH, image_xpath))
        WebDriverWait(driver, half_delay).until(image_located)
    except Exception:
        # Any failure above just gets one more second of waiting.
        sleep(1)
    chapter_id = chapter_link.split("/")[4]
    manga_title = driver.find_element_by_class_name("manga-link").text
    option_xpath = "html/body/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/select/option[@value='{}']".format(chapter_id)
    chapter_title = driver.find_element_by_xpath(option_xpath).text
    page_count = driver.find_element_by_class_name("total-pages").text
    first_image = driver.find_element_by_xpath(image_xpath).get_attribute("src")
    return {
        "title": manga_title,
        "ctitle": chapter_title,
        "total_pages": page_count,
        "pages": getallpages(first_image, page_count),
        "next_chapter": nextchapter(driver),
    }
def get_whole_mangadex(driver, first_chapter, single_chapter = False):
    """Walk a manga chapter by chapter, starting at first_chapter.

    Each chapter is loaded with load_mangadex_chapter. The walk stops after
    the first chapter when single_chapter is true, and otherwise when a
    chapter has no next-chapter link containing "chapter".

    Returns a dict shaped like this:
    {
        "title": manga title,
        "total_pages": total pages in manga,
        "chapters": [
            {
                "title": chapter title,
                "pages": [jpg links]
            } ...
        ]
    }
    """
    summary = {
        "title": "",
        "total_pages": 0,
        "chapters": [],
    }
    current_link = first_chapter
    while True:
        chapter = load_mangadex_chapter(driver, current_link)
        summary["chapters"].append({
            "title": chapter["ctitle"],
            "pages": chapter["pages"],
        })
        summary["total_pages"] += int(chapter["total_pages"])
        summary["title"] = chapter["title"]
        if single_chapter:
            break
        following = chapter["next_chapter"]
        if not (following and "chapter" in following):
            break
        current_link = following
    return summary
def download_link(url):
    """Fetch url with requests, sending a desktop Chrome User-Agent.

    Returns the raw response body as bytes.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
    response = requests.get(url, headers=headers)
    payload = response.content
    response.close()
    return payload
def download_chapter(chaptername, pages):
    """Download a single mangadex chapter into its own folder.

    chaptername: chapter title; spaces and path separators are replaced by
        "-" to form the folder name, created in the current directory.
    pages: iterable of image links, saved as page0.jpg, page1.jpg, ...

    An already existing folder is reused.
    """
    chaptername = "-".join(chaptername.split(" "))
    # A path separator inside a title would be read as a nested directory and
    # make os.mkdir fail, so flatten it like the spaces above.
    for separator in {"/", os.sep, os.altsep} - {None}:
        chaptername = chaptername.replace(separator, "-")
    try:
        os.mkdir(chaptername)
    except FileExistsError:
        pass
    for j, i in enumerate(pages):
        name = "page" + str(j) + ".jpg"
        path = os.path.join("./" + chaptername, name)
        downloaded_content = download_link(i)
        with open(path, "wb") as f:
            f.write(downloaded_content)
def download_manga(first_chapter_link, single_chapter = False, driver = False):
    """Download an entire manga (or just a single chapter).

    first_chapter_link: link to the chapter to start from.
    single_chapter: when true only that chapter is downloaded.
    driver: an existing Chrome webdriver to reuse; anything else makes the
        function create its own driver with init_driver().

    A folder named after the manga title is created (or reused) in the
    current directory, with one sub-folder per chapter inside it.
    """
    owns_driver = str(type(driver)) != DRIVER_TYPE
    if owns_driver:
        driver = init_driver()
    try:
        manga = get_whole_mangadex(driver, first_chapter_link, single_chapter)
    finally:
        # A driver created here is not visible to the caller, so it has to be
        # shut down here or the headless Chrome process is leaked.
        if owns_driver:
            driver.quit()
    chapters = manga["chapters"]
    if single_chapter:
        print("Downloading 1 chapter...")
    else:
        print(len(chapters), "chapters found in the manga", manga["title"])
    start_dir = os.getcwd()
    try:
        os.mkdir(manga["title"])
    except FileExistsError:
        pass
    os.chdir(manga["title"])
    try:
        for i, c in enumerate(chapters):
            download_chapter(c["title"], c["pages"])
            print("Downloaded", " »" + c["title"] + "« ", str(i + 1) + "/" + str(len(chapters)))
        print("\n")
    finally:
        # Restore the working directory even when a download fails, so later
        # calls do not nest their folders inside this manga's folder.
        os.chdir(start_dir)
# Command line interface: an optional chapter url plus -s/--single, -L/--list and -D/--driver
parser = argparse.ArgumentParser(
    description='Download a mangadex chapter or a whole manga.',
)
parser.add_argument(
    'url',
    metavar='url',
    type=str,
    nargs='?',
    default=False,
    help='mangadex chapter link',
)
parser.add_argument(
    '-s', '--single',
    action='store_true',
    help='Use this option to only download the linked chapter',
)
parser.add_argument(
    '-L', '--list',
    metavar='urls',
    nargs='*',
    default=False,
    help='Use this for multiple chapters. Combine with -s or --single to only download those chapters',
)
parser.add_argument(
    '-D', '--driver',
    metavar='driver',
    default=False,
    help='Path for chromedriver',
)
# main function
if __name__ == "__main__":
    parsed = parser.parse_args(sys.argv[1:])
    if not parsed.url and not parsed.list:
        # Nothing to download: show the help without starting Chrome first.
        parser.print_help()
    elif parsed.url and not check_if_valid_url(parsed.url):
        print('Please use a valid url')
    else:
        # init driver; fall back to the default chromedriver path when the
        # user supplied path cannot be started
        if parsed.driver:
            try:
                global_driver = init_driver(parsed.driver)
            except WebDriverException:
                global_driver = init_driver()
        else:
            global_driver = init_driver()
        try:
            # check which method is used and download accordingly
            if parsed.url:
                download_manga(parsed.url, parsed.single, global_driver)
            else:
                for i in parsed.list:
                    if check_if_valid_url(i):
                        download_manga(i, parsed.single, global_driver)
                    else:
                        print('Please use valid urls')
                        break
        finally:
            # Always shut the browser down so no headless Chrome is left behind.
            global_driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment