cjavad · December 21, 2019 23:11
diff --git a/mangadex.py b/mangadex.py
 #!/usr/bin/env python3

 import requests
 import os
 import sys
 import argparse

 def check_if_valid_url(url):
     """Return True if url is a direct link to a mangadex chapter.

     A valid link starts with ``http://mangadex.org/chapter/`` or
     ``https://mangadex.org/chapter/`` and does not contain ``gap``.

     The prefix is anchored at the start of the string because
     load_mangadex_chapter() reads the chapter id from a fixed position in
     the link; a string that merely contains the mangadex address somewhere
     (for example inside a query string) must not pass. Anything that is
     not a string is reported as invalid.
     """
     if not isinstance(url, str):
         return False

     prefixes = ("http://mangadex.org/chapter/", "https://mangadex.org/chapter/")
     return url.startswith(prefixes) and "gap" not in url


 # Using the sleep function to pause and wait for the webpage to load
 from time import sleep

 # Import selenium webdriver
 from selenium import webdriver

 # Importing multiple elements from the webdriver/common namespace
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.chrome.options import Options
 from selenium.common.exceptions import TimeoutException, WebDriverException
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions

 # String form of type(driver) for a Chrome webdriver object. It is kept as a
 # constant so that an object can be checked by comparing str(type(obj))
 # against it.
 DRIVER_TYPE = "<class 'selenium.webdriver.chrome.webdriver.WebDriver'>"

 def init_driver(path = "./chromedriver"):
     """Create a hidden (headless) Chrome webdriver.

     path is the location of the chromedriver executable. The browser is
     started with the "--headless" flag and a 1920,1080 window size, and the
     new driver object is returned.
     """
     window_size = "1920,1080"

     options = Options()
     for argument in ("--headless", "--window-size=" + window_size):
         options.add_argument(argument)

     return webdriver.Chrome(executable_path=path, options=options)

 def nextchapter(driver):
     """Return the link to the next chapter, or False if it cannot be found.

     driver is a webdriver that has already loaded a mangadex chapter page.
     The link is read from the ``href`` attribute of the element with the
     class ``chapter-link-right``.
     """
     try:
         link = driver.find_element_by_class_name("chapter-link-right")
         return link.get_attribute("href")
     except Exception:
         # Any failed lookup counts as "no next chapter". Catching Exception
         # instead of using a bare except keeps Ctrl-C and interpreter exit
         # from being swallowed here.
         return False

 def getallpages(baselink, total_pages):
     """Build the list of image links for every page of a chapter.

     baselink is the image link of the first page. Its last path segment
     (the file name) is copied once per page with each "1" in it replaced by
     the page number, counting from 1 up to and including total_pages.
     total_pages is converted with int(), so a numeric string works too.

     NOTE(review): every "1" in the file name is replaced, not only the page
     number -- confirm that file names never contain another "1".
     """
     *folders, filename = baselink.split("/")
     last_page = int(total_pages)

     return [
         "/".join(folders + [filename.replace("1", str(page))])
         for page in range(1, last_page + 1)
     ]

 '''
 Basic loading function for a chapter page, gets all the information that you can get from a chapter and returns it like this:

 {
    "title":manga title,
    "ctitle":chapter title,
    "total_pages":total pages,
    "pages":[jpg links],
    "next_chapter": link to next chapter
 }
 '''
 def load_mangadex_chapter(driver, chapter_link, delay = 2):
    """Load a chapter page in the browser and collect its details.

    driver is a webdriver, chapter_link the link of the chapter and delay a
    number of seconds: half of it is slept before looking for the "message"
    element, the other half is the timeout while waiting for the page image.

    Returns a dict with the keys "title", "ctitle", "total_pages", "pages"
    and "next_chapter". "total_pages" is the text of the total-pages element
    as found on the page, "pages" comes from getallpages() and
    "next_chapter" is whatever nextchapter() returns.
    """
    driver.get(chapter_link)

    try:
        sleep(delay/2)
        # If the page shows a "message" element (the gap situation), click it
        # and wait until the page image is present.
        alert = driver.find_element_by_class_name("message") # for gap situation
        ActionChains(driver).move_to_element(alert).click().perform()
        WebDriverWait(driver, delay/2).until(expected_conditions.presence_of_element_located((By.XPATH, "html/body/div[1]/div[2]/div[2]/div/img")))
    except Exception:
        # Reached whenever anything above fails (for instance when the
        # "message" lookup finds nothing); the image wait is then skipped.
        sleep(1) # wait for one more second


    # The chapter id is the fifth "/"-separated piece of the link. It picks
    # the matching <option> of the select element for the chapter title.
    mid = chapter_link.split("/")[4]
    title = driver.find_element_by_class_name("manga-link").text
    ctitle = driver.find_element_by_xpath("html/body/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/select/option[@value='{}']".format(mid)).text
    total_pages = driver.find_element_by_class_name("total-pages").text
    # Source of the image currently shown; getallpages() derives the links
    # of all pages from it.
    baselink = driver.find_element_by_xpath("html/body/div[1]/div[2]/div[2]/div/img").get_attribute("src")

    info_sheet = {
        "title":title,
        "ctitle":ctitle,
        "total_pages":total_pages,
        "pages":getallpages(baselink, total_pages),
        "next_chapter": nextchapter(driver)
    }

    return info_sheet


 def get_whole_mangadex(driver, first_chapter, single_chapter = False):
     """Collect chapter after chapter, starting at first_chapter.

     Every chapter is loaded with load_mangadex_chapter() and its
     "next chapter" link is followed until there is none or the link no
     longer contains "chapter". With single_chapter set, only first_chapter
     is loaded.

     Returns a dict like this:

     {
         "title": manga title,
         "total_pages": total pages of all loaded chapters,
         "chapters": [
             {
                 "title": chapter title,
                 "pages": [jpg links]
             } ...
         ]
     }
     """
     manga_title = ""
     page_count = 0
     chapters = []
     link = first_chapter

     while True:
         chapter = load_mangadex_chapter(driver, link)
         chapters.append({
             "title": chapter["ctitle"],
             "pages": chapter["pages"],
         })
         page_count += int(chapter["total_pages"])
         manga_title = chapter["title"]

         if single_chapter:
             break

         following = chapter["next_chapter"]
         if not following or "chapter" not in following:
             break
         link = following

     return {
         "title": manga_title,
         "total_pages": page_count,
         "chapters": chapters,
     }

 def download_link(url, timeout = 30):
     """Download url with requests and return the raw response body.

     timeout (seconds) is handed to requests.get(). Without a timeout a
     server that stops answering would block the whole download forever;
     with it, requests raises its timeout exception instead.

     The HTTP status is not checked: the body of an error response is
     returned like any other.
     """
     headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
     response = requests.get(url, headers=headers, timeout=timeout)
     try:
         return response.content
     finally:
         # Release the connection whether or not reading the body worked.
         response.close()

 def download_chapter(chaptername, pages):
     """Download every page of one chapter into its own folder.

     The folder is created in the current working directory and named after
     chaptername, with spaces turned into "-" and path separators into "_",
     so that a title such as "Ch 1/2" cannot point into another (possibly
     missing) directory. A folder that already exists is reused.

     pages is a list of image links; they are saved in order as page0.jpg,
     page1.jpg, ...
     """
     folder = "-".join(chaptername.split(" "))
     for separator in (os.sep, os.altsep):
         if separator:
             folder = folder.replace(separator, "_")

     os.makedirs(folder, exist_ok=True)

     for number, link in enumerate(pages):
         target = os.path.join(folder, "page" + str(number) + ".jpg")

         # Download first so a failed request does not leave an empty file.
         downloaded_content = download_link(link)

         with open(target, "wb") as f:
             f.write(downloaded_content)

 def download_manga(first_chapter_link, single_chapter = False, driver = False):
     """Download an entire manga (or just a single chapter) into a folder.

     first_chapter_link is the chapter to start from. With single_chapter
     set only that chapter is downloaded, otherwise all following chapters
     are downloaded too. driver is an already running webdriver; when it is
     left out (or is not a webdriver) a new one is started with
     init_driver() and shut down again once the chapter list is known.

     The chapters are stored in a folder named after the manga title inside
     the current working directory. The working directory is changed while
     downloading and put back afterwards, also when a download fails.
     """
     # webdriver.Remote is the common base class of the browser drivers, so
     # this accepts any real webdriver instead of comparing type names.
     owns_driver = not isinstance(driver, webdriver.Remote)
     if owns_driver:
         driver = init_driver()

     try:
         manga = get_whole_mangadex(driver, first_chapter_link, single_chapter)
     finally:
         # Only a driver started here is ours to quit; one passed in by the
         # caller may still be needed for further downloads.
         if owns_driver:
             driver.quit()

     chapters = manga["chapters"]

     if single_chapter:
         print("Downloading 1 chapter...")
     else:
         print(len(chapters), "chapters found in the manga", manga["title"])

     start_dir = os.getcwd()
     os.makedirs(manga["title"], exist_ok=True)
     os.chdir(manga["title"])

     try:
         for index, chapter in enumerate(chapters):
             download_chapter(chapter["title"], chapter["pages"])
             print("Downloaded", " »" + chapter["title"] + "« ", str(index + 1) + "/" + str(len(chapters)))
     finally:
         # Go back to where we started even if a download raised.
         os.chdir(start_dir)

     print("\n")

 # Command line interface: an optional chapter link plus the -s, -L and -D
 # options. Everything that is not given on the command line ends up as False.
 parser = argparse.ArgumentParser(
     description='Download a mangadex chapter or a whole manga.',
 )
 parser.add_argument(
     'url',
     metavar='url',
     nargs='?',
     default=False,
     help='mangadex chapter link',
 )
 parser.add_argument(
     '-s', '--single',
     action='store_true',
     help='Use this option to only download the linked chapter',
 )
 parser.add_argument(
     '-L', '--list',
     metavar='urls',
     nargs='*',
     default=False,
     help='Use this for multiple chapters. Combine with -s or --single to only download those chapters',
 )
 parser.add_argument(
     '-D', '--driver',
     metavar='driver',
     default=False,
     help='Path for chromedriver',
 )


 def main(argv=None):
     """Parse the command line and download what was asked for.

     argv is the list of arguments without the program name; when left out
     sys.argv[1:] is used.
     """
     parsed = parser.parse_args(sys.argv[1:] if argv is None else argv)

     # Check which method is used before a browser is started, so that just
     # showing the help text does not launch a Chrome process.
     if parsed.url:
         urls = [parsed.url]
         invalid_message = 'Please use a valid url'
     elif parsed.list:
         urls = parsed.list
         invalid_message = 'Please use valid urls'
     else:
         parser.print_help()
         return

     driver = None
     if parsed.driver:
         try:
             driver = init_driver(parsed.driver)
         except WebDriverException:
             # The given chromedriver path did not work; fall back to the
             # default location below.
             driver = None
     if driver is None:
         driver = init_driver()

     try:
         for url in urls:
             if not check_if_valid_url(url):
                 print(invalid_message)
                 break
             download_manga(url, parsed.single, driver)
     finally:
         # Always shut the browser down, also when a download fails.
         driver.quit()


 # main function
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3

	import requests
	import os
	import sys
	import argparse

	def check_if_valid_url(url):
	    """Return True if url is a direct link to a mangadex chapter.

	    A valid link starts with ``http://mangadex.org/chapter/`` or
	    ``https://mangadex.org/chapter/`` and does not contain ``gap``.

	    The prefix is anchored at the start of the string because
	    load_mangadex_chapter() reads the chapter id from a fixed position in
	    the link; a string that merely contains the mangadex address somewhere
	    (for example inside a query string) must not pass. Anything that is
	    not a string is reported as invalid.
	    """
	    if not isinstance(url, str):
	        return False

	    prefixes = ("http://mangadex.org/chapter/", "https://mangadex.org/chapter/")
	    return url.startswith(prefixes) and "gap" not in url


	# Using the sleep function to pause and wait for the webpage to load
	from time import sleep

	# Import selenium webdriver
	from selenium import webdriver

	# Importing multiple elements from the webdriver/common namespace
	from selenium.webdriver.common.by import By
	from selenium.webdriver.common.action_chains import ActionChains
	from selenium.webdriver.chrome.options import Options
	from selenium.common.exceptions import TimeoutException, WebDriverException
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.webdriver.support import expected_conditions

	# String form of type(driver) for a Chrome webdriver object. It is kept as a
	# constant so that an object can be checked by comparing str(type(obj))
	# against it.
	DRIVER_TYPE = "<class 'selenium.webdriver.chrome.webdriver.WebDriver'>"

	def init_driver(path = "./chromedriver"):
	    """Create a hidden (headless) Chrome webdriver.

	    path is the location of the chromedriver executable. The browser is
	    started with the "--headless" flag and a 1920,1080 window size, and the
	    new driver object is returned.
	    """
	    window_size = "1920,1080"

	    options = Options()
	    for argument in ("--headless", "--window-size=" + window_size):
	        options.add_argument(argument)

	    return webdriver.Chrome(executable_path=path, options=options)

	def nextchapter(driver):
	    """Return the link to the next chapter, or False if it cannot be found.

	    driver is a webdriver that has already loaded a mangadex chapter page.
	    The link is read from the ``href`` attribute of the element with the
	    class ``chapter-link-right``.
	    """
	    try:
	        link = driver.find_element_by_class_name("chapter-link-right")
	        return link.get_attribute("href")
	    except Exception:
	        # Any failed lookup counts as "no next chapter". Catching Exception
	        # instead of using a bare except keeps Ctrl-C and interpreter exit
	        # from being swallowed here.
	        return False

	def getallpages(baselink, total_pages):
	    """Build the list of image links for every page of a chapter.

	    baselink is the image link of the first page. Its last path segment
	    (the file name) is copied once per page with each "1" in it replaced by
	    the page number, counting from 1 up to and including total_pages.
	    total_pages is converted with int(), so a numeric string works too.

	    NOTE(review): every "1" in the file name is replaced, not only the page
	    number -- confirm that file names never contain another "1".
	    """
	    *folders, filename = baselink.split("/")
	    last_page = int(total_pages)

	    return [
	        "/".join(folders + [filename.replace("1", str(page))])
	        for page in range(1, last_page + 1)
	    ]

	def load_mangadex_chapter(driver, chapter_link, delay = 2):
	    """Load a chapter page in the browser and collect its details.

	    driver is a webdriver, chapter_link the link of the chapter and delay a
	    number of seconds: half of it is slept before looking for the "message"
	    element, the other half is the timeout while waiting for the page image.

	    Returns a dict like this:

	    {
	        "title": manga title,
	        "ctitle": chapter title,
	        "total_pages": total pages (text as found on the page),
	        "pages": [jpg links],
	        "next_chapter": whatever nextchapter() returns
	    }
	    """
	    driver.get(chapter_link)

	    try:
	        sleep(delay/2)
	        # If the page shows a "message" element (the gap situation), click
	        # it and wait until the page image is present.
	        alert = driver.find_element_by_class_name("message")
	        ActionChains(driver).move_to_element(alert).click().perform()
	        WebDriverWait(driver, delay/2).until(expected_conditions.presence_of_element_located((By.XPATH, "html/body/div[1]/div[2]/div[2]/div/img")))
	    except Exception:
	        # Reached whenever anything above fails (for instance when the
	        # "message" lookup finds nothing); the image wait is then skipped.
	        sleep(1) # wait for one more second

	    # The chapter id is the fifth "/"-separated piece of the link. It picks
	    # the matching <option> of the select element for the chapter title.
	    mid = chapter_link.split("/")[4]
	    title = driver.find_element_by_class_name("manga-link").text
	    ctitle = driver.find_element_by_xpath("html/body/div[1]/div[1]/div[1]/div[2]/div[2]/div[1]/select/option[@value='{}']".format(mid)).text
	    total_pages = driver.find_element_by_class_name("total-pages").text
	    # Source of the image currently shown; getallpages() derives the links
	    # of all pages from it.
	    baselink = driver.find_element_by_xpath("html/body/div[1]/div[2]/div[2]/div/img").get_attribute("src")

	    info_sheet = {
	        "title":title,
	        "ctitle":ctitle,
	        "total_pages":total_pages,
	        "pages":getallpages(baselink, total_pages),
	        "next_chapter": nextchapter(driver)
	    }

	    return info_sheet


	def get_whole_mangadex(driver, first_chapter, single_chapter = False):
	    """Collect chapter after chapter, starting at first_chapter.

	    Every chapter is loaded with load_mangadex_chapter() and its
	    "next chapter" link is followed until there is none or the link no
	    longer contains "chapter". With single_chapter set, only first_chapter
	    is loaded.

	    Returns a dict like this:

	    {
	        "title": manga title,
	        "total_pages": total pages of all loaded chapters,
	        "chapters": [
	            {
	                "title": chapter title,
	                "pages": [jpg links]
	            } ...
	        ]
	    }
	    """
	    manga_title = ""
	    page_count = 0
	    chapters = []
	    link = first_chapter

	    while True:
	        chapter = load_mangadex_chapter(driver, link)
	        chapters.append({
	            "title": chapter["ctitle"],
	            "pages": chapter["pages"],
	        })
	        page_count += int(chapter["total_pages"])
	        manga_title = chapter["title"]

	        if single_chapter:
	            break

	        following = chapter["next_chapter"]
	        if not following or "chapter" not in following:
	            break
	        link = following

	    return {
	        "title": manga_title,
	        "total_pages": page_count,
	        "chapters": chapters,
	    }

	def download_link(url, timeout = 30):
	    """Download url with requests and return the raw response body.

	    timeout (seconds) is handed to requests.get(). Without a timeout a
	    server that stops answering would block the whole download forever;
	    with it, requests raises its timeout exception instead.

	    The HTTP status is not checked: the body of an error response is
	    returned like any other.
	    """
	    headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
	    response = requests.get(url, headers=headers, timeout=timeout)
	    try:
	        return response.content
	    finally:
	        # Release the connection whether or not reading the body worked.
	        response.close()

	def download_chapter(chaptername, pages):
	    """Download every page of one chapter into its own folder.

	    The folder is created in the current working directory and named after
	    chaptername, with spaces turned into "-" and path separators into "_",
	    so that a title such as "Ch 1/2" cannot point into another (possibly
	    missing) directory. A folder that already exists is reused.

	    pages is a list of image links; they are saved in order as page0.jpg,
	    page1.jpg, ...
	    """
	    folder = "-".join(chaptername.split(" "))
	    for separator in (os.sep, os.altsep):
	        if separator:
	            folder = folder.replace(separator, "_")

	    os.makedirs(folder, exist_ok=True)

	    for number, link in enumerate(pages):
	        target = os.path.join(folder, "page" + str(number) + ".jpg")

	        # Download first so a failed request does not leave an empty file.
	        downloaded_content = download_link(link)

	        with open(target, "wb") as f:
	            f.write(downloaded_content)

	def download_manga(first_chapter_link, single_chapter = False, driver = False):
	    """Download an entire manga (or just a single chapter) into a folder.

	    first_chapter_link is the chapter to start from. With single_chapter
	    set only that chapter is downloaded, otherwise all following chapters
	    are downloaded too. driver is an already running webdriver; when it is
	    left out (or is not a webdriver) a new one is started with
	    init_driver() and shut down again once the chapter list is known.

	    The chapters are stored in a folder named after the manga title inside
	    the current working directory. The working directory is changed while
	    downloading and put back afterwards, also when a download fails.
	    """
	    # webdriver.Remote is the common base class of the browser drivers, so
	    # this accepts any real webdriver instead of comparing type names.
	    owns_driver = not isinstance(driver, webdriver.Remote)
	    if owns_driver:
	        driver = init_driver()

	    try:
	        manga = get_whole_mangadex(driver, first_chapter_link, single_chapter)
	    finally:
	        # Only a driver started here is ours to quit; one passed in by the
	        # caller may still be needed for further downloads.
	        if owns_driver:
	            driver.quit()

	    chapters = manga["chapters"]

	    if single_chapter:
	        print("Downloading 1 chapter...")
	    else:
	        print(len(chapters), "chapters found in the manga", manga["title"])

	    start_dir = os.getcwd()
	    os.makedirs(manga["title"], exist_ok=True)
	    os.chdir(manga["title"])

	    try:
	        for index, chapter in enumerate(chapters):
	            download_chapter(chapter["title"], chapter["pages"])
	            print("Downloaded", " »" + chapter["title"] + "« ", str(index + 1) + "/" + str(len(chapters)))
	    finally:
	        # Go back to where we started even if a download raised.
	        os.chdir(start_dir)

	    print("\n")

	# Command line interface: an optional chapter link plus the -s, -L and -D
	# options. Everything that is not given on the command line ends up as False.
	parser = argparse.ArgumentParser(
	    description='Download a mangadex chapter or a whole manga.',
	)
	parser.add_argument(
	    'url',
	    metavar='url',
	    nargs='?',
	    default=False,
	    help='mangadex chapter link',
	)
	parser.add_argument(
	    '-s', '--single',
	    action='store_true',
	    help='Use this option to only download the linked chapter',
	)
	parser.add_argument(
	    '-L', '--list',
	    metavar='urls',
	    nargs='*',
	    default=False,
	    help='Use this for multiple chapters. Combine with -s or --single to only download those chapters',
	)
	parser.add_argument(
	    '-D', '--driver',
	    metavar='driver',
	    default=False,
	    help='Path for chromedriver',
	)


	def main(argv=None):
	    """Parse the command line and download what was asked for.

	    argv is the list of arguments without the program name; when left out
	    sys.argv[1:] is used.
	    """
	    parsed = parser.parse_args(sys.argv[1:] if argv is None else argv)

	    # Check which method is used before a browser is started, so that just
	    # showing the help text does not launch a Chrome process.
	    if parsed.url:
	        urls = [parsed.url]
	        invalid_message = 'Please use a valid url'
	    elif parsed.list:
	        urls = parsed.list
	        invalid_message = 'Please use valid urls'
	    else:
	        parser.print_help()
	        return

	    driver = None
	    if parsed.driver:
	        try:
	            driver = init_driver(parsed.driver)
	        except WebDriverException:
	            # The given chromedriver path did not work; fall back to the
	            # default location below.
	            driver = None
	    if driver is None:
	        driver = init_driver()

	    try:
	        for url in urls:
	            if not check_if_valid_url(url):
	                print(invalid_message)
	                break
	            download_manga(url, parsed.single, driver)
	    finally:
	        # Always shut the browser down, also when a download fails.
	        driver.quit()


	# main function
	if __name__ == "__main__":
	    main()