Last active
January 2, 2016 03:49
-
-
Save seven332/685d41fdccc035cd4410 to your computer and use it in GitHub Desktop.
Download comic zip archives from http://www.tuku.cc/ (requires a logged-in session cookie).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
import os | |
import re | |
import sys | |
import zipfile | |
import requests | |
# Cookie for an authenticated tuku.cc session. Replace the placeholder value
# with the 'Cookie' request header copied from your logged-in browser session;
# the download endpoints below reject anonymous requests.
headers = {'Cookie': 'your cookie'}
def ensure_dir(directory):
    """Create *directory* (including parents) unless it already exists."""
    if os.path.exists(directory):
        return
    os.makedirs(directory)
def print_same_line(string):
    """Rewrite the current terminal line in place with *string* (no newline)."""
    line = '\r%s' % string
    sys.stdout.write(line)
    sys.stdout.flush()
# http://stackoverflow.com/questions/16694907/how-to-download-large-file-in-python-with-requests-py
def download_file(url, local_file):
    """Stream *url* into *local_file* in 1 KiB chunks.

    When the server reports a Content-Length, a percentage progress line is
    rewritten in place via print_same_line; otherwise no progress is shown.
    """
    r = requests.get(url, stream=True)
    try:
        content_length = r.headers.get('Content-Length')
        # size == -1 means "unknown length": skip the progress display.
        if content_length:
            size = int(content_length)
        else:
            size = -1
        downloaded = 0
        if size != -1:
            print_same_line('0.00%')
        with open(local_file, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                    # No per-chunk flush: the file object buffers, and the
                    # with-block flushes and closes on exit.
                    downloaded += len(chunk)
                    if size != -1:
                        print_same_line('%.2f%%' % (downloaded * 100 / float(size)))
        if size != -1:
            # Terminate the \r progress line so subsequent output starts
            # on a fresh line (the original's trailing `if` had lost its body).
            sys.stdout.write('\n')
    finally:
        # Always release the streamed connection back to the pool.
        r.close()
def get_download_url(part_url):
    """Resolve the final archive URL for a single downloadable part page."""
    part_page = requests.get(part_url, headers=headers)
    jsonp_match = re.search(r'var downurl = Tk\.jsonp_url\+"([^"]+)"', part_page.content)
    # The part page only yields a jsonp fragment; fetch it to get the real link.
    jsonp_page = requests.get('http://rtk.um5.cc/' + jsonp_match.group(1), headers=headers)
    unescaped = jsonp_page.content.replace('\/', '/')
    link_match = re.search(r'\("<a href=\\"([^"]+)\\"', unescaped)
    return link_match.group(1)
def get_download_url_list(comic_url):
    """Scrape *comic_url*: return (sanitized comic title, part URLs oldest-first)."""
    page = requests.get(comic_url)
    title_match = re.search(r'<li class="current"><h1>([^<]+)</h1></li>', page.content)
    # Re-encode the UTF-8 title to GBK (presumably for a GBK filesystem —
    # confirm) and strip characters Windows forbids in file names.
    forbidden = ('\\', '/', ':', '*', '?', '"', '<', '>', '|')
    raw_title = title_match.group(1).decode('utf-8').encode('gbk')
    name = ''.join(c for c in raw_title if c not in forbidden).rstrip()
    part_pattern = re.compile(r'<a href="([^"]+)" title=".+?">下载</a>')
    url_list = ['http://www.tuku.cc' + m.group(1) for m in part_pattern.finditer(page.content)]
    # The newest part is on the top, so reverse the list
    url_list.reverse()
    return name, url_list
# Sometimes download failed, the zip file is broken | |
def test_zip(zip_file_path): | |
try: | |
zipfile.ZipFile(zip_file_path) | |
return True | |
except (zipfile.BadZipfile): | |
print '*~*~*~*~*~*~*~*~ Warning Bad Zip File ~*~*~*~*~*~*~*~*' | |
os.remove(zip_file_path) | |
return False | |
def download_comic(comic_url): | |
download_dir, part_url_list = get_download_url_list(comic_url) | |
ensure_dir(download_dir) | |
error_times = 0 | |
i = 0 | |
for part_url in part_url_list: | |
i += 1 | |
print '### Part', i | |
print '* Part Url', part_url | |
file_name = '%04d.zip' % i | |
print '* File Name', file_name | |
file_path = os.path.join(download_dir, file_name) | |
print '* File Path', file_path | |
# Check exists | |
if os.path.exists(file_path) and test_zip(file_path): | |
print '* Exists', file_name | |
continue | |
file_url = get_download_url(part_url) | |
print '* File Url', file_url | |
download_file(file_url, file_path) | |
if not test_zip(file_path): | |
print '* Part %d Download Failed' % i | |
error_times += 1 | |
else: | |
print '* Part %d Download Complete' % i | |
return error_times | |
def download_comic_completely(comic_url): | |
turn = 1 | |
while True: | |
print '## Turn', turn | |
if not download_comic(comic_url): | |
break | |
else: | |
turn += 1 | |
def main(): | |
for url in sys.argv[1:]: | |
print '# Start Comic', url | |
download_comic_completely(url) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment