Last active
June 9, 2019 04:55
-
-
Save a0x/b36b946c56ab466b0b1d43295338ff91 to your computer and use it in GitHub Desktop.
网易公开课下载脚本
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://www.twblogs.net/a/5bdf0e822b717720b51c3328/zh-cn | |
# https://www.bilibili.com/read/cv1624058/ | |
import os | |
import sys | |
import requests | |
import time | |
from lxml import html | |
from you_get.common import r1, get_decoded_html | |
from win32com.client import Dispatch | |
def resource_url(url):
    """Build an ``.mp4`` URL from the m3u8 playlist referenced on a page.

    The page at *url* is fetched with ``get_decoded_html`` and searched for a
    quoted ``...-list.m3u8`` reference first, then for a plain quoted
    ``....m3u8`` reference. The text preceding that suffix, with ``.mp4``
    appended, is returned.

    NOTE(review): this presumes the server hosts an ``.mp4`` next to the
    playlist under the same stem -- confirm against the site.

    Args:
        url: Address of the lesson page to inspect.

    Returns:
        The playlist stem with ``.mp4`` appended.

    Raises:
        ValueError: If the page contains no quoted m3u8 reference.
    """
    # Named ``page`` rather than ``html`` so the imported lxml ``html`` module
    # is not shadowed inside this function.
    page = get_decoded_html(url)
    # The capture excludes quote characters so it cannot run across several
    # quoted strings on the same line, and the dots are escaped so they only
    # match a literal ".".
    stem = (r1(r'["\']([^"\']+)-list\.m3u8["\']', page)
            or r1(r'["\']([^"\']+)\.m3u8["\']', page))
    if not stem:
        # Without this guard the concatenation below fails with an opaque
        # "NoneType + str" TypeError.
        raise ValueError('no m3u8 playlist reference found in %s' % url)
    return stem + '.mp4'
def target_resource(course_url):
    """Collect the download targets listed on a course index page.

    The page is parsed with lxml; lesson indexes, titles and links are read
    from the first cell of each row of the second table inside the
    ``div.m-mn`` container.

    Args:
        course_url: URL of the course index page to scrape.

    Returns:
        A list of ``{'filename': ..., 'url': ...}`` dicts. ``filename`` is
        ``"<index> <title>.mp4"`` and ``url`` is whatever ``resource_url``
        returns for the lesson page. Entries are paired positionally, so the
        list is as long as the shortest of the index/title/link lists.

    Raises:
        requests.HTTPError: If the course page answers with an error status.
    """
    # Fail fast on HTTP errors instead of silently scraping an error page,
    # and bound the wait so a stalled connection cannot hang the script.
    res = requests.get(course_url, timeout=30)
    res.raise_for_status()
    tree = html.fromstring(res.text)

    first_cell = '//div[@class="m-mn"]//table[2]//tr/td[1]'

    # Filename list: the cell's own text nodes carry the lesson index, the
    # anchor text carries the title. Whitespace-only text nodes are dropped.
    indexes = [text.strip() for text in tree.xpath(first_cell + '/text()')]
    indexes = [text for text in indexes if text]
    titles = tree.xpath(first_cell + '/a/text()')
    filenames = ['%s %s.mp4' % (index, title.strip())
                 for index, title in zip(indexes, titles)]

    # URL list: lesson links are rewritten from the open.163.com host to
    # v.163.com before being resolved to a media URL.
    page_urls = [href.strip().replace('open.163.com', 'v.163.com')
                 for href in tree.xpath(first_cell + '/a//@href')]

    return [{'filename': filename, 'url': resource_url(page_url)}
            for filename, page_url in zip(filenames, page_urls)]
def dispatchXunLei(download_url, save_as, file_path):
    """Submit one download task through the ``ThunderAgent`` COM object.

    Args:
        download_url: Passed as the first ``AddTask`` argument.
        save_as: Passed as the second ``AddTask`` argument
            (presumably the target file name -- verify against the agent API).
        file_path: Passed as the third ``AddTask`` argument
            (presumably the destination directory -- verify against the agent API).
    """
    agent = Dispatch("ThunderAgent.Agent64.1")
    # Echo the task so the user can follow what is being queued.
    print(download_url, save_as, file_path)
    # NOTE(review): the meaning of the trailing "", "", -1, 0, 5 arguments is
    # defined by the ThunderAgent COM interface and is not visible here.
    task_args = (download_url, save_as, file_path, "", "", -1, 0, 5)
    agent.AddTask(*task_args)
    agent.CommitTasks()
# Usage: <script> <course_url> <save_dir>
# Both arguments are checked before any scraping starts, so a missing save
# directory is reported immediately instead of surfacing as an IndexError
# after every lesson page has already been fetched.
if len(sys.argv) < 3:
    sys.exit('usage: %s <course_url> <save_dir>' % sys.argv[0])
url = sys.argv[1]
save_dir = sys.argv[2]
data = target_resource(url)
for item in data:
    dispatchXunLei(item['url'], item['filename'], save_dir)
    # Brief pause between consecutive task submissions.
    time.sleep(0.5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install requests lxml you-get | |
import os | |
import requests | |
from lxml import html | |
from you_get.common import r1, get_decoded_html | |
def resource_url(url):
    """Build an ``.mp4`` URL from the m3u8 playlist referenced on a page.

    The page at *url* is fetched with ``get_decoded_html`` and searched for a
    quoted ``...-list.m3u8`` reference first, then for a plain quoted
    ``....m3u8`` reference. The text preceding that suffix, with ``.mp4``
    appended, is returned.

    NOTE(review): this presumes the server hosts an ``.mp4`` next to the
    playlist under the same stem -- confirm against the site.

    Args:
        url: Address of the lesson page to inspect.

    Returns:
        The playlist stem with ``.mp4`` appended.

    Raises:
        ValueError: If the page contains no quoted m3u8 reference.
    """
    # Named ``page`` rather than ``html`` so the imported lxml ``html`` module
    # is not shadowed inside this function.
    page = get_decoded_html(url)
    # The capture excludes quote characters so it cannot run across several
    # quoted strings on the same line, and the dots are escaped so they only
    # match a literal ".".
    stem = (r1(r'["\']([^"\']+)-list\.m3u8["\']', page)
            or r1(r'["\']([^"\']+)\.m3u8["\']', page))
    if not stem:
        # Without this guard the concatenation below fails with an opaque
        # "NoneType + str" TypeError.
        raise ValueError('no m3u8 playlist reference found in %s' % url)
    return stem + '.mp4'
# Imported here because only this script section needs it: curl is launched
# with an argument list instead of a shell command string.
import subprocess

# Course info
course_path = 'http://open.163.com/special/opencourse/financialtheory.html'
ext = 'mp4'

# Fail fast on HTTP errors instead of scraping an error page, and bound the
# wait so a stalled connection cannot hang the script.
res = requests.get(course_path, timeout=30)
res.raise_for_status()
tree = html.fromstring(res.text)

# Filename list: the cell's own text nodes carry the lesson index, the anchor
# text carries the title. Whitespace-only text nodes are dropped.
file_index_list = tree.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/text()')
filename_list = tree.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/a/text()')
file_index_list = [text.strip() for text in file_index_list]
file_index_list = [text for text in file_index_list if text]
filename_list = ['%s %s' % (index, title.strip())
                 for index, title in zip(file_index_list, filename_list)]
print("Here are your courses:")
print(*filename_list, sep='\n')

# URL list: lesson links are rewritten from the open.163.com host to
# v.163.com before being resolved to a media URL.
page_url_list = tree.xpath('//div[@class="m-mn"]//table[2]//tr/td[1]/a//@href')
page_url_list = [href.strip().replace('open.163.com', 'v.163.com')
                 for href in page_url_list]
resource_url_list = [resource_url(page_url) for page_url in page_url_list]
print("\nHere are the resources:")
print(*resource_url_list, sep='\n')

for filename, media_url in zip(filename_list, resource_url_list):
    print("Download %s at %s" % (filename, media_url))
    # Passing an argument list bypasses the shell entirely, so characters
    # such as '&' in the URL or quotes in the lesson title can neither break
    # the command nor be interpreted as shell syntax.
    result = subprocess.run(
        ['curl', media_url, '--output', '%s.%s' % (filename, ext)])
    if result.returncode != 0:
        # Keep going with the remaining lessons, but do not hide the failure.
        print("curl exited with status %d for %s" % (result.returncode, filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment