Last active
July 6, 2018 05:54
-
-
Save yuwen41200/ce16bafa60e8139867a8e154a60ee26a to your computer and use it in GitHub Desktop.
Automatically download all handouts from NCTU e3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.idea/
secret
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import shutil
from time import sleep
from urllib.parse import unquote

from selenium import webdriver
from selenium.webdriver.common.by import By
# Resume position in the course list: zero-based results page, and zero-based
# index of the next course to open on that page.
course_pg = 0
course_no = 0

# Matches the id attribute of each handout link found in the page source.
p = re.compile(r'id="ctl00_ContentPlaceHolder1_dgCourseHandout_.+?_lnkFile"')

# Element id of the pager's "next" control; the same id is looked up on both
# the course list and the handout list.
NEXT_BUTTON_ID = "ctl00_ContentPlaceHolder1_DataNavigator1_ctl03"


def _pager_exhausted(button):
    """Return True when the button's inline style contains "not-allowed".

    A button that has no style attribute at all is treated as usable.
    """
    return "not-allowed" in (button.get_attribute("style") or "")


# The "secret" file supplies four lines, in order: account, password, the
# directory files are moved out of (presumably the browser's download
# folder -- confirm), and the directory they are sorted into per course.
with open("secret") as file:
    usr = file.readline().strip()
    pwd = file.readline().strip()
    src_dir = file.readline().strip()
    dest_dir = file.readline().strip()

driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)
    driver.get("https://dcpc.nctu.edu.tw")
    print(driver.title)

    # Log in with the credentials read from the secret file.
    driver.find_element(By.ID, "txtAccount").send_keys(usr)
    driver.find_element(By.ID, "txtPwd").send_keys(pwd)
    driver.find_element(By.ID, "btnLoginIn").click()
    sleep(5)
    print(driver.find_element(By.ID, "ctl00_lbWelcomeT").text)

    while True:
        # Each pass starts again from the "past courses" list and pages
        # forward to the page where the previous pass stopped.
        driver.find_element(By.LINK_TEXT, "歷年課程").click()
        sleep(5)
        for _ in range(course_pg):
            next_button = driver.find_element(By.ID, NEXT_BUTTON_ID)
            # Explicit raise instead of assert: asserts vanish under -O.
            if _pager_exhausted(next_button):
                raise RuntimeError(
                    "course list ended before reaching page %d"
                    % (course_pg + 1)
                )
            next_button.click()
            sleep(5)

        course_links = driver.find_elements(By.LINK_TEXT, "進入課程")
        if course_no == len(course_links):
            # Every course on this page is done; advance to the next page,
            # or stop when the pager cannot go any further.
            print("page", course_pg + 1, ", number", course_no)
            course_no = 0
            course_pg += 1
            next_button = driver.find_element(By.ID, NEXT_BUTTON_ID)
            if _pager_exhausted(next_button):
                break
            next_button.click()
            sleep(5)
            course_links = driver.find_elements(By.LINK_TEXT, "進入課程")

        # Enter the course and open its handout list.
        course_links[course_no].click()
        sleep(5)
        course_name = driver.find_element(By.ID, "ctl00_lbCurrentCourseName").text
        print(course_name)
        driver.find_element(By.LINK_TEXT, "教材列表").click()
        sleep(5)

        while True:
            html = driver.page_source
            for match in p.finditer(html):
                handout_id = match.group()
                # Slice off the leading 'id="' and the trailing '"'.
                driver.find_element(By.ID, handout_id[4:-1]).click()
                sleep(5)
                # Work in the most recently opened window, then close it and
                # return to the first window.
                driver.switch_to.window(driver.window_handles[-1])
                driver.switch_to.frame("frmMain")
                driver.find_element(By.ID, "ctl00_ContentPlaceHolder1_btnUrl").click()
                sleep(5)
                driver.close()
                driver.switch_to.window(driver.window_handles[0])

            next_button = driver.find_element(By.ID, NEXT_BUTTON_ID)
            if _pager_exhausted(next_button):
                break
            next_button.click()
            sleep(5)

        driver.find_element(By.ID, "ctl00_btnBackIndex").click()
        sleep(5)
        course_no += 1

        # Move everything currently in src_dir into a folder named after the
        # course, percent-decoding each file name on the way.
        os.makedirs(os.path.join(dest_dir, course_name), exist_ok=True)
        for filename in os.listdir(src_dir):
            shutil.move(
                os.path.join(src_dir, filename),
                os.path.join(dest_dir, course_name, unquote(filename))
            )
finally:
    # Always shut the browser down, including when a step above raises.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment