Last active
April 23, 2020 15:08
-
-
Save t510599/d15154077fae2d6d9b919b18361c564b to your computer and use it in GitHub Desktop.
Fetch the students who passed the first stage, for analysis.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute which numbers appear in more than one of three lists and write the
# overlaps to dup.json.
#
# Inputs (all read from the current working directory):
#   cac.json   - mapping of department code -> {"name": ..., "number": [...]}
#   ntut.json  - JSON array of numbers
#   ntust.json - JSON array of numbers
import json

with open("cac.json", "r", encoding="utf-8") as f:
    cac_db = json.load(f)
with open("ntut.json", "r", encoding="utf-8") as f:
    ntut = json.load(f)
with open("ntust.json", "r", encoding="utf-8") as f:
    ntust = json.load(f)

# "016222" is the single department entry compared against the other lists
# (presumably the NCU department of interest - confirm against cac.json).
ncu = set(cac_db["016222"]["number"])
ntut = set(ntut)
ntust = set(ntust)

ntust_inter = ncu.intersection(ntust)       # in both ncu and ntust
ntut_inter = ncu.intersection(ntut)         # in both ncu and ntut
jctv_inter = ntut.intersection(ntust)       # in both ntut and ntust
ncu_inter = jctv_inter.intersection(ncu)    # in all three lists

print(len(ntust_inter))
print(len(ntut_inter))
print(len(jctv_inter))
print(len(ncu_inter))

# Sets have no defined iteration order, so sort each result to make dup.json
# reproducible from run to run. The payload is built before the file is
# opened so a failure here cannot leave a truncated dup.json behind.
data = {
    "ntust_inter": sorted(ntust_inter),
    "ntut_inter": sorted(ntut_inter),
    "jctv_inter": sorted(jctv_inter),
    "ncu_inter": sorted(ncu_inter),
}
with open("dup.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build cac.json: for every department returned by a name search, download
# its result page and collect the numbers listed on it.
#
# The department list is cached in departments.json; it is only fetched from
# the query endpoint when that cache cannot be read.
import json
import re
from time import sleep

import requests as r
from bs4 import BeautifulSoup
from tqdm import tqdm

# list api ("{}" is replaced by the department code)
api = "https://www.cac.edu.tw/CacLink/apply109/109apply_6pSieveXw_ju9sd98/html_sieve_109pgx3/ColPost/common/apply/{}.htm"
department_name = "資訊工程"
# Seconds to wait for a single HTTP response; without a timeout a stalled
# connection would block the script indefinitely.
request_timeout = 30

# getting departments list
try:
    with open("departments.json", "r", encoding="utf-8") as f:
        departments = json.load(f)
except (OSError, ValueError) as err:
    # Missing/unreadable file or invalid JSON: report it and rebuild the
    # cache below. Fetching happens outside this handler so that a network
    # error is not reported as chained to the cache-read error.
    print(err)
    departments = None
else:
    print("Read local departments.json.")

if departments is None:
    departments = dict()
    print("Fetching departments...")
    res = r.post(
        "https://www.cac.edu.tw/apply109/system/109ColQrytk4p_forapply_os92k5w/ShowGsd.php",
        {
            "TxtGsd": department_name,
            "SubTxtGsd": "依學系名稱查詢",
            "action": "SubTxtGsd"
        },
        timeout=request_timeout,
    )
    res.encoding = "utf-8"
    doc = BeautifulSoup(res.text, "html.parser")
    rows = doc.select("table tr")
    # The first and last rows are skipped (presumably header and footer
    # rows - confirm against the live page).
    for row in rows[1:-1]:
        ele = row.findChildren("td")[0]  # name
        string = re.sub(r"\s+", " ", ele.get_text())
        # The 6 characters before the final character are used as the code;
        # everything up to 9 characters from the end is used as the name.
        code = string[-7:-1]
        departments[code] = string[0:-9]
    with open("departments.json", "w", encoding="utf-8") as f:
        json.dump(departments, f, ensure_ascii=False, indent=2)

print("Total: {} departments.".format(len(departments)))

# database: department code -> {"name": ..., "number": [...]}
data = dict()
with tqdm(total=len(departments), ascii=True, bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}]", position=0, leave=True) as bar:
    for k, name in departments.items():
        bar.set_description_str(k)
        res = r.get(api.format(k), timeout=request_timeout)
        res.encoding = "utf-8"
        doc = BeautifulSoup(res.text, "html.parser")
        groups = doc.select("div > span")
        # Skip the first and last matched element; every <span> nested in
        # the remaining ones contributes one entry.
        numbers = [
            num.string
            for ele in groups[1:-1]
            for num in ele.findChildren("span")
        ]
        data[k] = {
            "name": name,
            "number": numbers
        }
        bar.update()
        sleep(0.01)  # short pause between consecutive requests

# save data
with open("cac.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
print("data saved.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://ent01.jctv.ntut.edu.tw/applys1result/college.html
//
// Reads the rows of the ".enterTable" table body from `document`, keeps the
// rows whose first cell is the department code, and prints the extracted
// values as a JSON array.
var data = Array.from(document.querySelector(".enterTable tbody").children)
    // 101009 department code
    .filter((row) => row.children[0].textContent == "101009")
    // Strip all whitespace from the third cell, then keep the eight
    // characters that precede its final character.
    .map((row) => row.children[2].textContent.replace(/\s/g, "").slice(-9, -1));

console.log(JSON.stringify(data, null, 2));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment