Skip to content

Instantly share code, notes, and snippets.

@t510599
Last active April 23, 2020 15:08
Show Gist options
  • Save t510599/d15154077fae2d6d9b919b18361c564b to your computer and use it in GitHub Desktop.
Save t510599/d15154077fae2d6d9b919b18361c564b to your computer and use it in GitHub Desktop.
Fetch students who passed first stage for analysing.
import json
def _read_json(path):
    """Return the parsed contents of the UTF-8 JSON file at ``path``."""
    with open(path, "r", encoding="utf-8") as fh:
        return json.load(fh)


# Inputs are read in a fixed order: the CAC database, then the two lists.
cac_db = _read_json("cac.json")
ntut = _read_json("ntut.json")
ntust = _read_json("ntust.json")

# Numbers stored under department code 016222 in cac.json
# (presumably the NCU department of interest -- confirm against cac.json).
ncu = set(cac_db["016222"]["number"])
ntut = set(ntut)
ntust = set(ntust)

# Pairwise overlaps, then the overlap shared by all three sets.
ntust_inter = ncu & ntust
ntut_inter = ncu & ntut
jctv_inter = ntut & ntust
ncu_inter = jctv_inter & ncu

for overlap in (ntust_inter, ntut_inter, jctv_inter, ncu_inter):
    print(len(overlap))

# Sets are not JSON-serialisable, so each one is written out as a list.
data = {
    "ntust_inter": list(ntust_inter),
    "ntut_inter": list(ntut_inter),
    "jctv_inter": list(jctv_inter),
    "ncu_inter": list(ncu_inter),
}
with open("dup.json", "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2)
import json
import re
import requests as r
from tqdm import tqdm
from bs4 import BeautifulSoup
from time import sleep
# list api
# URL template for one department's result page; "{}" is filled with the
# department code.
api = "https://www.cac.edu.tw/CacLink/apply109/109apply_6pSieveXw_ju9sd98/html_sieve_109pgx3/ColPost/common/apply/{}.htm"
# Keyword submitted to the department-name search form.
department_name = "資訊工程"

# Seconds to wait for the department search before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# getting departments list
# Prefer the local cache; fall back to the search form when it is unusable.
try:
    with open("departments.json", "r", encoding="utf-8") as f:
        departments = json.load(f)
except (OSError, ValueError) as err:
    # OSError: cache missing or unreadable.
    # ValueError: cache is not valid JSON / not valid UTF-8.
    print(err)
    departments = None
else:
    print("Read local departments.json.")

# The fetch runs outside the ``except`` block so that a network failure is
# reported on its own instead of being chained onto the cache-miss error.
if departments is None:
    departments = dict()
    print("Fetching departments...")
    res = r.post(
        "https://www.cac.edu.tw/apply109/system/109ColQrytk4p_forapply_os92k5w/ShowGsd.php",
        data={
            "TxtGsd": department_name,
            "SubTxtGsd": "依學系名稱查詢",
            "action": "SubTxtGsd"
        },
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error: parsing an error page would yield zero
    # rows and write an empty cache that every later run would then reuse.
    res.raise_for_status()
    res.encoding = "utf-8"
    doc = BeautifulSoup(res.text, "html.parser")
    rows = doc.select("table tr")
    # The first and last rows are skipped (presumably header and footer).
    for row in rows[1:-1]:
        ele = row.findChildren("td")[0]  # name
        # Collapse every whitespace run so the fixed offsets below line up.
        string = re.sub(r"\s+", " ", ele.get_text())
        # The six characters before the final one are the code; the name is
        # everything before the last nine characters.
        # NOTE(review): assumes the cell text ends with " (NNNNNN)" -- confirm.
        code = string[-7:-1]
        departments[code] = string[0:-9]
    with open("departments.json", "w", encoding="utf-8") as f:
        json.dump(departments, f, ensure_ascii=False, indent=2)

print("Total: {} departments.".format(len(departments)))
# database
# Maps each department code to {"name": ..., "number": [...]}.
data = {}
bar = tqdm(
    total=len(departments),
    ascii=True,
    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}]",
    position=0,
    leave=True,
)
for dept_code, dept_name in departments.items():
    bar.set_description_str(dept_code)
    page = r.get(api.format(dept_code))
    page.encoding = "utf-8"
    soup = BeautifulSoup(page.text, "html.parser")
    groups = soup.select("div > span")
    # The first and last matched spans are ignored; the text of every <span>
    # nested inside the remaining ones is collected in document order.
    numbers = [
        inner.string
        for group in groups[1:-1]
        for inner in group.findChildren("span")
    ]
    data[dept_code] = {"name": dept_name, "number": numbers}
    bar.update()
    # Short pause between consecutive requests.
    sleep(0.01)
bar.close()

# save data
with open("cac.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
print("data saved.")
// https://ent01.jctv.ntut.edu.tw/applys1result/college.html
// Browser-console snippet: run it on the page above. It reads the rows of the
// ".enterTable" body, keeps those whose first cell equals the department code,
// and logs the extracted values as pretty-printed JSON.
var data = Array.from(document.querySelector(".enterTable tbody").children)
  // 101009 department code
  .filter((row) => row.children[0].textContent == "101009")
  .map((row) => {
    // Strip all whitespace from the third cell, then keep the eight
    // characters that precede the final one.
    const compact = row.children[2].textContent.replace(/\s/g, "");
    return compact.slice(-9, -1);
  });
console.log(JSON.stringify(data, null, 2));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment