Last active
June 27, 2020 05:39
-
-
Save lc-at/78ca10bb63393f14fd3aa15ff634b16b to your computer and use it in GitHub Desktop.
Score scraper: e-Rapor Direktorat PSMA Kemdikbud
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Usage: python main.py <semester identifier> <student national id list file> | |
Semester identifier consists of year + period (even or odd, 1 or 2) | |
File list.txt contains student nasional identifiers separated by newline character | |
Example: python main.py 20192 list.txt | |
This script has these possible output formats: | |
TOTAL_SCORE,STUDENT_ID,NAME (not sorted, no exception) | |
0,STUDENT_ID,EXCEPTION (not sorted, with exception) | |
You can sort it manually if you want. | |
For example: | |
python main.py 20192 list.txt > out | |
cat out | sort | |
Feel free to do anything with the script. | |
It is unlicensed. | |
""" | |
import concurrent.futures | |
import re | |
import sys | |
import requests | |
# Base URL of the e-Rapor application; change it to the real app URL.
MAIN_URL = 'http://localhost:5678'

# 128 hexadecimal characters sent as the ``p`` field of the login form.
# NOTE(review): the length matches a SHA-512 hex digest, so this is presumably
# the hashed default student password -- confirm against the application.
DEFAULT_PASSWORD = (
    'ba3253876aed6bc22d4a6ff53d8406c6ad864195ed144ab5c87621b6c233b548'
    'baeae6956df346ec8c17f5ea10f35ee3cbc514797ed7ddd3145464e2a0bab413'
)
def main(semester_id, nisn_list):
    """Scrape all students concurrently and print one CSV line per student.

    Every identifier in ``nisn_list`` is submitted to ``get_score`` on a pool
    of ten worker threads. Lines are printed in completion order, not in
    input order:

    * ``TOTAL_SCORE,STUDENT_ID,NAME`` when the lookup succeeded;
    * ``0,STUDENT_ID,EXCEPTION`` when the lookup raised.

    Args:
        semester_id: Semester identifier, forwarded unchanged to ``get_score``.
        nisn_list: Iterable of student identifiers.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
        # Map each pending future back to the student it was submitted for.
        pending = {}
        for student_id in nisn_list:
            task = pool.submit(get_score, semester_id, student_id)
            pending[task] = student_id

        for finished in concurrent.futures.as_completed(pending):
            student_id = pending[finished]
            failure = finished.exception()
            if failure:
                # Failed lookups are reported with a score of zero.
                print(f'0,{student_id},{failure}')
            else:
                student_name, total = finished.result()
                print(f'{total},{student_id},{student_name}')
def get_score(semester_id, nisn):
    """Log in as one student and return their name and summed final scores.

    Posts the login form to ``/library/process_login.php``, then fetches the
    ``Siswa-Lihat-Nilai-Akhir`` report page with the same session. Numeric
    table cells on that page are read in groups of four; the third and fourth
    cell of each group (knowledge and practice) are added to the total, the
    first two are ignored.

    Args:
        semester_id: Semester identifier, sent as the ``semester_id`` field.
        nisn: Student identifier, sent as the login ``username``.

    Returns:
        A ``(name, total_score)`` tuple: the student name taken from the
        login response and the integer sum of all knowledge and practice
        scores.

    Raises:
        Exception: ``'Login error'`` if the login response lacks the
            ``'Lihat Nilai Akhir'`` marker; ``'Student name not found'`` if
            the name element is missing; ``'Invalid report format'`` if the
            report has no numeric cells or their count is not a multiple of
            four.
    """
    login_data = {
        'username': nisn,
        'password': '',
        'beban': 'Paket',
        'level': 'Siswa',
        'semester_id': semester_id,
        'p': DEFAULT_PASSWORD,
    }

    # The context manager closes the session's pooled connections even when
    # one of the checks below raises.
    with requests.Session() as session:
        login_html = session.post(f'{MAIN_URL}/library/process_login.php',
                                  data=login_data).text
        if 'Lihat Nilai Akhir' not in login_html:
            raise Exception('Login error')

        name_match = re.search(r'<p><small>(.+?)</small></p>', login_html,
                               re.M | re.I)
        if name_match is None:
            # Previously surfaced as an opaque IndexError.
            raise Exception('Student name not found')
        name = name_match.group(1)

        report_html = session.get(
            f'{MAIN_URL}/raporsma/index.php?page=Siswa-Lihat-Nilai-Akhir').text

    # Raw string: ``\s`` and ``\d`` are regex escapes, not string escapes.
    matches = re.findall(r'<td\s.+>(\d+?)</td>', report_html, re.M | re.I)

    # Scores come in groups of four cells. An empty or ragged list means the
    # page layout is not what this scraper expects (the old float-modulo
    # check raised ZeroDivisionError for fewer than four cells).
    if not matches or len(matches) % 4:
        raise Exception('Invalid report format')

    # Within each group of four, index 2 is knowledge and index 3 is practice.
    knowledge_scores = matches[2::4]
    practice_scores = matches[3::4]
    total_score = sum(map(int, knowledge_scores)) + sum(map(int, practice_scores))
    return name, total_score
def read_list_file(filename):
    """Yield each non-blank line of ``filename`` with whitespace stripped.

    Args:
        filename: Path of a text file holding one entry per line.

    Yields:
        Every line that is non-empty after stripping leading and trailing
        whitespace, in file order.
    """
    with open(filename) as handle:
        stripped_lines = (raw_line.strip() for raw_line in handle)
        # Blank and whitespace-only lines strip to '' and are dropped.
        yield from (entry for entry in stripped_lines if entry)
if __name__ == '__main__':
    # Exactly two arguments are expected after the script name:
    # the semester identifier and the path of the student list file.
    if len(sys.argv) == 3:
        semester_arg, list_path = sys.argv[1:]
        main(semester_arg, read_list_file(list_path))
    else:
        print(f'usage: {sys.argv[0]} <semester_id> <nisn_list.txt>')
        sys.exit(-1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment