Last active
November 5, 2020 17:50
-
-
Save PttCodingMan/d128f64ad5e79f7ddb840c020bafa2ec to your computer and use it in GitHub Desktop.
PTT 文章網址轉換成看板與aid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_aid_from_url(url: str) -> (str, str):
    """Convert a PTT web article URL into its board name and article id (aid).

    The article file name has the form ``M.<decimal>.A[.<hex>].html``. The
    decimal part is encoded as six base-64 digits and the optional hex part
    as two base-64 digits (``00`` when absent); the two are concatenated to
    form the aid.

    Algorithm reference:
    https://www.ptt.cc/man/C_Chat/DE98/DFF5/DB61/M.1419434423.A.DF0.html
    (adapted from ``get_aid_from_url`` in PyPtt).

    Args:
        url: Text containing a ``https://www.ptt.cc/bbs/<board>/M...html``
            article URL. The URL may be surrounded by other text.

    Returns:
        A ``(board, aid)`` tuple of strings.

    Raises:
        ValueError: If no PTT article URL is found in ``url``.
    """
    # Imported locally so the function is self-contained.
    import re

    # Character table for the base-64 aid digits.
    aid_table = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_'

    def encode(value: int, width: int) -> str:
        """Return the lowest ``width`` base-64 digits of ``value``."""
        digits = []
        for _ in range(width):
            value, index = divmod(value, 64)
            digits.append(aid_table[index])
        # Digits were produced least-significant first.
        return ''.join(reversed(digits))

    # Check that the text contains a PTT BBS article URL and capture the
    # parts directly, instead of relying on fixed string offsets.
    pattern = re.compile(
        r'https://www\.ptt\.cc/bbs/([-.\w]+)/'
        r'M\.(\d+)\.A(?:\.([0-9A-Fa-f]+))?\.html'
    )
    found = pattern.search(url)
    if found is None:
        raise ValueError('url must be www.ptt.cc article url')

    board, dec_part, hex_part = found.groups()

    aid_0 = encode(int(dec_part), 6)  # decimal field
    # The hex field is optional; it maps to '00' when missing.
    aid_1 = encode(int(hex_part, 16), 2) if hex_part is not None else '00'

    return board, f'{aid_0}{aid_1}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment