Created
June 16, 2018 12:45
-
-
Save freelze/a1eae82fc182ae0d25b0b62e48db7b4f to your computer and use it in GitHub Desktop.
Scrape the YZU activities that offer service hours and send their details through LINE Notify whenever the website updates.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Need to create a YZUActivity.txt | |
from selenium import webdriver | |
import time | |
import requests | |
from bs4 import BeautifulSoup | |
# Listing page that shows all activities on the YZU portal.
URL = 'https://portalx.yzu.edu.tw/PortalSocialVB/FMain/PageActivityAll.aspx'
# Activity detail pages have the form:
# https://portalx.yzu.edu.tw/PortalSocialVB/FPage/PageActivityDetail.aspx?Menu=Act&ActID=XXXX

# Load the listing page in Chrome and keep a copy of its HTML.
# NOTE(review): passing the driver path positionally is the older Selenium
# calling style; newer releases expect a Service object -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(r"C:\Software\chromedriver_win32\chromedriver.exe")  # depends on your driver's location
try:
    driver.get(URL)
    html_source = driver.page_source  # HTML of the loaded page
finally:
    # Always shut the browser down, even when loading the page fails.
    driver.quit()

soup = BeautifulSoup(html_source, 'html.parser')

# The element two levels above the '服務學習' text supplies the field names
# (see PrintData); its next sibling supplies the values of one activity --
# presumably the table's header row and its first data row.
table = soup.find(text='服務學習').parent.parent.next_sibling
href = table.a.get('href')

# Use the value of the ActID query parameter when the link carries one, so the
# ID is not limited to four characters; otherwise fall back to the last four
# characters of the link.
_, _actid_marker, _actid_tail = href.rpartition('ActID=')
ActID = (_actid_tail.split('&', 1)[0] if _actid_marker else '') or href[-4:]

# repr() of every text fragment in the value row (each entry keeps its quotes).
Datalist = [repr(string) for string in table.strings]

# Activity ID stored by the previous run; empty when no state file exists yet.
try:
    with open('YZUActivity.txt', 'r') as f:
        TxtActID = f.read()
except FileNotFoundError:
    TxtActID = ''
def PrintData():
    """Build the notification text for the scraped activity.

    Reads the module-level ``soup``, ``Datalist`` and ``ActID``. Each field
    name (taken from the row containing the '服務學習' text) is paired with
    one or more entries of ``Datalist``; every resulting line is printed and
    the lines are combined into a single message that ends with the link to
    the activity's detail page.

    Returns:
        str: the message, starting with a newline, one field per line,
        followed by a blank line and the "報名連結" (sign-up link) line.

    Raises:
        IndexError: if fewer field names or values were scraped than the
            layout below expects.
    """
    detail_url = "https://portalx.yzu.edu.tw/PortalSocialVB/FPage/PageActivityDetail.aspx?Menu=Act&ActID=" + ActID

    name_row = soup.find(text='服務學習').parent.parent
    labels = [repr(fragment) for fragment in name_row.strings]

    # The 7th and 8th fields draw on different Datalist positions depending on
    # how many fragments were scraped (14, 13, or anything else).
    total = len(Datalist)
    if total == 14:
        seventh, eighth = (9, 10, 11), (13, 12)
    elif total == 13:
        seventh, eighth = (9, 10), (12, 11)
    else:
        seventh, eighth = (9,), (11, 10)

    # (separator, Datalist indexes) for each of the eight fields, in order.
    layout = (
        ('', (0,)),
        ('', (1,)),
        ('', (2,)),
        ('', (3, 4)),
        ('', (5,)),
        ('', (6, 7, 8)),
        (' | ', seventh),
        ('', eighth),
    )
    for slot, (glue, indexes) in enumerate(layout):
        labels[slot] += ' : ' + glue.join([Datalist[i] for i in indexes])

    for line in labels:
        print(line)

    body = "".join(line + '\n' for line in labels)
    return "\n" + body + "\n報名連結 : " + detail_url
def lineNotify(token, msg):
    """Send ``msg`` to LINE Notify and return the HTTP status code.

    Args:
        token: LINE Notify access token, sent as a Bearer token.
        msg: Text of the notification.

    Returns:
        int: HTTP status code of the response.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (connection failure or timeout).
    """
    url = "https://notify-api.line.me/api/notify"
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/x-www-form-urlencoded",
    }
    payload = {'message': msg}
    # The Content-Type header declares a form-encoded body, so the message
    # goes in the request body (data=) rather than in the URL query string
    # (params=). The timeout keeps an unresponsive server from hanging the
    # script indefinitely.
    r = requests.post(url, headers=headers, data=payload, timeout=30)
    return r.status_code
token = "LINE_TOKEN"  # Your LINE NOTIFY TOKEN
msg = PrintData()

# NOTE(review): the source lost its indentation; the nesting below (notify
# only when the activity ID changed) is reconstructed -- confirm against the
# original script.
if TxtActID != ActID:
    print("TxtActID != ActID")  # TEST
    # Remember the new activity ID. Opening in 'w' mode already empties the
    # file, and the with-block closes it even if the write fails.
    with open('YZUActivity.txt', 'w') as f:
        f.write(ActID)
    # Datalist holds repr() strings, so the value '無' appears with its
    # surrounding quotes. Presumably the last entry is the service-hours
    # field and '無' means "none" -- verify against the page.
    if Datalist[-1] != '\'無\'':
        print("Datalist[-1]=" + Datalist[-1])
        print("無")  # TEST
        status = lineNotify(token, msg)
        if status != 200:
            # Surface a rejected notification instead of ignoring it.
            print("LINE Notify request failed with HTTP status " + str(status))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment