Skip to content

Instantly share code, notes, and snippets.

@freelze
Created June 16, 2018 12:45
Show Gist options
  • Save freelze/a1eae82fc182ae0d25b0b62e48db7b4f to your computer and use it in GitHub Desktop.
Save freelze/a1eae82fc182ae0d25b0b62e48db7b4f to your computer and use it in GitHub Desktop.
Scrape the YZU activities that offer service hours and send their details to LINE Notify whenever the website updates.
# State file: YZUActivity.txt (in the working directory) holds the ID of the last activity seen; create it before the first run.
from selenium import webdriver
import time
import requests
from bs4 import BeautifulSoup
# Listing page that is scraped for activities.
URL='https://portalx.yzu.edu.tw/PortalSocialVB/FMain/PageActivityAll.aspx'
# Detail pages have the form:
#https://portalx.yzu.edu.tw/PortalSocialVB/FPage/PageActivityDetail.aspx?Menu=Act&ActID=XXXX
driver = webdriver.Chrome(r"C:\Software\chromedriver_win32\chromedriver.exe") # depend on your driver's location
try:
    driver.get(URL)
    html_source = driver.page_source # grab the rendered page source
finally:
    # Always shut the browser down, even when loading the page fails.
    driver.quit()

soup = BeautifulSoup(html_source, 'html.parser')

# The element following the grandparent of the '服務學習' text node; its
# strings are the values that get paired with the labels later on.
table = soup.find(text='服務學習').parent.parent.next_sibling
href = table.a.get('href')

# Extract the activity ID from the link. Prefer the value of the "ActID="
# query parameter so IDs of any length survive; fall back to the last four
# characters of href when the parameter is not present.
if 'ActID=' in href:
    ActID = href.rsplit('ActID=', 1)[1].split('&', 1)[0]
else:
    ActID = href[-4:]

# repr() is kept on purpose: later code compares entries against quoted
# representations such as "'無'".
Datalist = [repr(string) for string in table.strings]

# Read the previously stored activity ID. A missing file means no activity has
# been recorded yet; surrounding whitespace (e.g. a trailing newline added by
# an editor) must not make the comparison with ActID fail.
try:
    with open('YZUActivity.txt', 'r') as f:
        TxtActID = f.read().strip()
except FileNotFoundError:
    TxtActID = ''
def PrintData():
    """Build the notification text for the scraped activity.

    Reads the module-level ``soup``, ``Datalist`` and ``ActID``. The strings
    of the element that is the grandparent of the '服務學習' text node are
    used as labels; the first eight labels each get ' : ' plus one or more
    ``Datalist`` entries appended. Every resulting line is printed.

    Returns:
        A string starting with a newline, followed by one line per label and
        a final line holding the activity's detail-page link.

    Raises:
        IndexError: if there are fewer than eight labels or ``Datalist`` is
            shorter than the layout being read.
    """
    detail_url = "https://portalx.yzu.edu.tw/PortalSocialVB/FPage/PageActivityDetail.aspx?Menu=Act&ActID=" + ActID

    label_row = soup.find(text='服務學習').parent.parent
    labels = [repr(piece) for piece in label_row.strings]

    # Only the 13- and 14-entry layouts carry extra entries in the seventh
    # field; any other length is read with the 12-entry layout.
    entry_count = len(Datalist)
    surplus = entry_count - 12 if entry_count in (13, 14) else 0

    values = [
        Datalist[0],
        Datalist[1],
        Datalist[2],
        Datalist[3] + Datalist[4],
        Datalist[5],
        Datalist[6] + Datalist[7] + Datalist[8],
        # Explicit indexing (not slicing) so a short Datalist still raises.
        ' | '.join(Datalist[k] for k in range(9, 10 + surplus)),
        # The last two entries are emitted in swapped order.
        Datalist[11 + surplus] + Datalist[10 + surplus],
    ]
    for slot, value in enumerate(values):
        labels[slot] += ' : ' + value

    for entry in labels:
        print(entry)

    body = "".join(entry + '\n' for entry in labels)
    return "\n" + body + "\n報名連結 : " + detail_url
def lineNotify(token, msg, timeout=10):
    """Send ``msg`` through the LINE Notify API.

    Args:
        token: LINE Notify access token, sent as a Bearer credential.
        msg: Text of the notification.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The HTTP status code of the response.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (connection error, timeout, ...).
    """
    url = "https://notify-api.line.me/api/notify"
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type" : "application/x-www-form-urlencoded"
    }
    payload = {'message': msg}
    # Send the message in the form-encoded request body, matching the declared
    # Content-Type, instead of appending it to the URL as a query string.
    r = requests.post(url, headers=headers, data=payload, timeout=timeout)
    return r.status_code
# Script driver: build the message, then, if the scraped activity ID differs
# from the stored one, record the new ID and notify when the last Datalist
# entry is not '無'.
token = "LINE_TOKEN" # Your LINE NOTIFY TOKEN
msg = PrintData()

activity_changed = TxtActID != ActID
if activity_changed:
    print("TxtActID != ActID") #TEST
    # Opening in 'w' mode already empties the file, so writing the new ID is
    # all that is needed; the context manager closes the handle.
    with open('YZUActivity.txt', 'w') as f:
        f.write(ActID)

    # Datalist entries are repr() strings, hence the embedded quotes.
    last_entry = Datalist[-1]
    if last_entry != '\'無\'': # escape character \'
        print("Datalist[-1]="+last_entry)
        print("無") # TEST
        lineNotify(token, msg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment