Last active
June 16, 2018 13:00
-
-
Save freelze/b96b6f65a8bf44171e2d6998e6ce49ea to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
scrape Yahoo Currency , output to Excel | |
請先新建一個Excel,命名為:Currency.xlsx | |
並增加22個sheets, 總共23個sheets , 名稱分別改為'美元', '澳幣', '加拿大幣', | |
'港幣', '英鎊', '瑞士法郎', '日圓', '歐元', '紐西蘭幣', '新加坡幣', | |
'南非幣', '瑞典克朗', '泰銖', '人民幣', '印度幣', '丹麥幣', '土耳其里拉', | |
'墨西哥披索', '越南幣', '菲律賓披索', '馬來西亞幣', '韓圜', '印尼盾' | |
40行請改成自己Excel的路徑 """ | |
import requests | |
from bs4 import BeautifulSoup | |
import time | |
import openpyxl | |
import os | |
res = requests.get('https://tw.money.yahoo.com/currency-converter') | |
res.encoding = 'utf-8' | |
soup = BeautifulSoup(res.text,"html.parser") | |
#all_items = soup.find_all('tr'>'td')[314:315] #307-308 change to 310-311 date:20180507 | |
#all_items = soup.find_all('tbody',{'id':'FKyui_3_18_1_1_1528813578907_811'}) # this line cannot work,because id will change constantly | |
all_items = soup.find_all('table',{'class':'W-100 simple'})[0].find_all_next('tbody') | |
allCur="" # 空字串(string) | |
for item in all_items: # 擷取裡面的字串(string) | |
allCur = item.text | |
split = allCur.splitlines() # 藉由(換行符號,空白)來分割字串,並回傳List | |
curList = [] # 空List | |
i = 2 # split前面兩個是空白 | |
while(i < len(split)): | |
curList.append(split[i]) # 將美元,澳幣,加拿大幣...放到curList中 | |
i += 8 # 略過(最佳銀行,即時買入...) 跳到下一個 | |
def getCurrency(curID,workbook,num): | |
sheet = workbook[curID] # 確認工作表(sheet)的名稱 | |
date = time.strftime("%Y/%m/%d") # 紀錄年月日 | |
numOfRows = len(sheet['A']) # 計算該工作表(sheet)有多少筆資料 | |
sheet.cell(row=numOfRows+1, column=1).value = date # 日期 | |
sheet.cell(row=numOfRows+1, column=2).value = split[num+1] # 外幣 | |
sheet.cell(row=numOfRows+1, column=3).value = split[num+2] # 最佳銀行 | |
sheet.cell(row=numOfRows+1, column=4).value = split[num+3] # 即時買入 | |
sheet.cell(row=numOfRows+1, column=5).value = split[num+4] # 即時賣出 | |
sheet.cell(row=numOfRows+1, column=6).value = split[num+5] # 現金買入 | |
sheet.cell(row=numOfRows+1, column=7).value = split[num+6] # 現金賣出 | |
os.chdir(r"D:\YZU\大三下\MIS-2\Excel") # 請自行更改成自己Excel的路徑 | |
workbook = openpyxl.load_workbook('Currency.xlsx') | |
#print(curList) | |
#print(spilt)#[183] | |
#print(len(spilt)) | |
#print(len(curList)) | |
num = 2 # 前面是空(null) | |
for curID in curList: # 用迴圈把每一個 curID 都跑一次 | |
getCurrency(curID, workbook, num) | |
num += 8 # 6個值(外幣,最佳銀行,即時買入...),2個空(null) | |
workbook.save('Currency.xlsx') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment