Last active
September 28, 2017 09:57
-
-
Save lili668668/d2fdeaef8f8aa33d0a96c4600071f6db to your computer and use it in GitHub Desktop.
想給予一些比較,所以放上來
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
import pandas as pd | |
import csv | |
import numpy as np | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import time | |
import datetime | |
# Credit-card statement scraper: reads the saved statement page, extracts the
# transaction rows, plots expense over time and writes the table to a CSV.

# Output columns, in the order the tokens appear for each transaction.
COLUMNS = ["exp_date", "book_date", "item", "exp"]

# Ordered (old, new) substitutions applied to the text of every matched
# element.  ORDER MATTERS: "LINODE" is stripped before ".COM" is rewritten to
# "LINODE", and "-GOOGLE" is removed before the remaining "-" become newlines.
_REPLACEMENTS = (
    ("0230", ""),
    ("SG", ""),
    ("TWD", ""),
    ("US USD 80.00 08/02", ""),
    (" USD 15.75 08/02", ""),
    ("-GOOGLE", ""),
    ("-", "\n"),
    ("LINODE", ""),
    ("SYNCAC", ""),
    (".COM", "LINODE"),
)

# Two date cells come out of the page fused together; one is repaired and the
# other is dropped (statement-specific clean-up kept from the original script).
_FUSED_DATE_TO_FIX = "08/0308/01"
_FUSED_DATE_FIXED = "08/01"
_FUSED_DATE_TO_DROP = "08/0208/01"


def clean_tokens(texts):
    """Turn the raw text of the matched elements into a flat token list.

    Args:
        texts: iterable of strings, one per matched HTML element, in
            document order.

    Returns:
        list[str]: whitespace-separated tokens after clean-up.

    Raises:
        ValueError: if the fused date token that must be dropped is absent
            (same behaviour as the original ``list.remove`` call).
    """
    cleaned = []
    for text in texts:
        for old, new in _REPLACEMENTS:
            text = text.replace(old, new)
        cleaned.append(text)
    # The original prepended each piece to the accumulator (assumed to be
    # inside the loop -- the pasted source lost its indentation), so the
    # pieces end up in reverse document order.
    merged = "".join(reversed(cleaned))
    tokens = [
        _FUSED_DATE_FIXED if token == _FUSED_DATE_TO_FIX else token
        for token in merged.split()
    ]
    tokens.remove(_FUSED_DATE_TO_DROP)
    return tokens


def build_frame(tokens, year=2017):
    """Build the transaction table from a flat token list.

    Args:
        tokens: flat list of strings whose length is a multiple of four,
            ordered as exp_date, book_date, item, exp for each row.
        year: year appended to the ``MM/DD`` expense dates (the tokens carry
            no year themselves).

    Returns:
        pandas.DataFrame with columns ``COLUMNS``; ``exp_date`` is parsed to
        datetime and ``exp`` is numeric, the other two stay as strings.

    Raises:
        ValueError: if the tokens cannot be reshaped into rows of four, or a
            date / amount cannot be parsed.
    """
    # -1 lets numpy infer the row count instead of hard-coding 15 rows.
    table = np.array(tokens).reshape(-1, len(COLUMNS))
    frame = pd.DataFrame(table, columns=COLUMNS)
    # Parse the dates directly.  Going through time.mktime() (local time) and
    # then to_datetime(unit="s") (UTC) shifted every date by the UTC offset.
    frame["exp_date"] = pd.to_datetime(
        frame["exp_date"] + "/" + str(year), format="%m/%d/%Y"
    )
    # Whole-column operations instead of chained element assignment, which
    # does not reliably write back into the frame.
    frame["exp"] = pd.to_numeric(frame["exp"].str.replace(",", "", regex=False))
    return frame


def main(html_path="CCB.html", csv_path="res.csv"):
    """Parse the statement page, plot expense over time and write the CSV.

    Args:
        html_path: path of the saved statement HTML page.
        csv_path: path of the CSV file to write (Big5 encoded).

    Returns:
        pandas.DataFrame: the parsed transaction table.
    """
    # Load the HTML; the context manager closes the file handle.
    with open(html_path, encoding="utf-8-sig") as html_file:
        soup = BeautifulSoup(html_file, "html.parser")
    texts = [node.get_text() for node in soup.select(".ls10")]

    frame = build_frame(clean_tokens(texts))

    # Plot expense against expense date.
    frame[["exp_date", "exp"]].plot(x="exp_date", y="exp")
    plt.show()

    # Write the CSV.
    frame.to_csv(csv_path, encoding="big5")
    return frame


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
第1版:拿到的原始版
第2版:刪掉註解的失敗版
第3版:此版本有跑出結果,要下班了,來不及寫註解,回家再寫