Last active
April 1, 2018 03:22
-
-
Save jennyonjourney/804b2c2aaa17722d00572e26c0a6b903 to your computer and use it in GitHub Desktop.
Python - web data crawling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from lxml.html import parse | |
from io import StringIO | |
# Naver Finance page to scrape.
url = 'http://finance.naver.com/sise/sise_market_sum.nhn'

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as data.
text = requests.get(url, timeout=10)
text.raise_for_status()

# lxml's parse() wants a file-like object, so wrap the response body.
ppp = parse(StringIO(text.text))
doc = ppp.getroot()

tables = doc.findall('.//table')  # every <table> in the document
tt = tables[1]  # the table at index 1 (second on the page)
rows = tt.findall('.//tr')  # every <tr> inside that table
def rowData(rr, kind):
    """Return the cleaned text of every ``kind`` element found under ``rr``.

    ``rr`` must provide ``findall(path)``; it is queried with ``'.//' + kind``.
    Each element found must provide ``text_content()``. Tab and newline
    characters are removed from each text; nothing else is altered.

    Returns a list with one string per matched element, in match order.
    """
    # Translation table that deletes tabs and newlines in a single pass.
    strip_table = {ord("\t"): None, ord("\n"): None}

    cleaned = []
    for cell in rr.findall('.//' + kind):
        cleaned.append(cell.text_content().translate(strip_table))
    return cleaned
def rowrite(rr):
    """Practice stub: return a fixed placeholder line, ignoring ``rr``.

    The argument is accepted so the call site matches the real row writer,
    but its contents are never read.
    """
    return '연습삼아\n'
# The row writer only has to join the given strings and hand back the result;
# this first pass uses the rowrite() practice stub.
# `with` guarantees the file is closed even if a row raises mid-loop.
with open('../fff/webData.csv', 'w', encoding='utf-8') as f:
    for row in rows[2:]:  # skip the first two <tr> elements
        rr = rowData(row, 'td')
        if len(rr) > 2:  # only rows with more than two cells are written
            print(rr)
            f.write(rowrite(rr))

print('-------이제 본격적으로 증권정보를 csv로 저장하자--------')
# The row writer only has to join the given strings and return the result.
def rowWrite(rr):
    """Return the items of ``rr`` as one CSV line terminated by ``'\\n'``.

    Fields are separated by commas. A field that itself contains a comma,
    a double quote, or a line break is quoted (and inner quotes doubled) so
    that it stays a single column when the file is read back; plain joining
    would silently split values such as ``'1,234'`` into two columns.

    Args:
        rr: iterable of cell values (normally strings).

    Returns:
        The CSV-encoded line. An empty ``rr`` yields just ``'\\n'``.
        Note: a row consisting of exactly one empty string is written as
        ``'""\\n'`` by the csv module.
    """
    # Local imports keep this function self-contained.
    import csv
    import io

    buf = io.StringIO()
    # lineterminator='\n' matches the line ending the callers expect;
    # the csv default would be '\r\n'.
    csv.writer(buf, lineterminator='\n').writerow(rr)
    return buf.getvalue()
# Real pass: overwrite the CSV with one line per data row via rowWrite().
# `with` guarantees the file is closed even if a row raises mid-loop.
with open('../fff/webData.csv', 'w', encoding='utf-8') as f:
    for row in rows[2:]:  # skip the first two <tr> elements
        rr = rowData(row, 'td')
        if len(rr) > 2:  # only rows with more than two cells are written
            print(rr)
            f.write(rowWrite(rr))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment