Created
March 24, 2018 13:55
-
-
Save yoonbae81/566c0acae4a35a29ec7cbe70ec1c698d to your computer and use it in GitHub Desktop.
Fetch symbol information from Daum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import aiohttp | |
import asyncio | |
import json | |
import re | |
from os import getpid | |
from collections import OrderedDict, namedtuple | |
from logger import get_logger | |
def main():
    """Fetch the stock listings from Daum and print them to stdout as JSON.

    One ``fetch`` task is scheduled per listing URL and all of them are run
    to completion on the asyncio event loop. The tasks fill the module-level
    ``stocks`` dict, which is then sorted by market (reverse order) and, within
    a market, by name, and printed as a JSON object mapping each symbol to
    ``[name, market, category]``.

    Raises:
        Exception: any error raised while fetching or parsing is logged and
            re-raised to the caller.
    """
    get_logger().info("Starting to fetch stocks from daum (pid:{})".format(getpid()))

    global stocks

    URLs = ['http://finance.daum.net/xml/xmlallpanel.daum?stype=P&type=S',
            'http://finance.daum.net/xml/xmlallpanel.daum?stype=Q&type=S',
            'http://finance.daum.net/xml/xmlallpanel.daum?stype=P&type=U',
            'http://finance.daum.net/xml/xmlallpanel.daum?stype=Q&type=U']

    # Acquire the loop *before* the try block: the handlers and the ``finally``
    # clause below use ``loop``, so it must be bound even when something in the
    # body fails. Otherwise an UnboundLocalError would mask the real error.
    loop = asyncio.get_event_loop()

    try:
        stocks = {}

        get_logger().debug('Preparing {} tasks for event loop'.format(len(URLs)))
        tasks = [asyncio.ensure_future(fetch(url)) for url in URLs]
        loop.run_until_complete(asyncio.gather(*tasks))

        get_logger().info('{:,} stocks were parsed'.format(len(stocks)))

        # Two stable sorts: the second (by market, reversed) keeps the name
        # ordering produced by the first within each market.
        stocks_sorted = sorted(stocks.values(), key=lambda item: item.name)
        stocks_sorted = sorted(stocks_sorted, key=lambda item: item.market, reverse=True)

        # OrderedDict keeps the sorted order in the emitted JSON object.
        stocks_output = OrderedDict(
            (item.symbol, (item.name, item.market, item.category))
            for item in stocks_sorted
        )

        print(json.dumps(stocks_output, ensure_ascii=False))

    except KeyboardInterrupt:
        get_logger().info('Stopping the event loop by keyboard interrupt')
        loop.stop()

    except Exception as e:
        get_logger().warn(repr(e))
        raise

    finally:
        get_logger().debug('Closing the event loop')
        loop.close()
async def fetch(url):
    """Download one listing page and hand its text to the parser.

    The market label passed to ``parse`` is "KOSPI" when the URL contains
    ``stype=P`` and "KOSDAQ" otherwise.
    """
    get_logger().debug("Fetching task")

    if "stype=P" in url:
        market_name = "KOSPI"
    else:
        market_name = "KOSDAQ"

    body = await request(url)
    await parse(market_name, body)
async def request(url):
    """GET ``url`` and return the response body decoded as UTF-8 text.

    The whole request (connect, headers and body) must finish within
    ``TIMEOUT`` seconds.

    Raises:
        asyncio.TimeoutError: the request did not finish in time (logged,
            then re-raised).
        RuntimeError: the server answered with a status other than 200.
    """
    get_logger().debug("Requesting {}".format(url))

    TIMEOUT = 5

    async def _get_text():
        # Perform the GET and return the decoded body.
        async with aiohttp.request('GET', url) as resp:
            # Explicit check instead of ``assert``: asserts are stripped when
            # Python runs with -O, which would silently accept error pages.
            if resp.status != 200:
                raise RuntimeError(
                    "Unexpected HTTP status {} for {}".format(resp.status, url))
            return await resp.text(encoding='utf8')

    try:
        # asyncio.wait_for is used for the deadline so the timeout does not
        # depend on aiohttp's own (version-specific) timeout helpers; it
        # raises asyncio.TimeoutError when the deadline passes.
        return await asyncio.wait_for(_get_text(), TIMEOUT)
    except asyncio.TimeoutError:
        get_logger().error("Timeout {} seconds for requesting".format(TIMEOUT))
        raise
async def parse(market, text):
    """Extract stock records from a Daum listing page into ``stocks``.

    The text is scanned line by line. A line containing ``upjong`` sets the
    current category for the stock lines that follow it; a line containing
    ``code`` is treated as a stock record. Each new symbol is stored in the
    module-level ``stocks`` dict as a ``Stock(symbol, name, market, category)``
    namedtuple. A symbol that is already present is only updated when it has
    no category yet and one is now known.

    Lines that contain the keywords but do not match the expected layout are
    skipped instead of raising.

    Args:
        market: market label stored on every stock parsed from ``text``.
        text: raw response body of one listing page.
    """
    get_logger().debug("Parsing {:,d} bytes".format(len(text)))

    global stocks

    # Parsed with regular expressions because the keys in Daum's data are not
    # quoted, so the payload cannot be parsed as JSON.
    # Stock line, e.g.:
    #   , {code:"095570",name :"AJ네트웍스",cost :"34,650",updn :"▲100",rate :"+0.29%"}
    stock_re = re.compile(r'code:"(.+)",name :"(.+)",cost :"(.+)",updn')
    # Category ("upjong") line, e.g.:
    #   upjong : {name : "건설업",code:"018",avg:"+0.28%"},
    category_re = re.compile(r'name : "(.+)",code')

    Stock = namedtuple('Stock', ['symbol', 'name', 'market', 'category'])

    category = None
    for line in text.splitlines():
        if 'upjong' in line:
            found = category_re.search(line)
            # An unparseable category line resets the category so the stocks
            # that follow are not attributed to the previous one.
            category = found.group(1) if found else None
            continue

        if 'code' not in line:
            continue

        match = stock_re.search(line)
        if match is None:
            # Mentions "code" but is not a stock record; nothing to extract.
            continue

        symbol, name = match.group(1, 2)

        existing = stocks.get(symbol)
        if existing is None:
            stocks[symbol] = Stock(symbol, name, market, category)
        elif category is not None and existing.category is None:
            # Seen before without a category: fill it in now that one is known.
            stocks[symbol] = existing._replace(category=category)
# Script entry point: run the fetcher only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment