Skip to content

Instantly share code, notes, and snippets.

@yoonbae81
Created March 24, 2018 13:55
Show Gist options
  • Save yoonbae81/566c0acae4a35a29ec7cbe70ec1c698d to your computer and use it in GitHub Desktop.
Save yoonbae81/566c0acae4a35a29ec7cbe70ec1c698d to your computer and use it in GitHub Desktop.
Fetch symbol information from Daum
#!/usr/bin/python3
import aiohttp
import asyncio
import json
import re
from os import getpid
from collections import OrderedDict, namedtuple
from logger import get_logger
def main():
    """Fetch all Daum listing pages, merge the records and print them as JSON.

    The four listing URLs (``stype`` P/Q crossed with ``type`` S/U) are
    fetched concurrently on the asyncio event loop; ``fetch`` fills the
    module-level ``stocks`` dict as a side effect. When every task has
    finished, the records are ordered by market (descending) and by name
    (ascending) within a market, then written to stdout as a single JSON
    object mapping ``symbol -> [name, market, category]``.

    Raises:
        Exception: any error raised while fetching or parsing is logged and
            re-raised. A ``KeyboardInterrupt`` is logged and swallowed after
            stopping the loop.
    """
    get_logger().info("Starting to fetch stocks from daum (pid:{})".format(getpid()))
    global stocks

    URLs = [
        'http://finance.daum.net/xml/xmlallpanel.daum?stype=P&type=S',
        'http://finance.daum.net/xml/xmlallpanel.daum?stype=Q&type=S',
        'http://finance.daum.net/xml/xmlallpanel.daum?stype=P&type=U',
        'http://finance.daum.net/xml/xmlallpanel.daum?stype=Q&type=U',
    ]

    # Acquire the loop before entering ``try``: the ``except``/``finally``
    # clauses below use ``loop`` and would otherwise hide the real error
    # behind a NameError if acquiring it failed.
    loop = asyncio.get_event_loop()
    try:
        stocks = {}

        get_logger().debug('Preparing {} tasks for event loop'.format(len(URLs)))
        tasks = [asyncio.ensure_future(fetch(url)) for url in URLs]
        loop.run_until_complete(asyncio.gather(*tasks))
        get_logger().info('{:,} stocks were parsed'.format(len(stocks)))

        # Two stable passes: the name order is kept inside each market group
        # because sorted() preserves the relative order of equal keys, even
        # with reverse=True.
        by_name = sorted(stocks.values(), key=lambda item: item.name)
        ordered = sorted(by_name, key=lambda item: item.market, reverse=True)

        # OrderedDict keeps the sorted order in the JSON output.
        stocks_output = OrderedDict(
            (item.symbol, (item.name, item.market, item.category))
            for item in ordered
        )
        print(json.dumps(stocks_output, ensure_ascii=False))

    except KeyboardInterrupt:
        get_logger().info('Stopping the event loop by keyboard interrupt')
        loop.stop()

    except Exception as e:
        # ``warn`` is a deprecated alias of ``warning``.
        get_logger().warning(repr(e))
        raise

    finally:
        get_logger().debug('Closing the event loop')
        loop.close()
async def fetch(url):
    """Download one Daum listing page and hand its body to the parser.

    The market label is derived from the URL itself: a URL containing
    ``stype=P`` is treated as KOSPI, any other URL as KOSDAQ.
    """
    get_logger().debug("Fetching task")

    if "stype=P" in url:
        market = "KOSPI"
    else:
        market = "KOSDAQ"

    body = await request(url)
    await parse(market, body)
async def request(url):
    """GET ``url`` and return the response body decoded as UTF-8 text.

    Args:
        url: Address to request.

    Returns:
        The response body as ``str``.

    Raises:
        asyncio.TimeoutError: the request did not complete within the
            timeout; the timeout is logged before re-raising.
        AssertionError: the server answered with a status other than 200.
    """
    get_logger().debug("Requesting {}".format(url))
    TIMEOUT = 5

    async def _download():
        async with aiohttp.request('GET', url) as resp:
            # Raised explicitly instead of using ``assert`` so the check is
            # not stripped when Python runs with -O; the exception type is
            # the same one the assert statement produced.
            if resp.status != 200:
                raise AssertionError(
                    "Unexpected HTTP status {} for {}".format(resp.status, url))
            return await resp.text(encoding='utf8')

    try:
        # asyncio.wait_for replaces the ``aiohttp.Timeout`` context manager,
        # which is not available in current aiohttp releases. It cancels the
        # download and raises asyncio.TimeoutError when the limit is hit.
        return await asyncio.wait_for(_download(), TIMEOUT)
    except asyncio.TimeoutError:
        get_logger().error("Timeout {} seconds for requesting".format(TIMEOUT))
        raise
async def parse(market, text):
    """Extract stock records from a Daum listing payload into ``stocks``.

    The text is scanned line by line. A line containing ``upjong`` sets the
    category applied to the stock records that follow it. Every stock record
    found is stored in the module-level ``stocks`` dict, keyed by symbol. A
    symbol that is already present is left untouched, except that a missing
    category is filled in when this payload supplies one.

    Lines that do not match the expected layout are skipped rather than
    raising ``AttributeError`` on a failed regex match.

    Args:
        market: Market label stored on newly added records.
        text: Raw response body to scan.
    """
    get_logger().debug("Parsing {:,d} bytes".format(len(text)))
    global stocks

    # Parsed with regular expressions: the keys in Daum's data are not
    # quoted, so the payload cannot be loaded as JSON.
    # Sample record:
    #   , {code:"095570",name :"AJ네트웍스",cost :"34,650",updn :"▲100",rate :"+0.29%"}
    # ``[^"]+`` keeps every capture inside its own quoted value, so a match
    # can never run across fields or into a neighbouring record. The cost
    # field is matched but not used at the moment.
    rep_code = re.compile(r'code:"([^"]+)",name :"([^"]+)",cost :"([^"]+)",updn')
    # Sample category header:
    #   upjong : {name : "건설업",code:"018",avg:"+0.28%"},
    rep_category = re.compile(r'name : "([^"]+)",code')

    Stock = namedtuple('Stock', ['symbol', 'name', 'market', 'category'])

    category = None
    for line in text.splitlines():
        if 'upjong' in line:
            header = rep_category.search(line)
            if header is None:
                # Fall back to "no category" instead of silently carrying the
                # previous header over to the records that follow.
                get_logger().debug(
                    "Skipping unrecognised category line: {!r}".format(line))
                category = None
            else:
                category = header.group(1)
            continue

        # finditer yields nothing for lines without a record and copes with
        # more than one record on the same line.
        for match in rep_code.finditer(line):
            symbol = match.group(1)
            name = match.group(2)

            known = stocks.get(symbol)
            if known is None:
                stocks[symbol] = Stock(symbol, name, market, category)
            elif category is not None and known.category is None:
                stocks[symbol] = known._replace(category=category)
# Run the fetcher only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment