Created
April 2, 2019 15:29
-
-
Save rennerocha/a0dd2fd98fa3f74715247236e6a23c87 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import os | |
from scrapy import signals | |
from scrapy.exceptions import NotConfigured | |
from scrapy.utils.project import data_path | |
def json_serial(obj):
    """Fallback encoder meant to be passed as ``default=`` to ``json.dumps``.

    Date and datetime objects are converted to their ISO 8601 string via
    ``isoformat()``. Any other object is rejected.

    Args:
        obj: The value the standard JSON encoder could not handle.

    Returns:
        The ISO-formatted string for ``obj``.

    Raises:
        TypeError: If ``obj`` is neither a ``datetime.datetime`` nor a
            ``datetime.date``.
    """
    temporal_types = (datetime.datetime, datetime.date)
    # Guard clause: reject everything that is not a date-like value.
    if not isinstance(obj, temporal_types):
        raise TypeError ("Type %s not serializable" % type(obj))
    return obj.isoformat()
class StatsStore(object):
    """Extension that writes a spider's collected stats to a JSON file.

    When the ``spider_closed`` signal fires, the stats returned by the
    crawler's stats collector are serialized to JSON and written to a new
    file inside the directory returned by
    ``data_path('stats', createdir=True)``. The file is named after the
    current Unix timestamp in whole seconds.
    """

    def __init__(self, stats):
        """Keep the stats collector and resolve the output directory.

        Args:
            stats: The crawler's stats collector; it must provide
                ``get_stats(spider)``.
        """
        self.stats = stats
        self.statsdir = data_path('stats', createdir=True)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension and hook it to the ``spider_closed`` signal.

        Args:
            crawler: The crawler providing ``stats`` and ``signals``.

        Returns:
            The newly created ``StatsStore`` instance.
        """
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o

    def spider_closed(self, spider):
        """Dump the stats of ``spider`` to a fresh JSON file.

        The file is named ``<timestamp>``. If a file with that name already
        exists (for example, two spiders closing within the same second),
        ``-1``, ``-2``, ... is appended until an unused name is found, so an
        earlier dump is never overwritten.

        Args:
            spider: The spider that has just been closed.

        Raises:
            TypeError: If the stats contain a value that neither the JSON
                encoder nor ``json_serial`` can handle. No file is created
                in that case.
        """
        import json

        spider_stats = self.stats.get_stats(spider)
        # Serialize before touching the filesystem so that a serialization
        # error does not leave an empty stats file behind.
        payload = json.dumps(spider_stats, default=json_serial)

        key = int(datetime.datetime.now().timestamp())
        attempt = 0
        while True:
            name = str(key) if attempt == 0 else "%d-%d" % (key, attempt)
            stats_f = os.path.join(self.statsdir, name)
            try:
                # "x" fails instead of truncating when the file exists.
                with open(stats_f, "x", encoding="utf-8") as s_f:
                    s_f.write(payload)
            except FileExistsError:
                attempt += 1
            else:
                break
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment