@rmax
Last active June 4, 2016 04:51
2016-06-04 04:36:49+0000 [-] Log opened.
2016-06-04 04:36:49.933156 [-] Splash version: 2.1
2016-06-04 04:36:49.937837 [-] Qt 5.5.1, PyQt 5.5.1, WebKit 538.1, sip 4.17, Twisted 16.1.1, Lua 5.2
2016-06-04 04:36:49.938075 [-] Python 3.4.3 (default, Oct 14 2015, 20:28:29) [GCC 4.8.4]
2016-06-04 04:36:49.938282 [-] Open files limit: 1048576
2016-06-04 04:36:49.938430 [-] Can't bump open files limit
2016-06-04 04:36:50.046541 [-] Xvfb is started: ['Xvfb', ':1', '-screen', '0', '1024x768x24']
2016-06-04 04:36:50.213871 [-] proxy profiles support is enabled, proxy profiles path: /etc/splash/proxy-profiles
2016-06-04 04:36:50.383912 [-] verbosity=1
2016-06-04 04:36:50.384060 [-] slots=50
2016-06-04 04:36:50.384122 [-] argument_cache_max_entries=500
2016-06-04 04:36:50.384813 [-] Web UI: enabled, Lua: enabled (sandbox: enabled)
2016-06-04 04:36:50.390068 [-] Site starting on 8050
2016-06-04 04:36:50.390297 [-] Starting factory <twisted.web.server.Site object at 0x7f3aeb582e48>
2016-06-04 04:45:25.189394 [-] "172.17.0.1" - - [04/Jun/2016:04:45:25 +0000] "GET /robots.txt HTTP/1.1" 404 153 "-" "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"
process 1: D-Bus library appears to be incorrectly set up; failed to read machine uuid: Failed to open "/etc/machine-id": No such file or directory
See the manual page for dbus-uuidgen to correct this issue.
2016-06-04 04:45:31.245465 [events] {"maxrss": 140252, "active": 0, "path": "/render.json", "timestamp": 1465015531, "rendertime": 6.0484702587127686, "method": "POST", "args": {"headers": {"User-Agent": "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30", "Accept-Language": "en", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Referer": "https://sapui5.hana.ondemand.com/"}, "html": true, "url": "https://sapui5.hana.ondemand.com/sdk/#docs/api/symbols/sap.html", "iframes": true, "wait": 5.0, "uid": 139891033349872}, "status_code": 200, "qsize": 0, "client_ip": "172.17.0.1", "fds": 20, "load": [0.0, 0.01, 0.05], "_id": 139891033349872, "user-agent": "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"}
2016-06-04 04:45:31.245729 [-] "172.17.0.1" - - [04/Jun/2016:04:45:31 +0000] "POST /render.json HTTP/1.1" 200 24242 "-" "Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30"
import scrapy
from scrapy_splash import SplashRequest


class MySpider(scrapy.Spider):
    name = 'myspider'
    start_urls = ['https://sapui5.hana.ondemand.com/']

    def parse(self, response):
        url = 'https://sapui5.hana.ondemand.com/sdk/#docs/api/symbols/sap.html'
        yield SplashRequest(url, self.parse_page,
                            args={
                                'wait': 10.,
                                'iframes': True,
                                'html': True,
                            },
                            endpoint='render.json')

    def parse_page(self, response):
        # render.json (with iframes=True) exposes iframe snapshots
        # under response.data['childFrames'].
        iframe_html = response.data['childFrames'][0]['html']
        sel = scrapy.Selector(text=iframe_html)
        for div in sel.css('#content .sectionItem'):
            name = div.css('a::text').extract_first()
            desc = div.css('.description::text').extract_first() or ''
            print(': '.join([name, desc]))
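render.json returns frames as a nested structure: each entry in `childFrames` may itself carry its own `childFrames` list. The spider above reads only the first top-level frame, which is enough for this page; a small helper to walk every frame (a sketch, assuming the response shape seen in the render.json output above):

```python
def iter_frame_html(frame):
    """Yield the 'html' of a render.json frame dict and of all nested childFrames."""
    if frame.get('html'):
        yield frame['html']
    for child in frame.get('childFrames', []):
        yield from iter_frame_html(child)
```

Inside `parse_page`, `for html in iter_frame_html(response.data): ...` would also cover pages that nest iframes more than one level deep.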
2016-06-04 01:46:19 [scrapy] INFO: Scrapy 1.1.0 started (bot: myproject)
2016-06-04 01:46:19 [scrapy] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'myproject.spiders', 'ROBOTSTXT_OBEY': True, 'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter', 'SPIDER_MODULES': ['myproject.spiders'], 'BOT_NAME': 'myproject', 'USER_AGENT': 'Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'}
2016-06-04 01:46:19 [scrapy] INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
'scrapy.extensions.telnet.TelnetConsole',
'scrapy.extensions.corestats.CoreStats']
2016-06-04 01:46:19 [scrapy] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
'scrapy.downloadermiddlewares.retry.RetryMiddleware',
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
'scrapy_splash.SplashCookiesMiddleware',
'scrapy_splash.SplashMiddleware',
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
'scrapy.downloadermiddlewares.chunked.ChunkedTransferMiddleware',
'scrapy.downloadermiddlewares.stats.DownloaderStats']
2016-06-04 01:46:19 [scrapy] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
'scrapy_splash.SplashDeduplicateArgsMiddleware',
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
'scrapy.spidermiddlewares.referer.RefererMiddleware',
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
'scrapy.spidermiddlewares.depth.DepthMiddleware']
2016-06-04 01:46:19 [scrapy] INFO: Enabled item pipelines:
[]
2016-06-04 01:46:19 [scrapy] INFO: Spider opened
2016-06-04 01:46:19 [scrapy] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2016-06-04 01:46:19 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023
2016-06-04 01:46:20 [scrapy] DEBUG: Crawled (404) <GET https://sapui5.hana.ondemand.com/robots.txt> (referer: None)
2016-06-04 01:46:20 [scrapy] DEBUG: Crawled (200) <GET https://sapui5.hana.ondemand.com/> (referer: None)
2016-06-04 01:46:20 [scrapy] DEBUG: Crawled (404) <GET http://127.0.0.1:8050/robots.txt> (referer: None)
2016-06-04 01:46:30 [scrapy] DEBUG: Crawled (200) <GET https://sapui5.hana.ondemand.com/sdk/#docs/api/symbols/sap.html via http://127.0.0.1:8050/render.json> (referer: None)
apf: Analysis Path Framework
ca:
chart: Chart controls based on Vizframe
collaboration: SAP UI library: SAP Collaboration for Social Media Integration.
gantt: UI5 library: sap.gantt.
landvisz: sap.landvisz library for UI developments
m: The main UI5 control library, with responsive controls that can be used in touch devices as well as desktop browsers.
makit: Mobile Chart controls based on the Sybase MAKIT charting lib.
me: SAPUI5 library with controls specialized for mobile devices (extension).
ndc: SAPUI5 library with controls with native device capabilities.
ovp: SAP library: sap.ovp
portal:
suite:
tnt: SAPUI5 library with controls specialized for administrative applications.
ui: The
uiext:
ushell:
uxap: SAP UxAP
viz: Chart controls based on the SAP BI CVOM charting library
2016-06-04 01:46:30 [scrapy] INFO: Closing spider (finished)
2016-06-04 01:46:30 [scrapy] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 1852,
'downloader/request_count': 4,
'downloader/request_method_count/GET': 3,
'downloader/request_method_count/POST': 1,
'downloader/response_bytes': 715777,
'downloader/response_count': 4,
'downloader/response_status_count/200': 2,
'downloader/response_status_count/404': 2,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2016, 6, 4, 4, 46, 30, 980513),
'log_count/DEBUG': 5,
'log_count/INFO': 7,
'request_depth_max': 1,
'response_received_count': 4,
'scheduler/dequeued': 3,
'scheduler/dequeued/memory': 3,
'scheduler/enqueued': 3,
'scheduler/enqueued/memory': 3,
'splash/render.json/request_count': 1,
'splash/render.json/response_count/200': 1,
'start_time': datetime.datetime(2016, 6, 4, 4, 46, 19, 640725)}
# -*- coding: utf-8 -*-
# Scrapy settings for myproject project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'myproject'
SPIDER_MODULES = ['myproject.spiders']
NEWSPIDER_MODULE = 'myproject.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
SPIDER_MIDDLEWARES = {
    'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
}
# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'scrapy_splash.SplashCookiesMiddleware': 723,
    'scrapy_splash.SplashMiddleware': 725,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
}
DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'
# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# 'myproject.pipelines.SomePipeline': 300,
#}
# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
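One setting the crawl log implies but this file does not show is the Splash endpoint: scrapy-splash reads it from `SPLASH_URL`, and the requests above were routed via `http://127.0.0.1:8050/render.json`. A sketch of the presumably-present line (plus the Splash-aware cache storage scrapy-splash recommends if HTTP caching is enabled):

```python
# Assumed, not shown in the file above: the Splash endpoint used by
# scrapy_splash.SplashMiddleware (matches the address in the crawl log).
SPLASH_URL = 'http://127.0.0.1:8050'

# Only needed if HTTPCACHE_ENABLED is turned on:
#HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'
```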