clasense4 · March 3, 2016 22:55 · sparq-e · Mar 20, 2019
diff --git a/scrapy_result b/scrapy_result
 [root@cls-ebizu ~]# scrapy shell "http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html"
 2016-03-03 17:52:54 [scrapy] INFO: Scrapy 1.0.5 started (bot: scrapybot)
 2016-03-03 17:52:54 [scrapy] INFO: Optional features available: ssl, http11, boto
 2016-03-03 17:52:54 [scrapy] INFO: Overridden settings: {'LOGSTATS_INTERVAL': 0, 'DUPEFILTER_CLASS': 'scrapy.dupefilters.BaseDupeFilter'}
 2016-03-03 17:52:55 [scrapy] INFO: Enabled extensions: CloseSpider, TelnetConsole, CoreStats, SpiderState
 2016-03-03 17:52:55 [boto] DEBUG: Retrieving credentials from metadata server.
 2016-03-03 17:52:55 [boto] ERROR: Caught exception reading instance data
 Traceback (most recent call last):
  File "/usr/lib/python2.7/site-packages/boto/utils.py", line 210, in retry_url
    r = opener.open(req, timeout=timeout)
  File "/usr/lib64/python2.7/urllib2.py", line 437, in open
    response = meth(req, response)
  File "/usr/lib64/python2.7/urllib2.py", line 550, in http_response
    'http', request, response, code, msg, hdrs)
  File "/usr/lib64/python2.7/urllib2.py", line 475, in error
    return self._call_chain(*args)
  File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
    result = func(*args)
  File "/usr/lib64/python2.7/urllib2.py", line 558, in http_error_default
    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
 HTTPError: HTTP Error 404: Not Found
 2016-03-03 17:52:55 [boto] ERROR: Unable to read instance data, giving up
 2016-03-03 17:52:55 [scrapy] INFO: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats
 2016-03-03 17:52:55 [scrapy] INFO: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware
 2016-03-03 17:52:55 [scrapy] INFO: Enabled item pipelines:
 2016-03-03 17:52:55 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023
 2016-03-03 17:52:55 [scrapy] INFO: Spider opened
 2016-03-03 17:52:56 [scrapy] DEBUG: Crawled (200) <GET http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html> (referer: None)
 [s] Available Scrapy objects:
 [s]   crawler    <scrapy.crawler.Crawler object at 0x1c5dc50>
 [s]   item       {}
 [s]   request    <GET http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html>
 [s]   response   <200 http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html>
 [s]   settings   <scrapy.settings.Settings object at 0x1c5db50>
 [s]   spider     <DefaultSpider 'default' at 0x7fd110017950>
 [s] Useful shortcuts:
 [s]   shelp()           Shell help (print this help)
 [s]   fetch(req_or_url) Fetch request (or URL) and update local objects
 [s]   view(response)    View response in a browser
 2016-03-03 17:52:57 [root] DEBUG: Using default logger
 2016-03-03 17:52:57 [root] DEBUG: Using default logger

 In [1]: response.xpath("//meta").extract()
 Out[1]:
 [u'<meta http-equiv="content-language" content="de">',
 u'<meta name="keywords" content="Sport, Formel 1, Michael Schumacher">',
 u'<meta name="news_keywords" content="Michael, Schumacher, Sport, Formel 1">',
 u'<meta name="description" content="Michael Schumacher redet, aber nur in einem Video. Es sind Aufnahmen aus der Vergangenheit. Aus seiner einzigartigen Karriere. Einer, der nun eine Ausstellung gewidmet wird. Zur Er\xf6ffnung ist auch seine Familie da.">',
 u'<meta name="robots" content="index,follow">',
 u'<meta name="robots" content="noodp">',
 u'<meta name="robots" content="noarchive">',
 u'<meta name="revisit-after" content="1 hour">',
 u'<meta name="pragma" content="no-cache">',
 u'<meta name="cache-control" content="no-cache">',
 u'<meta name="date" content="2016-02-15T19:57:21+01:00">',
 u'<meta name="author" content="FOCUS Online">',
 u'<meta name="msvalidate.01" content="18FEC93808733136F33F992192FA967D">',
 u'<meta name="apple-itunes-app" content="app-id=519052678,app-argument=focus://article?id=5286608, affiliate-data=at=11lNpf&amp;ct=FOL_Smartbanner">',
 u'<meta name="al:ios:url" content="focus://article?id=5286608">',
 u'<meta name="al:ios:app_store_id" content="519052678">',
 u'<meta name="al:ios:app_name" content="FOCUS Online - Nachrichten">',
 u'<meta name="al:android:url" content="focus://article/id/5286608">',
 u'<meta name="al:android:package" content="de.cellular.focus">',
 u'<meta name="al:android:app_name" content="FOCUS Online - Nachrichten">',
 u'<meta name="fol_app" content="true">',
 u'<meta property="fb:admins" content="100001540876802, 694311112, 1679965273">',
 u'<meta property="fb:page_id" content="147940371990513">',
 u'<meta property="og:type" content="article">',
 u'<meta property="og:url" content="http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html">',
 u'<meta property="og:title" content="In Schumachers Abwesenheit: Ausstellung zur Formel 1-Karriere er\xf6ffnet">',
 u'<meta property="og:site_name" content="FOCUS Online">',
 u'<meta name="news_synd" content="false">',
 u'<meta name="fol_ct" content="news">',
 u'<meta name="fol_iscomment" content="false">',
 u'<meta http-equiv="content-type" content="text/html; charset=UTF-8">',
 u'<meta http-equiv="expires" content="-1">',
 u'<meta content="http://p5.focus.de/img/fotos/crop5285797/2589045353-w400-h300-o-q75-p5/urn-newsml-dpa-com-20090101-160215-99-749497-large-4-3.jpg" property="imgforsearch">',
 u'<meta content="http://p5.focus.de/img/fotos/crop5285797/5872719666-w1200-h627-o-q75-p5/urn-newsml-dpa-com-20090101-160215-99-749497-large-4-3.jpg" property="og:image">',
 u'<meta content="1200" property="og:image:width">',
 u'<meta content="627" property="og:image:height">',
 u'<meta itemprop="worstRating" content="1">',
 u'<meta itemprop="bestRating" content="5">',
 u'<meta itemprop="ratingCount" content="1">',
 u'<meta itemprop="ratingValue" content="3.0">']
	[root@cls-ebizu ~]# scrapy shell "http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html"
	2016-03-03 17:52:54 [scrapy] INFO: Scrapy 1.0.5 started (bot: scrapybot)
	2016-03-03 17:52:54 [scrapy] INFO: Optional features available: ssl, http11, boto
	2016-03-03 17:52:54 [scrapy] INFO: Overridden settings: {'LOGSTATS_INTERVAL': 0, 'DUPEFILTER_CLASS': 'scrapy.dupefilters.BaseDupeFilter'}
	2016-03-03 17:52:55 [scrapy] INFO: Enabled extensions: CloseSpider, TelnetConsole, CoreStats, SpiderState
	2016-03-03 17:52:55 [boto] DEBUG: Retrieving credentials from metadata server.
	2016-03-03 17:52:55 [boto] ERROR: Caught exception reading instance data
	Traceback (most recent call last):
	File "/usr/lib/python2.7/site-packages/boto/utils.py", line 210, in retry_url
	r = opener.open(req, timeout=timeout)
	File "/usr/lib64/python2.7/urllib2.py", line 437, in open
	response = meth(req, response)
	File "/usr/lib64/python2.7/urllib2.py", line 550, in http_response
	'http', request, response, code, msg, hdrs)
	File "/usr/lib64/python2.7/urllib2.py", line 475, in error
	return self._call_chain(*args)
	File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
	result = func(*args)
	File "/usr/lib64/python2.7/urllib2.py", line 558, in http_error_default
	raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
	HTTPError: HTTP Error 404: Not Found
	2016-03-03 17:52:55 [boto] ERROR: Unable to read instance data, giving up
	2016-03-03 17:52:55 [scrapy] INFO: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats
	2016-03-03 17:52:55 [scrapy] INFO: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware
	2016-03-03 17:52:55 [scrapy] INFO: Enabled item pipelines:
	2016-03-03 17:52:55 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023
	2016-03-03 17:52:55 [scrapy] INFO: Spider opened
	2016-03-03 17:52:56 [scrapy] DEBUG: Crawled (200) <GET http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html> (referer: None)
	[s] Available Scrapy objects:
	[s] crawler <scrapy.crawler.Crawler object at 0x1c5dc50>
	[s] item {}
	[s] request <GET http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html>
	[s] response <200 http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html>
	[s] settings <scrapy.settings.Settings object at 0x1c5db50>
	[s] spider <DefaultSpider 'default' at 0x7fd110017950>
	[s] Useful shortcuts:
	[s] shelp() Shell help (print this help)
	[s] fetch(req_or_url) Fetch request (or URL) and update local objects
	[s] view(response) View response in a browser
	2016-03-03 17:52:57 [root] DEBUG: Using default logger
	2016-03-03 17:52:57 [root] DEBUG: Using default logger

	In [1]: response.xpath("//meta").extract()
	Out[1]:
	[u'<meta http-equiv="content-language" content="de">',
	u'<meta name="keywords" content="Sport, Formel 1, Michael Schumacher">',
	u'<meta name="news_keywords" content="Michael, Schumacher, Sport, Formel 1">',
	u'<meta name="description" content="Michael Schumacher redet, aber nur in einem Video. Es sind Aufnahmen aus der Vergangenheit. Aus seiner einzigartigen Karriere. Einer, der nun eine Ausstellung gewidmet wird. Zur Er\xf6ffnung ist auch seine Familie da.">',
	u'<meta name="robots" content="index,follow">',
	u'<meta name="robots" content="noodp">',
	u'<meta name="robots" content="noarchive">',
	u'<meta name="revisit-after" content="1 hour">',
	u'<meta name="pragma" content="no-cache">',
	u'<meta name="cache-control" content="no-cache">',
	u'<meta name="date" content="2016-02-15T19:57:21+01:00">',
	u'<meta name="author" content="FOCUS Online">',
	u'<meta name="msvalidate.01" content="18FEC93808733136F33F992192FA967D">',
	u'<meta name="apple-itunes-app" content="app-id=519052678,app-argument=focus://article?id=5286608, affiliate-data=at=11lNpf&ct=FOL_Smartbanner">',
	u'<meta name="al:ios:url" content="focus://article?id=5286608">',
	u'<meta name="al:ios:app_store_id" content="519052678">',
	u'<meta name="al:ios:app_name" content="FOCUS Online - Nachrichten">',
	u'<meta name="al:android:url" content="focus://article/id/5286608">',
	u'<meta name="al:android:package" content="de.cellular.focus">',
	u'<meta name="al:android:app_name" content="FOCUS Online - Nachrichten">',
	u'<meta name="fol_app" content="true">',
	u'<meta property="fb:admins" content="100001540876802, 694311112, 1679965273">',
	u'<meta property="fb:page_id" content="147940371990513">',
	u'<meta property="og:type" content="article">',
	u'<meta property="og:url" content="http://www.focus.de/sport/formel1/michael-schumacher-in-schumachers-abwesenheit-ausstellung-zur-formel-1-karriere-eroeffnet_id_5286608.html">',
	u'<meta property="og:title" content="In Schumachers Abwesenheit: Ausstellung zur Formel 1-Karriere er\xf6ffnet">',
	u'<meta property="og:site_name" content="FOCUS Online">',
	u'<meta name="news_synd" content="false">',
	u'<meta name="fol_ct" content="news">',
	u'<meta name="fol_iscomment" content="false">',
	u'<meta http-equiv="content-type" content="text/html; charset=UTF-8">',
	u'<meta http-equiv="expires" content="-1">',
	u'<meta content="http://p5.focus.de/img/fotos/crop5285797/2589045353-w400-h300-o-q75-p5/urn-newsml-dpa-com-20090101-160215-99-749497-large-4-3.jpg" property="imgforsearch">',
	u'<meta content="http://p5.focus.de/img/fotos/crop5285797/5872719666-w1200-h627-o-q75-p5/urn-newsml-dpa-com-20090101-160215-99-749497-large-4-3.jpg" property="og:image">',
	u'<meta content="1200" property="og:image:width">',
	u'<meta content="627" property="og:image:height">',
	u'<meta itemprop="worstRating" content="1">',
	u'<meta itemprop="bestRating" content="5">',
	u'<meta itemprop="ratingCount" content="1">',
	u'<meta itemprop="ratingValue" content="3.0">']