# Source: GitHub Gist rmax/286045 by @rmax, created January 25, 2010 17:24.
# (GitHub page navigation text from the original capture has been dropped.)
import unittest
from scrapy.contrib_exp.spiders.url import UrlSpider, UrlMatcher
from scrapy.contrib_exp.url.extractor import SgmlUrlExtractor
from scrapy.http import HtmlResponse, Request
def test_callable(response):
pass
def capitalize_urls(urls):
    """Lazily yield every item of *urls* converted with ``.upper()``."""
    for link in urls:
        shouted = link.upper()
        yield shouted
class TesSpider(UrlSpider):
    """Spider fixture declaring one extractor, one processor and two matchers."""

    # Extractors that pull candidate URLs out of a response.
    url_extractors = [SgmlUrlExtractor()]

    # Processors applied to the extracted URLs (here: upper-casing).
    url_processors = [capitalize_urls]

    # One matcher names its callback as a string with a positional argument,
    # the other passes a callable with a keyword argument.
    url_matchers = [
        UrlMatcher(r'URL1', 'testmethod', 'argument'),
        UrlMatcher(r'URL2', test_callable, kwargument='argument'),
    ]

    def testmethod(self, response):
        """Callback named by the first matcher; returns a request for ``URL3``."""
        return Request('URL3')
# Minimal HTML document with two anchors ("url1" and "url2") used as the
# body of the canned response in the tests.
response_body = (
    '\n'
    '<html>\n'
    '<head></head>\n'
    '<body>\n'
    '<a href="url1">Link1</a>\n'
    '<a href="url2">Link2</a>\n'
    '</body>\n'
    '</html>\n'
)
class UrlSpiderTest(unittest.TestCase):
    """Tests for the URL extraction / processing / matching of ``TesSpider``.

    Uses ``assertEqual`` rather than the deprecated ``assertEquals`` alias,
    which no longer exists in recent Python versions.
    """

    def setUp(self):
        """Build a fresh spider and a canned HTML response for every test."""
        self.s = TesSpider()
        self.r = HtmlResponse('url', body=response_body)

    def _compare_requests(self, reqs, other_reqs):
        """Assert both sequences have the same length and pairwise-equal URLs."""
        self.assertEqual(len(reqs), len(other_reqs))
        # Lengths are already known to match, so zip() drops nothing.
        for req, other in zip(reqs, other_reqs):
            self.assertEqual(req.url, other.url)

    def test_compile_matchers(self):
        """A callback given by name becomes the bound method; a callable is kept."""
        self.assertEqual(self.s._url_matchers[0].callback, self.s.testmethod)
        self.assertEqual(self.s._url_matchers[1].callback, test_callable)

    def test_extract_urls(self):
        """Both anchors of the canned response are extracted, in order."""
        self.assertEqual(self.s._extract_urls(self.r), ['url1', 'url2'])

    def test_process_urls(self):
        """URL processors are applied to the URLs (upper-casing them here)."""
        urls = ['url1', 'url2']
        purls = list(self.s._process_urls(urls))
        self.assertEqual(purls, ['URL1', 'URL2'])

    def test_response_downloaded(self):
        """Each produced request carries the matcher's callback and arguments."""
        reqs = [Request('URL1'), Request('URL2')]
        res = list(self.s._response_downloaded(self.r))
        self._compare_requests(res, reqs)
        for i, r in enumerate(res):
            # First entry registered on the request's deferred; the assertions
            # below treat it as a (callable, args, kwargs) triple.
            callbacks = r.deferred.callbacks[0][0]
            matcher = self.s._url_matchers[i]
            self.assertEqual(callbacks[0], self.s._response_downloaded)
            self.assertEqual(callbacks[1], (matcher.callback,))
            self.assertEqual(callbacks[2], {'cb_args': matcher.cb_args,
                                            'cb_kwargs': matcher.cb_kwargs})

    def test_response_downloaded_with_callback(self):
        """With an explicit callback, its request precedes the matched ones."""
        reqs = [Request('URL3'), Request('URL1'), Request('URL2')]
        res = list(
            self.s._response_downloaded(self.r, callback=self.s.testmethod))
        self._compare_requests(res, reqs)

    def test_parse(self):
        """``parse`` yields the same requests as ``_response_downloaded``."""
        res1 = list(self.s._response_downloaded(self.r))
        res2 = list(self.s.parse(self.r))
        self._compare_requests(res1, res2)
# Run the test suite only when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
# (GitHub page footer from the original capture: sign-in prompt for commenting on the gist.)