domluna · September 14, 2015 01:46
diff --git a/scraper.py b/scraper.py
 from __future__ import print_function
 from scrapy.selector import Selector
 import requests

 def scrape_comments(url, timeout=10):
    """Fetch a page and return the text fragments of its comments.

    The page is downloaded with ``requests`` and every element matching
    ``//span[@class='comment']/span`` is inspected.  For each match, the
    text nodes directly under the span are collected first, followed by
    the text nodes directly under its ``<p>`` children.  Text nested in
    other child tags is not selected by these queries.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before ``requests``
            gives up and raises a timeout error.  Without a timeout,
            ``requests.get`` can block forever on an unresponsive
            server.

    Returns:
        A flat list of strings, one per extracted text node, in the
        order described above.
    """
    html = requests.get(url, timeout=timeout).text
    sel = Selector(text=html)

    comments = []
    for c in sel.xpath("//span[@class='comment']/span"):
        # The opening paragraph of a comment is bare text inside the
        # span rather than being wrapped in a <p> tag; there may be
        # more than one such text node, so take them all.
        comments.extend(c.xpath('./text()').extract())
        # The paragraphs that follow each sit inside their own <p> tag.
        comments.extend(c.xpath('./p/text()').extract())

    return comments

 if __name__ == '__main__':
    # Demo run: scrape one fixed news.ycombinator.com item, then print
    # the extracted fragments followed by how many were found.
    comments = scrape_comments(
        'https://news.ycombinator.com/item?id=10212770')
    total = len(comments)
    print(comments, total)
	from __future__ import print_function
	from scrapy.selector import Selector
	import requests

	def scrape_comments(url, timeout=10):
	    """Fetch a page and return the text fragments of its comments.

	    The page is downloaded with ``requests`` and every element matching
	    ``//span[@class='comment']/span`` is inspected.  For each match, the
	    text nodes directly under the span are collected first, followed by
	    the text nodes directly under its ``<p>`` children.  Text nested in
	    other child tags is not selected by these queries.

	    Args:
	        url: Address of the page to scrape.
	        timeout: Seconds to wait for the server before ``requests``
	            gives up and raises a timeout error.  Without a timeout,
	            ``requests.get`` can block forever on an unresponsive
	            server.

	    Returns:
	        A flat list of strings, one per extracted text node, in the
	        order described above.
	    """
	    html = requests.get(url, timeout=timeout).text
	    sel = Selector(text=html)

	    comments = []
	    for c in sel.xpath("//span[@class='comment']/span"):
	        # The opening paragraph of a comment is bare text inside the
	        # span rather than being wrapped in a <p> tag; there may be
	        # more than one such text node, so take them all.
	        comments.extend(c.xpath('./text()').extract())
	        # The paragraphs that follow each sit inside their own <p> tag.
	        comments.extend(c.xpath('./p/text()').extract())

	    return comments

	if __name__ == '__main__':
	    # Demo run: scrape one fixed news.ycombinator.com item, then print
	    # the extracted fragments followed by how many were found.
	    comments = scrape_comments(
	        'https://news.ycombinator.com/item?id=10212770')
	    total = len(comments)
	    print(comments, total)
No results found