Skip to content

Instantly share code, notes, and snippets.

@masnun
Created December 16, 2015 12:58
Show Gist options
  • Save masnun/e85b38a00a74737bb3eb to your computer and use it in GitHub Desktop.
Save masnun/e85b38a00a74737bb3eb to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
class MySpider(scrapy.Spider):
    """Spider that fetches http://masnun.com and emits the text of its links."""

    name = "myspider"

    def start_requests(self):
        """Return the list of initial requests (a single request for the site root)."""
        seed = scrapy.Request('http://masnun.com')
        return [seed]

    def parse(self, response):
        """Yield one ``{'text': ...}`` item per text node selected by ``a::text``."""
        for link_text in response.css('a::text').extract():
            yield {'text': link_text}
import logging

from pymongo import MongoClient
from scrapy.conf import settings
class MongoPipeline(object):
    """Scrapy item pipeline that stores every processed item in MongoDB.

    Connection parameters are read from the settings keys ``MONGODB_HOST``,
    ``MONGODB_PORT`` and ``MONGODB_DATABASE``.  Each item is written to the
    collection named after the lower-cased class name of the item, so plain
    ``dict`` items end up in the ``dict`` collection.

    NOTE(review): ``settings`` is the module-level object imported from
    ``scrapy.conf``; newer Scrapy releases appear to favour reading
    ``crawler.settings`` in a ``from_crawler`` classmethod -- confirm against
    the Scrapy version this project targets.
    """

    def __init__(self):
        """Connect to MongoDB and select the configured database."""
        # Keep a reference to the client so close_spider() can release it.
        self.client = MongoClient(
            settings['MONGODB_HOST'], settings['MONGODB_PORT']
        )
        self.db = self.client[settings['MONGODB_DATABASE']]

    def process_item(self, item, spider):
        """Insert a copy of *item* into MongoDB and pass the item along.

        Args:
            item: The scraped item; it is converted with ``dict(item)``
                before insertion, so the original object is not modified.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item* object, unchanged, for later pipeline stages.
        """
        collection = self.db[type(item).__name__.lower()]
        # Collection.insert() is deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is its single-document replacement and
        # exposes the new document id as ``inserted_id``.
        result = collection.insert_one(dict(item))
        logging.info(result.inserted_id)
        return item

    def close_spider(self, spider):
        """Close the MongoDB client when the spider finishes, if one is open."""
        client = getattr(self, 'client', None)
        if client is not None:
            client.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment