# -*- coding: utf-8 -*-
# Scrapy spider for Craigslist's Asheville "for sale" (sss) search listings
import scrapy
import sys


class CraigslistSpider(scrapy.Spider):
    name = 'craigslist'
    allowed_domains = ['asheville.craigslist.org']
    start_urls = ['https://asheville.craigslist.org/search/sss']

    def parse(self, response):
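        # NOTE: the embedded gist is truncated at this point. The body below
        # is a hypothetical sketch of a Craigslist parse callback, not the
        # repo's actual extraction logic; the CSS selectors and field names
        # are assumptions.
        for listing in response.css('li.result-row'):
            yield {
                'title': listing.css('a.result-title::text').extract_first(),
                'price': listing.css('span.result-price::text').extract_first(),
                'url': listing.css('a.result-title::attr(href)').extract_first(),
            }
        # follow pagination if a "next" link is present
        next_page = response.css('a.button.next::attr(href)').extract_first()
        if next_page:
            yield scrapy.Request(response.urljoin(next_page), callback=self.parse)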
# -*- coding: utf-8 -*-
# Scrapy spider for Letgo listings
import scrapy
import json
import requests
import re
from time import sleep
import sys


class LetgoSpider(scrapy.Spider):
    name = 'letgo'
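    # NOTE: the embedded gist is truncated at this point. The attributes and
    # parse method below are a hypothetical sketch; the endpoint and JSON
    # field names are placeholders, not the original spider's values. The
    # json import above suggests the real spider decodes a JSON listing feed.
    allowed_domains = ['letgo.com']
    start_urls = ['https://www.letgo.com/api/listings']  # placeholder endpoint

    def parse(self, response):
        # decode the JSON body instead of using CSS/XPath selectors
        data = json.loads(response.text)
        for listing in data.get('listings', []):
            yield {
                'title': listing.get('title'),
                'price': listing.get('price'),
                'description': listing.get('description'),
            }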
# pipeline adjustment to export scraped items to MongoDB
from pymongo import MongoClient
# note: scrapy.conf is deprecated in newer Scrapy releases, which read
# settings through the crawler instead
from scrapy.conf import settings


class MongoDBPipeline(object):

    def __init__(self):
        connection = MongoClient(
            settings['MONGODB_SERVER'],
            settings['MONGODB_PORT'])
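        # NOTE: the embedded gist is truncated at this point. The rest of the
        # pipeline below is a hypothetical sketch of the usual MongoDB
        # pipeline pattern; the MONGODB_DB / MONGODB_COLLECTION setting names
        # are assumptions.
        db = connection[settings['MONGODB_DB']]
        self.collection = db[settings['MONGODB_COLLECTION']]

    def process_item(self, item, spider):
        # insert each scraped item as a document and pass it along the
        # pipeline unchanged
        self.collection.insert_one(dict(item))
        return item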
# scraping RotoGrinders' advanced NBA guard stats with BeautifulSoup, then
# again with pandas' read_html for comparison
import urllib.request
import bs4 as bs
import pandas as pd

guards_advanced = urllib.request.urlopen("https://rotogrinders.com/pages/nba-advanced-player-stats-guards-181885").read()
guards_advanced_soup = bs.BeautifulSoup(guards_advanced, 'lxml')
# leaving out a number of lines needed to extract the data (including the
# col_names string used below); see the GitHub repo for the full code
guards_advanced_col_names = col_names.split()
print(guards_advanced_col_names)

# could also use pandas' read_html method instead
guards_advanced_dfs = pd.read_html("https://rotogrinders.com/pages/nba-advanced-player-stats-guards-181885")
guards_advanced_stats_df = guards_advanced_dfs[2]
guards_advanced_stats_df.tail()
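# Hypothetical follow-up: attach the scraped header row to the read_html
# table and persist it. Whether the column count actually lines up depends on
# the page layout, so treat this as a sketch rather than the repo's code.
guards_advanced_stats_df.columns = guards_advanced_col_names
guards_advanced_stats_df.to_csv('guards_advanced_stats.csv', index=False)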