Skip to content

Instantly share code, notes, and snippets.

@hackprime
Last active April 7, 2020 17:58
Show Gist options
  • Save hackprime/06573315e0d0a4f7b17e to your computer and use it in GitHub Desktop.
Save hackprime/06573315e0d0a4f7b17e to your computer and use it in GitHub Desktop.
Register many applications with the Yandex OAuth service in a single run using a Scrapy spider
import scrapy
class AppCredentialsItem(scrapy.Item):
    """Credentials scraped from the page of one registered OAuth application."""

    # Application name, taken from the page title of the client page.
    domain = scrapy.Field()
    # Value captured after "ID:" on the client page.
    public_key = scrapy.Field()
    # Value captured after "Password:" on the client page.
    secret_key = scrapy.Field()
import string
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, TakeFirst
from .items import AppCredentialsItem
try:
    _TEXT_TYPES = basestring  # noqa: F821 - Python 2: covers str and unicode
except NameError:
    _TEXT_TYPES = str  # Python 3: ``basestring`` no longer exists


def _strip_text(value):
    """Strip surrounding whitespace from text values; pass others through."""
    return value.strip() if isinstance(value, _TEXT_TYPES) else value


def _lowercase(value):
    """Lower-case a text value.

    Uses the value's own method because the ``string.lower`` function only
    exists on Python 2.
    """
    return value.lower()


class CredentialsItemLoader(ItemLoader):
    """Item loader that builds ``AppCredentialsItem`` objects.

    By default every extracted value is whitespace-stripped on input and the
    first collected value is kept on output.
    """

    default_input_processor = MapCompose(_strip_text)
    default_output_processor = TakeFirst()
    default_item_class = AppCredentialsItem

    # A field-specific ``<field>_in`` replaces the default input processor
    # (per Scrapy's Item Loader docs), so ``domain`` values are lower-cased
    # but not stripped.
    domain_in = MapCompose(_lowercase)
# Scrapy settings for the Yandex OAuth application creator project.

# Project identity; the spiders package sits directly under the bot package.
BOT_NAME = 'yandex_oauth_app_creator'
NEWSPIDER_MODULE = BOT_NAME + '.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Log every request dropped by the duplicate filter (per the Scrapy settings
# reference, only the first duplicate is logged otherwise).
DUPEFILTER_DEBUG = True

# Throttling: a single concurrent request per domain plus a download delay
# (in seconds, per the Scrapy settings reference).
CONCURRENT_REQUESTS_PER_DOMAIN = 1
DOWNLOAD_DELAY = 2
"""
$ scrapy crawl oauth -o keys_dump.json
"""
import json
import scrapy
from scrapy import FormRequest, Request
from ..loaders import CredentialsItemLoader
# Path to the JSON input file listing the applications to register.
INPUT_DATA_FILE_PATH = (
    '/Users/hackprime/Projects/projectname/tmp/yandex_apps_data.json'
)

# Extra form fields merged into every application form submission.
SCOPES = {
    'scopes:login:email': 'login:email',
    'scopes:login:info': 'login:info',
}

# Form fields submitted to the login form; replace with a real account.
CREDENTIALS = {
    'login': 'your_login',
    'passwd': 'your_password',
}
class OauthSpider(scrapy.Spider):
    """Log in to Yandex and register one OAuth application per input entry.

    The entries are read from the JSON file at ``INPUT_DATA_FILE_PATH``: a
    list of objects whose keys are submitted as form fields and which must
    contain a ``title`` key (used in log messages).

    Callback chain: ``parse`` (submit the login form) -> ``after_login``
    (open the new-client form) -> ``create_applications`` (submit that form
    once per entry) -> ``after_submit_application_form`` ->
    ``parse_app_page`` (scrape the credentials into an item).
    """

    name = "oauth"
    allowed_domains = ["oauth.yandex.com", "passport.yandex.com"]
    start_urls = (
        'https://passport.yandex.com/passport?mode=auth&retpath=https://oauth.yandex.com',
    )

    def parse(self, response):
        """Fill in and submit the login form found on the start page."""
        return FormRequest.from_response(
            response,
            formdata=CREDENTIALS,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Verify the login worked, then request the new-client form.

        Returns ``None`` (ending the crawl) when the expected marker text is
        missing from the post-login page.
        """
        if 'List of registered clients' not in response.body_as_unicode():
            self.logger.error("Login failed")
            return None
        self.logger.info("Login Successful")
        # The host must be listed in ``allowed_domains``; a request to a
        # different host (the previous ``oauth.yandex.ru`` URL) is dropped
        # by Scrapy's offsite filtering and the crawl silently stops here.
        return Request(
            'https://oauth.yandex.com/client/new',
            callback=self.create_applications,
        )

    def create_applications(self, response):
        """Yield one new-client form submission per entry of the input file."""
        with open(INPUT_DATA_FILE_PATH) as data_file:
            domains_data = json.load(data_file)
        # The file is closed before any request is yielded so the handle is
        # not kept open while the generator is being consumed.
        for domain_item in domains_data:
            # Copy first so the entry stored in ``meta`` stays untouched.
            form_data = dict(domain_item)
            form_data.update(SCOPES)
            yield FormRequest.from_response(
                response,
                formdata=form_data,
                formxpath='//div[@class="layout-content"]/form',
                callback=self.after_submit_application_form,
                meta={'domain_item': domain_item},
            )

    def after_submit_application_form(self, response):
        """Check the submission result and scrape the created application.

        Still being on a ``/client/new`` URL after the submission is treated
        as a failure; in that case nothing is returned for this entry.
        """
        app_title = response.request.meta['domain_item']['title']
        if '/client/new' in response.url:
            self.logger.error('Error while creating application %s', app_title)
            return None
        self.logger.info('Successfully created application %s', app_title)
        return self.parse_app_page(response)

    def parse_app_page(self, response):
        """Extract name, ID and password of the application into an item."""
        loader = CredentialsItemLoader(response=response)
        loader.add_css(
            'domain',
            '.layout-content .clientinfo-name .pageTitle::text',
        )
        loader.add_css(
            'public_key',
            '.layout-content .clientinfo-owner-info::text',
            re=r'ID:\s(\w+)',
        )
        loader.add_css(
            'secret_key',
            '.layout-content .clientinfo-owner-info::text',
            re=r'Password:\s(\w+)',
        )
        return loader.load_item()
[
{
"callback": "http://domain1.ru/social/complete/yandex-oauth2/",
"homepage": "http://domain1.ru",
"description": "domain1.ru auth app",
"title": "DOMAIN1.RU"
},
{
"callback": "http://domain2.ru/social/complete/yandex-oauth2/",
"homepage": "http://domain2.ru",
"description": "domain2.ru auth app",
"title": "DOMAIN2.RU"
}
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment