Last active
April 7, 2020 17:58
-
-
Save hackprime/06573315e0d0a4f7b17e to your computer and use it in GitHub Desktop.
Register many applications with the Yandex OAuth service in a single run, using a Scrapy spider.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
class AppCredentialsItem(scrapy.Item):
    """Scraped record describing one registered application.

    Holds three fields: the application's domain and its public/secret
    key pair.
    """

    # Domain the application was registered for.
    domain = scrapy.Field()

    # Public half of the application's key pair.
    public_key = scrapy.Field()

    # Secret half of the application's key pair.
    secret_key = scrapy.Field()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
from scrapy.loader import ItemLoader | |
from scrapy.loader.processors import MapCompose, TakeFirst | |
from .items import AppCredentialsItem | |
try:
    # Python 2: ``basestring`` is the common base of ``str`` and ``unicode``.
    _STRING_TYPES = basestring
except NameError:
    # Python 3: ``basestring`` no longer exists; ``str`` is the text type.
    _STRING_TYPES = str


def _strip_if_string(value):
    """Return *value* without surrounding whitespace if it is a string.

    Non-string values are passed through unchanged.
    """
    return value.strip() if isinstance(value, _STRING_TYPES) else value


def _lowercase(value):
    """Return ``value.lower()``.

    Stands in for ``string.lower``, which only exists on Python 2; calling
    the method on the value works on both Python 2 and Python 3.
    """
    return value.lower()


class CredentialsItemLoader(ItemLoader):
    """Item loader that builds ``AppCredentialsItem`` objects.

    By default every extracted value is whitespace-stripped (when it is a
    string) and only the first collected value of each field is kept.
    """

    default_input_processor = MapCompose(_strip_if_string)
    default_output_processor = TakeFirst()
    default_item_class = AppCredentialsItem

    # Field-specific input processor for ``domain``: values are lower-cased.
    # NOTE(review): a ``<field>_in`` processor takes precedence over
    # ``default_input_processor``, so ``domain`` values are presumably not
    # stripped -- confirm this is intended.
    domain_in = MapCompose(_lowercase)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Project identity -------------------------------------------------------
BOT_NAME = 'yandex_oauth_app_creator'

# --- Spider discovery -------------------------------------------------------
SPIDER_MODULES = ['yandex_oauth_app_creator.spiders']
NEWSPIDER_MODULE = 'yandex_oauth_app_creator.spiders'

# --- Throttling: one request at a time per domain, 2 seconds apart ----------
CONCURRENT_REQUESTS_PER_DOMAIN = 1
DOWNLOAD_DELAY = 2

# --- Diagnostics: enable duplicate-filter debug output ----------------------
DUPEFILTER_DEBUG = True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
$ scrapy crawl oauth -o keys_dump.json | |
""" | |
import json | |
import scrapy | |
from scrapy import FormRequest, Request | |
from ..loaders import CredentialsItemLoader | |
# Absolute path of the JSON file holding the application records to submit.
INPUT_DATA_FILE_PATH = '/Users/hackprime/Projects/projectname/tmp/yandex_apps_data.json'

# Extra form fields merged into every application form submission.
SCOPES = {
    'scopes:login:email': 'login:email',
    'scopes:login:info': 'login:info',
}

# Form fields submitted to the login page (placeholders -- replace with real
# account values before running).
CREDENTIALS = {
    'login': 'your_login',
    'passwd': 'your_password',
}
class OauthSpider(scrapy.Spider):
    """Log in to Yandex and submit one application form per input record.

    Flow: ``parse`` submits the login form, ``after_login`` checks the
    result and opens the "new client" page, ``create_applications`` submits
    that page's form once per record read from ``INPUT_DATA_FILE_PATH``,
    and ``after_submit_application_form`` / ``parse_app_page`` turn each
    resulting page into a credentials item.
    """

    name = "oauth"
    # "oauth.yandex.ru" must be listed because the new-client page requested
    # in ``after_login`` lives on that host; otherwise the offsite filter
    # drops the request and no application is ever created.
    allowed_domains = ["oauth.yandex.com", "oauth.yandex.ru", "passport.yandex.com"]
    start_urls = (
        'https://passport.yandex.com/passport?mode=auth&retpath=https://oauth.yandex.com',
    )

    def parse(self, response):
        """Submit the login form found on the start page using ``CREDENTIALS``."""
        return FormRequest.from_response(
            response,
            formdata=CREDENTIALS,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Verify the login and request the application-creation page.

        Returns ``None`` (ending the crawl) when the expected marker text is
        absent from the response, otherwise a request for the new-client page.
        """
        if 'List of registered clients' not in response.body_as_unicode():
            self.logger.error("Login failed")
            return
        self.logger.info("Login Successful")
        return Request(
            'https://oauth.yandex.ru/client/new',
            callback=self.create_applications,
        )

    def create_applications(self, response):
        """Yield one form submission per record in the input JSON file.

        Each record is merged with ``SCOPES`` to build the form data and is
        also attached to the request ``meta`` under ``'domain_item'`` so the
        callback can report which application it belongs to.
        """
        # The module-level constant is INPUT_DATA_FILE_PATH; the previous
        # name (DATA_FILE_PATH) was undefined and raised NameError.
        with open(INPUT_DATA_FILE_PATH) as data_file:
            domains_data = json.load(data_file)
        for domain_item in domains_data:
            form_data = dict(domain_item, **SCOPES)
            yield FormRequest.from_response(
                response,
                formdata=form_data,
                formxpath='//div[@class="layout-content"]/form',
                callback=self.after_submit_application_form,
                meta={'domain_item': domain_item},
            )

    def after_submit_application_form(self, response):
        """Check the outcome of a form submission and extract the item.

        A response URL still containing ``/client/new`` is treated as a
        failed creation; in that case an error is logged and nothing is
        returned.
        """
        app_title = response.request.meta['domain_item']['title']
        if '/client/new' in response.url:
            self.logger.error('Error while creating application %s', app_title)
            return
        self.logger.info('Successfully created application %s', app_title)
        return self.parse_app_page(response)

    def parse_app_page(self, response):
        """Build a credentials item from an application page.

        ``domain`` comes from the page title text; ``public_key`` and
        ``secret_key`` are captured from the owner-info text after the
        ``ID:`` and ``Password:`` labels respectively.
        """
        loader = CredentialsItemLoader(response=response)
        loader.add_css('domain', '.layout-content .clientinfo-name .pageTitle::text')
        loader.add_css('public_key', '.layout-content .clientinfo-owner-info::text', re=r'ID:\s(\w+)')
        loader.add_css('secret_key', '.layout-content .clientinfo-owner-info::text', re=r'Password:\s(\w+)')
        return loader.load_item()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"callback": "http://domain1.ru/social/complete/yandex-oauth2/", | |
"homepage": "http://domain1.ru", | |
"description": "domain1.ru auth app", | |
"title": "DOMAIN1.RU" | |
}, | |
{ | |
"callback": "http://domain2.ru/social/complete/yandex-oauth2/", | |
"homepage": "http://domain2.ru", | |
"description": "domain2.ru auth app", | |
"title": "DOMAIN2.RU" | |
} | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment