Last active
June 14, 2021 21:37
-
-
Save rgov/ea3151f016aa2c2ce4b0893b07210f11 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Helps you submit an HTML form like a browser would.

    form = Form(soup.find('form', attrs={'name': 'my_form'}))
    requests.request(
        form.get_method('login'),
        form.get_action('login', relative_to='http://example.com/'),
        data=form.fill('login', {
            'username': 'bob',
            'password': 'hunter2',
        })
    )

The values passed to Form.fill() override default values.

Above, 'login' is the submit button to use. If omitted, the first submit button
(<input type="submit">, <button type="submit"> or a <button> without a type
attribute) is used.

Known issues:
  - Cannot uncheck a checkbox whose default state is checked
  - Controls that share a name submit a single value (the last successful
    one), so multiple checked checkboxes or <select multiple> are not
    fully represented
'''

import urllib.parse


# see https://www.w3.org/TR/html52/sec-forms.html
class Form:
    '''
    Wraps a BeautifulSoup <form> tag and computes what a browser would submit.

    Public attributes:
      form     the wrapped <form> tag
      fields   dict mapping a control name to (tag, default value); when
               several controls share a name, the last one in document
               order is kept here
      buttons  dict mapping a submit button name to (formaction, formmethod)
    '''

    # <input> types that behave like buttons and are only submitted when
    # they are the button used to submit the form.
    _BUTTON_INPUT_TYPES = ('submit', 'image', 'reset', 'button')

    def __init__(self, form):
        '''
        Collect the named controls and submit buttons found inside `form`.

        `form` must be a BeautifulSoup <form> tag.
        '''
        # feed me BeautifulSoup <form> tags
        assert form.name == 'form', 'Form expects a BeautifulSoup <form> tag'

        self.form = form
        self._action = form.get('action', '')
        self._method = form.get('method', 'GET')

        self.fields = {}
        self.buttons = {}

        # Every named control in document order. Unlike `fields`, this keeps
        # controls that share a name (e.g. a radio group), so the checked
        # member of a group is not lost when it is not the last one.
        self._controls = []

        for field in form.find_all(('input', 'button', 'select', 'textarea')):
            name = field.get('name')
            if not name:
                continue

            default_value = self._default_value(field)
            self.fields[name] = (field, default_value)
            self._controls.append((name, field, default_value))

            if self._is_submit_button(field):
                self.buttons[name] = (field.get('formaction'),
                                      field.get('formmethod'))

    @staticmethod
    def _type_of(field):
        '''Return the control's type attribute, lower-cased ('' if absent).'''
        return (field.get('type') or '').strip().lower()

    @classmethod
    def _is_submit_button(cls, field):
        '''Tell whether `field` is a button that submits the form.'''
        kind = cls._type_of(field)
        if field.name == 'button':
            # A <button> without a type attribute defaults to a submit button
            return kind in ('', 'submit')
        return field.name == 'input' and kind == 'submit'

    @staticmethod
    def _default_value(field):
        '''
        Return the value `field` holds before any user input, or None when
        the markup does not specify one.
        '''
        if field.name == 'textarea':
            # A textarea's value is its text content, not a value attribute
            return field.get_text()

        if field.name == 'select':
            options = field.find_all('option')
            if not options:
                return None
            # The first option is displayed when none is marked as selected
            chosen = next((opt for opt in options if opt.has_attr('selected')),
                          options[0])
            value = chosen.get('value')
            # An option without a value attribute submits its label text
            return chosen.get_text().strip() if value is None else value

        return field.get('value')

    def _get_default_button(self):
        '''Return the name of the first submit button, or None if none.'''
        return next(iter(self.buttons), None)

    def get_action(self, button=None, relative_to=''):
        '''
        Return the URL the form submits to when `button` is used.

        The button's formaction takes precedence over the form's action. The
        result is resolved against `relative_to`. Raises KeyError if `button`
        is not the name of a submit button.
        '''
        # Get default submit button if none specified
        if button is None:
            button = self._get_default_button()

        # Use the submit button's formaction if available
        action = None if button is None else self.buttons[button][0]
        action = action or self._action

        return urllib.parse.urljoin(relative_to, action)

    def get_method(self, button=None):
        '''
        Return the HTTP method used when `button` submits the form.

        The button's formmethod takes precedence over the form's method,
        which falls back to 'GET'. Raises KeyError if `button` is not the
        name of a submit button.
        '''
        if button is None:
            button = self._get_default_button()

        method = None if button is None else self.buttons[button][1]
        return method or self._method

    def _fill_impl(self, button, values):
        '''
        Build the dict of submitted values when `button` submits the form,
        then apply the overrides in `values`.
        '''
        filled = {}

        for form_name, field, default_value in self._controls:
            # Skip disabled fields
            if field.has_attr('disabled'):
                continue

            kind = self._type_of(field)

            # Skip buttons that are not the submit button
            is_button = field.name == 'button' or (
                field.name == 'input' and kind in self._BUTTON_INPUT_TYPES)
            if is_button and form_name != button:
                continue

            # Skip radio buttons and checkboxes that are not checked
            is_radio_or_checkbox = (field.name == 'input' and
                                    kind in ('radio', 'checkbox'))
            if is_radio_or_checkbox and not field.has_attr('checked'):
                continue

            # Add the default value
            if default_value is None:
                if is_button:
                    # A <button> without a value submits the empty string;
                    # an <input> button submits its default label
                    default_value = '' if field.name == 'button' else 'Submit'
                elif is_radio_or_checkbox:
                    default_value = 'on'
                else:
                    default_value = ''

            filled[form_name] = default_value

        # Override any form values with our input
        filled.update(values)
        return filled

    def fill(self, *args):
        '''
        Return the data to submit, as a dict.

        Accepted call forms:
          fill()                 default button, no overrides
          fill(values)           default button, `values` override defaults
          fill(button)           named button (a str), no overrides
          fill(button, values)   named button, `values` override defaults

        Raises ValueError for any other number of arguments.
        '''
        if len(args) == 0:
            return self._fill_impl(self._get_default_button(), {})
        elif len(args) == 1:
            # A lone string can only be a button name, never a values mapping
            if isinstance(args[0], str):
                return self._fill_impl(args[0], {})
            return self._fill_impl(self._get_default_button(), args[0])
        elif len(args) == 2:
            return self._fill_impl(args[0], args[1])

        raise ValueError('Expected fill(values) or fill(button, values)')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment