-
-
Save CoMPaTech/f42a306dda106809f20d1cc6f8c16584 to your computer and use it in GitHub Desktop.
Turn a BeautifulSoup form into a dict of fields and their default values - useful for screen-scraping forms and then resubmitting them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sentinel used to tell "attribute absent" apart from "attribute present but
# empty/valueless" (e.g. ``<input checked>``).
_ATTR_MISSING = object()

# Input types that contribute exactly one name/value pair, taken from the
# ``value`` attribute (empty string when the attribute is absent).
_SINGLE_VALUE_INPUT_TYPES = frozenset((
    'text', 'hidden', 'password', 'submit', 'image',
    'email', 'number', 'search', 'tel', 'url',
    'date', 'datetime-local', 'month', 'week', 'time',
))

# Input types that never contribute a value to a form submission.
_NON_SUBMITTING_INPUT_TYPES = frozenset(('button', 'reset'))


def _has_attr(tag, name):
    """Return True when *tag* carries attribute *name*, even without a value."""
    return tag.get(name, _ATTR_MISSING) is not _ATTR_MISSING


def _option_value(option):
    """Return an <option>'s value, falling back to its text when unset."""
    value = option.get('value')
    if value is None:
        # HTML uses the option's text when there is no ``value`` attribute.
        value = (option.string or '').strip()
    return value


def extract_form_fields(self, soup):
    """Turn a BeautifulSoup form into a dict of fields and default values.

    Only ``soup.findAll(...)``, ``tag.get(...)`` and ``tag.string`` are used,
    so no Beautiful Soup version-specific helper (``has_key``/``has_attr``)
    is required.

    Args:
        self: Unused; kept so the function can still be bound as a method.
        soup: Parsed form (or any tag containing the form controls).

    Returns:
        dict mapping each control's ``name`` to its default value:

        * single-value inputs -> the ``value`` attribute, or ``''``;
        * checkboxes/radios -> the checked value (``'on'`` when the checked
          control has no ``value``), or ``''`` when nothing is checked;
        * textareas -> their text, or ``''``;
        * ``<select>`` -> the selected option's value (the first option
          when none is selected, ``''`` when there are no options);
        * ``<select multiple>`` -> a list of the selected values.

        Controls without a ``name`` attribute are skipped.

    Raises:
        AssertionError: for a named input whose type is not handled
            (for example ``file``).
    """
    fields = {}

    for field in soup.findAll('input'):
        name = field.get('name')
        if name is None:
            # Controls without a name cannot be addressed in the result.
            continue

        # A missing or empty ``type`` is treated as a text input.
        kind = (field.get('type') or 'text').lower()

        if kind in _SINGLE_VALUE_INPUT_TYPES:
            value = field.get('value')
            fields[name] = '' if value is None else value
        elif kind in ('checkbox', 'radio'):
            value = ''
            if _has_attr(field, 'checked'):
                value = field.get('value')
                if value is None:
                    value = 'on'
            # A checked value always wins; an unchecked control only
            # records '' when the name has not been seen yet.
            if value or name not in fields:
                fields[name] = value
        elif kind in _NON_SUBMITTING_INPUT_TYPES:
            continue
        else:
            # Explicit raise (not ``assert``) so the check survives ``-O``.
            raise AssertionError('input type %s not supported' % kind)

    for textarea in soup.findAll('textarea'):
        name = textarea.get('name')
        if name is not None:
            fields[name] = textarea.string or ''

    for select in soup.findAll('select'):
        name = select.get('name')
        if name is None:
            continue

        options = select.findAll('option')
        selected = [
            option for option in options
            if _has_attr(option, 'selected')
        ]
        # With nothing explicitly selected, go with the first option.
        # NOTE(review): this also applies to ``multiple`` selects, matching
        # the previous behaviour; browsers would submit nothing there.
        if not selected and options:
            selected = [options[0]]

        values = [_option_value(option) for option in selected]
        if _has_attr(select, 'multiple'):
            fields[name] = values
        else:
            # If malformed markup selects several options, the last one wins.
            fields[name] = values[-1] if values else ''

    return fields
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment