Created
April 30, 2009 11:19
-
-
Save simonw/104413 to your computer and use it in GitHub Desktop.
Turn a BeautifulSoup form into a dict of fields and default values - useful for screen-scraping forms and then resubmitting them
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_form_fields(self, soup):
    """Turn a BeautifulSoup form into a dict of fields and default values.

    Walks every ``<input>``, ``<textarea>`` and ``<select>`` found under
    *soup* and records the value each control would hold by default, so the
    form can be resubmitted after scraping.

    Args:
        self: Unused; kept so the function can still be attached to a class.
        soup: A BeautifulSoup tag (typically the ``<form>`` element). Only
            ``findAll(name)``, ``get(attr, default)`` and ``.string`` are
            used on it and on the tags it returns.

    Returns:
        A dict mapping control names to default values. Values are strings,
        except for ``<select multiple>`` controls, which map to a list of
        strings.

    Raises:
        AssertionError: If an ``<input>`` has a type other than text,
            hidden, password, submit, image, checkbox or radio, or if a
            single-choice ``<select>`` has more than one selected option.
    """
    missing = object()

    def has_attr(tag, attr):
        # Attribute presence is tested through ``get`` with a sentinel so a
        # valueless attribute (``<input checked>``) still counts as present
        # whatever value the parser stored for it.
        return tag.get(attr, missing) is not missing

    def option_value(option):
        # An <option> without a ``value`` attribute submits its text.
        value = option.get('value')
        if value is None:
            value = (option.string or '').strip()
        return value

    single_value_types = ('text', 'hidden', 'password', 'submit', 'image')
    toggle_types = ('checkbox', 'radio')

    fields = {}

    for field in soup.findAll('input'):
        # An <input> with no ``type`` attribute is a text field in HTML;
        # type names are case-insensitive.
        field_type = (field.get('type') or 'text').lower()
        if field_type not in single_value_types + toggle_types:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O`` while callers still see an AssertionError.
            raise AssertionError(
                'input type %s not supported' % field.get('type'))

        # Controls without a name attribute are never submitted.
        name = field.get('name')
        if name is None:
            continue

        # single element name/value fields
        if field_type in single_value_types:
            fields[name] = field.get('value', '')
            continue

        # checkboxes and radios: a checked control overrides whatever an
        # earlier same-named control stored; an unchecked one only fills in
        # an empty default when the name has not been seen yet.
        value = ''
        if has_attr(field, 'checked'):
            value = field.get('value', 'on')
        if value or name not in fields:
            fields[name] = value

    # textareas
    for textarea in soup.findAll('textarea'):
        name = textarea.get('name')
        if name is None:
            continue
        fields[name] = textarea.string or ''

    # select fields
    for select in soup.findAll('select'):
        name = select.get('name')
        if name is None:
            continue
        options = select.findAll('option')
        chosen = [option for option in options if has_attr(option, 'selected')]
        # If no option is marked selected, go with the first one.
        # NOTE(review): this also applies to ``multiple`` selects, where a
        # browser would submit nothing; kept to preserve existing results.
        if not chosen and options:
            chosen = options[:1]
        values = [option_value(option) for option in chosen]
        if has_attr(select, 'multiple'):
            fields[name] = values
            continue
        if len(values) > 1:
            raise AssertionError(
                'select %s has more than one selected option' % name)
        fields[name] = values[0] if values else ''

    return fields
Thanks! It was missing a check for inputs that have no type attribute:
if not 'type' in input:
continue
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Slightly modified for Python 3