Skip to content

Instantly share code, notes, and snippets.

@simonw
Created April 30, 2009 11:19
Show Gist options
  • Save simonw/104413 to your computer and use it in GitHub Desktop.
Save simonw/104413 to your computer and use it in GitHub Desktop.
Turn a BeautifulSoup form into a dict of fields and default values - useful for screen scraping forms and then resubmitting them
def extract_form_fields(self, soup):
    """Turn a BeautifulSoup form into a dict of fields and default values.

    Only ``tag.get()``, ``tag.findAll()`` and ``tag.string`` are used, so the
    function does not depend on ``has_key`` (which does not exist on
    Python 3 dicts).

    Args:
        self: Unused; kept so existing callers that pass it keep working.
        soup: A parsed form (or any tag containing form controls).

    Returns:
        A dict mapping each named control to its default value:

        * text/hidden/password/submit/image inputs -> the ``value``
          attribute, or ``''`` when it is absent.  An input with no ``type``
          attribute is treated as ``text``.
        * checkboxes/radios -> the value of the checked control (``'on'``
          when it has no ``value``), or ``''`` when none is checked.
        * textareas -> ``textarea.string`` or ``''``.
        * single selects -> the value of the selected option (the last one
          if several are marked ``selected``), falling back to the first
          option, or ``''`` when there are no options.
        * multiple selects -> a list of the selected option values.

        Controls without a ``name`` attribute are skipped.

    Raises:
        AssertionError: if an ``<input>`` has a type other than the ones
            listed above.
    """
    missing = object()

    def has_attr(tag, attr):
        # Valueless attributes such as ``checked`` can carry an empty value,
        # so presence is tested with a sentinel instead of truthiness.
        return tag.get(attr, missing) is not missing

    def option_value(option):
        # An <option> without a value attribute submits its text instead.
        value = option.get('value', missing)
        if value is missing:
            value = (option.string or '').strip()
        return value

    single_value_types = ('text', 'hidden', 'password', 'submit', 'image')
    toggle_types = ('checkbox', 'radio')
    fields = {}

    for control in soup.findAll('input'):
        raw_type = control.get('type')
        # A missing or empty type means a plain text field.
        kind = (raw_type or 'text').lower()
        if kind not in single_value_types and kind not in toggle_types:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError('input type %s not supported' % raw_type)

        name = control.get('name')
        if name is None:
            # Unnamed controls are never submitted.
            continue

        # single element name/value fields
        if kind in single_value_types:
            fields[name] = control.get('value', '')
            continue

        # checkboxes and radios
        value = ''
        if has_attr(control, 'checked'):
            value = control.get('value', 'on')
        # A checked control always wins; an unchecked one only creates the
        # (empty) entry and never overwrites a value already recorded.
        if value or name not in fields:
            fields[name] = value

    # textareas
    for textarea in soup.findAll('textarea'):
        name = textarea.get('name')
        if name is not None:
            fields[name] = textarea.string or ''

    # select fields
    for select in soup.findAll('select'):
        name = select.get('name')
        if name is None:
            continue
        options = select.findAll('option')
        chosen = [option for option in options if has_attr(option, 'selected')]
        # If nothing is explicitly selected, go with the first option
        if not chosen and options:
            chosen = options[:1]
        values = [option_value(option) for option in chosen]
        if has_attr(select, 'multiple'):
            fields[name] = values
        elif values:
            # With several ``selected`` options in a single select the last
            # one is the one that ends up selected.
            fields[name] = values[-1]
        else:
            fields[name] = ''

    return fields
@jfialkoff
Copy link

Thanks! That saved me a bunch of time.

@ltiao
Copy link

ltiao commented Feb 23, 2013

oh this is good

@asifmallik
Copy link

This turned out to be very helpful but I am not sure why there is a self.

@raphaelm
Copy link

Hey @simonw, Would you mind putting this under a free license, e.g. the Apache or MIT license? :)

@mikers1
Copy link

mikers1 commented Aug 20, 2016

Thx!

@genunity
Copy link

def extract_form_fields(soup):
    """Turn a BeautifulSoup (bs4) form into a dict of fields and default values.

    Attribute presence is tested with ``Tag.has_attr`` throughout: on a bs4
    ``Tag`` the ``in`` operator looks at the tag's children, not at its
    attributes, so ``'value' in tag`` cannot be used for that purpose.

    Args:
        soup: A parsed form (or any tag containing form controls).

    Returns:
        A dict keyed by control name:

        * text/hidden/password/submit/image inputs map to their ``value``
          attribute (``''`` when absent); an input without a ``type``
          attribute is handled as ``text``.
        * checkboxes/radios map to the checked control's value (``'on'`` if
          it has no ``value``), or ``''`` when nothing is checked.
        * textareas map to ``textarea.string`` or ``''``.
        * single selects map to the selected option's value (the last one
          when several are marked ``selected``), defaulting to the first
          option, or ``''`` when the select is empty.
        * multiple selects map to a list of selected option values.

        Controls with no ``name`` attribute are left out.

    Raises:
        AssertionError: if an ``<input>`` has a type that is not listed
            above.
    """
    single_value_types = ('text', 'hidden', 'password', 'submit', 'image')
    toggle_types = ('checkbox', 'radio')
    fields = {}

    for field in soup.find_all('input'):
        raw_type = field.get('type')
        # No (or an empty) type attribute means a plain text field.
        field_type = (raw_type or 'text').lower()

        if field_type not in single_value_types + toggle_types:
            # Explicit raise instead of ``assert`` so ``python -O`` keeps it.
            raise AssertionError('input type %s not supported' % raw_type)

        # Controls without a name are never submitted.
        if not field.has_attr('name'):
            continue
        name = field['name']

        # single element name/value fields
        if field_type in single_value_types:
            fields[name] = field['value'] if field.has_attr('value') else ''
            continue

        # checkboxes and radios
        value = ''
        if field.has_attr('checked'):
            value = field['value'] if field.has_attr('value') else 'on'

        if name not in fields:
            # First control of this name: record it even when unchecked.
            fields[name] = value
        elif value:
            # A checked control replaces an earlier unchecked placeholder;
            # an unchecked one must not clobber a recorded value.
            fields[name] = value

    # textareas
    for textarea in soup.find_all('textarea'):
        if textarea.has_attr('name'):
            fields[textarea['name']] = textarea.string or ''

    # select fields
    for select in soup.find_all('select'):
        if not select.has_attr('name'):
            continue

        options = select.find_all('option')
        selected_options = [
            option for option in options
            if option.has_attr('selected')
        ]

        # If nothing is explicitly selected, go with the first option
        if not selected_options and options:
            selected_options = [options[0]]

        values = []
        for option in selected_options:
            if option.has_attr('value'):
                values.append(option['value'])
            else:
                # An option without a value attribute submits its text.
                values.append((option.string or '').strip())

        if select.has_attr('multiple'):
            fields[select['name']] = values
        else:
            # When several options carry ``selected`` in a single select,
            # the last one is the one that ends up selected.
            fields[select['name']] = values[-1] if values else ''

    return fields

Slightly modified for Python 3.

@malaterre
Copy link

Thanks! It was missing a check for inputs that have no type attribute:

        if not 'type' in input:
          continue

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment