Skip to content

Instantly share code, notes, and snippets.

@renfredxh
Forked from simonw/gist:104413
Last active December 21, 2015 15:59
Show Gist options
  • Save renfredxh/6330738 to your computer and use it in GitHub Desktop.
Save renfredxh/6330738 to your computer and use it in GitHub Desktop.
Turn a BeautifulSoup form into URL-encoded fields and default values - useful for screen scraping forms and then resubmitting them
from urllib.parse import urlencode
def extract_form_fields(self, soup):
    """Serialize a BeautifulSoup form's controls into a URL-encoded string.

    Controls are emitted in three passes -- ``<input>``, then ``<textarea>``,
    then ``<select>`` -- each pass in the order ``find_all`` returns them.
    Controls without a non-empty ``name`` attribute are skipped.

    Args:
        self: Unused; kept so the function can still be attached to a class
            and called as a method.
        soup: A BeautifulSoup tag (typically the ``<form>``) exposing
            ``find_all``, ``get``, ``has_attr``, ``string`` and ``get_text``.

    Returns:
        A ``name=value&name=value`` query string, or ``''`` when no named
        control is found.
    """
    pairs = []
    # name -> index in ``pairs`` of the empty placeholder emitted for a
    # checkbox/radio group in which nothing has been seen checked so far.
    placeholders = {}
    checked_names = set()

    for control in soup.find_all('input'):
        name = control.get('name')
        if not name:
            continue
        # A missing or empty type attribute means a plain text input in HTML.
        kind = (control.get('type') or 'text').lower()

        # Single element name/value fields.
        if kind in ('text', 'hidden', 'password', 'submit', 'image'):
            pairs.append((name, control.get('value', '')))

        # Checkboxes and radios: every checked control contributes its value
        # ('on' when it has none); a group with nothing checked contributes
        # one empty entry so the field still shows up with a default.
        elif kind in ('checkbox', 'radio'):
            if control.has_attr('checked'):
                entry = (name, control.get('value', 'on'))
                if name in placeholders:
                    # Replace the placeholder in place to keep field order.
                    pairs[placeholders.pop(name)] = entry
                else:
                    pairs.append(entry)
                checked_names.add(name)
            elif name not in checked_names and name not in placeholders:
                placeholders[name] = len(pairs)
                pairs.append((name, ''))

    # Textareas.
    for textarea in soup.find_all('textarea'):
        name = textarea.get('name')
        if name:
            pairs.append((name, textarea.string or ''))

    # Select fields.
    for select in soup.find_all('select'):
        name = select.get('name')
        if not name:
            continue
        options = select.find_all('option')
        chosen = [option for option in options if option.has_attr('selected')]
        # If no option is marked selected, go with the first one.
        if not chosen and options:
            chosen = [options[0]]

        if select.has_attr('multiple'):
            values = [_option_value(option) for option in chosen]
        elif chosen:
            # A single-choice select keeps only one value; when the markup
            # marks several options as selected, the last one is used.
            values = [_option_value(chosen[-1])]
        else:
            values = ['']
        pairs.extend((name, value) for value in values)

    return urlencode(pairs)


def _option_value(option):
    """Return an <option>'s value attribute, or its collapsed text if absent."""
    if option.has_attr('value'):
        return option['value']
    return ' '.join(option.get_text().split())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment