-
-
Save renfredxh/6330738 to your computer and use it in GitHub Desktop.
Turn a BeautifulSoup form into URL-encoded fields and default values - useful for screen scraping forms and then resubmitting them
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from urllib.parse import urlencode | |
def extract_form_fields(self, soup):
    """Turn a BeautifulSoup form into url encoded fields and default values.

    Controls are collected in this order: every ``<input>``, then every
    ``<textarea>``, then every ``<select>`` found under ``soup``.

    * ``text``/``hidden``/``password``/``submit``/``image`` inputs contribute
      their ``value`` attribute (empty string when absent).  An input with
      no ``type`` attribute is treated as ``text``.
    * A checked checkbox/radio contributes its ``value`` (``'on'`` when it
      has none).  A name for which no control is checked contributes a
      single empty value.
    * A textarea contributes its ``.string`` (empty string when ``None``).
    * A select contributes its selected option(s), falling back to the
      first option when none is marked ``selected``.  A ``multiple`` select
      yields one ``name=value`` pair per selected option; a single select
      with several ``selected`` options uses the last one.
    * Controls without a ``name`` attribute are skipped.

    Args:
        self: Unused; kept so the existing call signature keeps working.
        soup: A tag exposing the BeautifulSoup ``findAll`` / ``has_attr`` /
            ``tag[attr]`` / ``.string`` API.

    Returns:
        The fields as a ``name=value&name=value`` url-encoded string, or
        ``''`` when no fields were found.
    """

    def attr(tag, key, default=''):
        # Indexing a tag with a missing attribute raises KeyError, so check
        # for the attribute before reading it.
        return tag[key] if tag.has_attr(key) else default

    def option_value(option):
        # An <option> without a value attribute falls back to its text.
        if option.has_attr('value'):
            return option['value']
        return (option.string or '').strip()

    pairs = []         # ordered (name, value) entries; value may be a list
    seen = set()       # names that already have at least one entry
    placeholders = {}  # name -> index in `pairs` of its empty stand-in

    for control in soup.findAll('input'):
        if not control.has_attr('name'):
            continue
        name = control['name']
        kind = (attr(control, 'type') or 'text').lower()

        # single element name/value fields
        if kind in ('text', 'hidden', 'password', 'submit', 'image'):
            pairs.append((name, attr(control, 'value')))
            seen.add(name)
            continue

        # checkboxes and radios
        if kind in ('checkbox', 'radio'):
            if control.has_attr('checked'):
                value = attr(control, 'value', 'on')
                if name in placeholders:
                    # A checked control replaces the empty stand-in left
                    # by an earlier unchecked sibling of the same name.
                    pairs[placeholders.pop(name)] = (name, value)
                else:
                    pairs.append((name, value))
                seen.add(name)
            elif name not in seen:
                # Keep the field present (with an empty value) until a
                # checked sibling, if any, supplies a real value.
                placeholders[name] = len(pairs)
                pairs.append((name, ''))
                seen.add(name)

    # textareas
    for textarea in soup.findAll('textarea'):
        if textarea.has_attr('name'):
            pairs.append((textarea['name'], textarea.string or ''))

    # select fields
    for select in soup.findAll('select'):
        if not select.has_attr('name'):
            continue
        options = select.findAll('option')
        chosen = [option for option in options if option.has_attr('selected')]
        # If no option is selected, go with the first one
        if not chosen and options:
            chosen = [options[0]]
        values = [option_value(option) for option in chosen]
        if select.has_attr('multiple'):
            pairs.append((select['name'], values))
        else:
            pairs.append((select['name'], values[-1] if values else ''))

    # doseq=True expands list values into repeated name=value pairs.
    return urlencode(pairs, doseq=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment