Created
October 19, 2023 13:14
-
-
Save jnns/04149898f622ee5b0c4483fe378404f7 to your computer and use it in GitHub Desktop.
Use this to get a Python dictionary of the form data contained in an HTML page. lxml is used to parse the HTML and read the form fields.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def form_data(response: HttpResponse, **kwargs) -> dict:
    """Extract form data from the HTML body of a response.

    The response's ``content`` is handed to ``parse_html_form`` and any
    keyword arguments are forwarded to it unchanged.
    """
    body = response.content
    return parse_html_form(body, **kwargs)
def parse_html_form(html: str, **kwargs) -> dict: | |
"""Returns form data as a dict. | |
The form lookup can be narrowed down by using attributes to filter for | |
as `kwargs`. | |
""" | |
tree = lxml.html.fromstring(html) | |
filter = " and ".join(f"@{key}='{val}'" for key, val in kwargs.items()) | |
form = tree.xpath(f"//form[{filter}]" if filter else "//form")[0] | |
data = {} | |
for key, element in form.inputs.items(): | |
match element.value: | |
case lxml.html.CheckboxValues(): | |
data[key] = [sub.value for sub in element.value.group if sub.value] | |
case lxml.html.MultipleSelectOptions(): | |
data[key] = list(element.value) | |
case _: | |
data[key] = element.value or "" | |
return data |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment