Skip to content

Instantly share code, notes, and snippets.

@pkillnine
Created December 25, 2016 13:48
Show Gist options
  • Save pkillnine/14580b85d2f4e0ac707f89b472458924 to your computer and use it in GitHub Desktop.
Save pkillnine/14580b85d2f4e0ac707f89b472458924 to your computer and use it in GitHub Desktop.
def extract_tag_words(self, elem):
    """Extract tag words from the given element.

    Args:
        elem: The element to inspect. It must support ``elem.tag_name()``,
              ``attr in elem`` membership tests and ``elem[attr]`` lookups.
              ``str(elem)`` supplies the value of the "text" pseudo
              attribute (presumably the element's text content -- confirm
              against the element class).

    Return:
        A lazy generator yielding one string per applicable attribute, in
        the order listed for the element's tag. Tags without an entry in
        the table yield nothing.
    """
    # How to turn each (pseudo) attribute into a candidate word.
    attr_extractors = {
        "alt": lambda el: el["alt"],
        "name": lambda el: el["name"],
        "title": lambda el: el["title"],
        "placeholder": lambda el: el["placeholder"],
        # For URLs only the part after the last slash is used.
        "src": lambda el: el["src"].split('/')[-1],
        "href": lambda el: el["href"].split('/')[-1],
        # "text" is not a real attribute; it is derived from the element.
        "text": str,
        "value": lambda el: el["value"],
    }
    # Which attributes are worth looking at, per tag, in priority order.
    extractable_attrs = {
        "img": ["alt", "title", "src"],
        "a": ["title", "href", "text"],
        "input": ["name", "placeholder"],
        "textarea": ["name", "placeholder"],
        "button": ["text", "value"],
    }
    # A plain dict with .get() gives the same "unknown tag -> nothing"
    # result as a defaultdict(list), without needing the collections module
    # and without inserting a new key on every miss.
    wanted = extractable_attrs.get(elem.tag_name(), ())
    # "text" is always available; real attributes must be present on elem.
    return (attr_extractors[attr](elem)
            for attr in wanted
            if attr in elem or attr == "text")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment