Last active
August 29, 2015 14:04
-
-
Save lightstrike/08322a945b7d8966a903 to your computer and use it in GitHub Desktop.
Simple script that extracts CSS classes from an HTML file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Usage: get_classes_from_html.py path/to/file.html
""" | |
def convert_html_to_string(html_file_path):
    """Return the entire contents of the file at ``html_file_path``.

    The file is opened in text mode for reading and is closed before the
    function returns. Any error raised by ``open`` or ``read`` (for
    example, a missing file) propagates to the caller.
    """
    # The context manager closes the handle even if read() raises; the
    # original left the file open until garbage collection.
    with open(html_file_path, 'r') as html_file:
        return html_file.read()
def get_css_classes(html_string, attribute='class'):
    """Build an empty CSS rule stub for each ``attribute="..."`` value.

    ``html_string`` is scanned for every occurrence of the literal text
    ``<attribute>="``; the characters up to the next double quote are
    taken as the value. Each value is returned as the string
    ``'.<value> {\\n}\\n'``, in document order, duplicates included.

    Parameters:
        html_string: the HTML source text to scan.
        attribute: name of the attribute whose values are collected
            (defaults to ``'class'``).

    Returns:
        A list of rule-stub strings; empty when nothing matches.

    Limitations: only double-quoted values are recognised, and a value
    holding several space-separated classes is emitted as one selector.
    """
    # TODO: agnostic quote types
    # TODO: split up multiple classes
    opener = attribute + '="'
    # Everything before the first opener contains no value, so skip it.
    value_blocks = html_string.split(opener)[1:]
    css_rules = []
    for block in value_blocks:
        value, closing_quote, _ = block.partition('"')
        if not closing_quote:
            # Unterminated value (no closing quote): there is no reliable
            # end of the value, so skip it rather than emit a truncated
            # or runaway selector.
            continue
        css_rules.append(''.join(['.', value, ' {\n}\n']))
    return css_rules
import sys | |
if __name__ == "__main__":
    # Command-line entry point: print one empty CSS rule stub per class
    # attribute value found in the HTML file named by the first argument.
    if len(sys.argv) < 2:
        # Exit with the usage line instead of an IndexError traceback.
        sys.exit("Usage: get_classes_from_html.py path/to/file.html")
    html_file_path = sys.argv[1]
    html_string = convert_html_to_string(html_file_path)
    css_classes = get_css_classes(html_string)
    for css_class in css_classes:
        # Parenthesised single-argument form runs on both Python 2 and 3.
        print(css_class)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment