|
#!/usr/bin/env python |
|
from itertools import islice, izip |
|
from lxml import etree, html |
|
import sys |
|
|
|
""" |
|
Takes a specified HTML, reads the client-side image map at the specified ID and |
|
generates an SVG of all of those areas. |
|
""" |
|
|
|
def parse_html(input_filename):
    """
    Takes the filename of an HTML file and returns the root element of its
    parsed contents, using the BeautifulSoup parser as a fallback if needed.

    The file is read as raw bytes and handed to lxml's HTML parser. If the
    resulting tree cannot be serialised to unicode text, the same bytes are
    re-parsed with lxml's BeautifulSoup-backed soupparser instead.
    """
    # The context manager closes the handle even if read() raises.
    with open(input_filename, 'rb') as input_file:
        input_contents = input_file.read()

    # Thanks to http://lxml.de/elementsoup.html#using-soupparser-as-a-fallback
    # for this technique.
    html_root = html.fromstring(input_contents)
    try:
        # The serialised text is thrown away; the call only probes whether the
        # tree can be decoded. The encoding name 'unicode' is understood by
        # lxml on both Python 2 and Python 3, unlike the `unicode` builtin.
        etree.tostring(html_root, encoding='unicode')
    except UnicodeDecodeError:
        # The soupparser submodule has to be imported explicitly before use;
        # importing it lazily keeps BeautifulSoup an optional dependency.
        from lxml.html import soupparser
        html_root = soupparser.fromstring(input_contents)

    return html_root
|
|
|
def group_by(input_list, n):
    """
    Group the items of `input_list` into consecutive n-tuples.

    Any iterable is accepted, including one-shot iterators. Trailing items
    that do not fill a complete group are dropped. Returns an iterator that
    yields tuples of length `n`.
    """
    # Thanks to http://code.activestate.com/recipes/
    # 303060-group-a-list-into-sequential-n-tuples/ for this technique.
    # Every argument to zip() is the *same* iterator, so each output tuple
    # consumes n consecutive items from it.
    shared = iter(input_list)
    # iter() keeps the return value a one-shot iterator on Python 2 as well,
    # where zip() builds a list.
    return iter(zip(*[shared] * n))
|
|
|
def area_to_path_string(area):
    """
    Convert an image-map <area> element into SVG path data.

    `area` only needs a `get(name)` method that returns the value of the
    named attribute or None (an lxml element qualifies). The `shape` and
    `coords` attributes are read.

    Returns the path data string, or '' when the area cannot be converted:
    missing or non-integral coordinates, or a coordinate count that does not
    fit the shape.
    """
    coords_string = area.get('coords')
    if not coords_string:
        return ''  # Nothing can be drawn without coordinates.

    # In HTML a missing or empty shape means a rectangle, and the keyword is
    # matched case-insensitively.
    area_type = (area.get('shape') or 'rect').strip().lower()

    # Make sure we have a comma-delimited set of integral coordinates.
    try:
        coords = [int(x.strip()) for x in coords_string.split(',')]
    except ValueError:
        return ''  # Non-integral coordinates

    if area_type in ('poly', 'polygon'):
        # Draw a polygon as multiple line segments plus a close-path command.
        if len(coords) % 2 != 0:
            return ''  # Not an even number of coordinates

        # Pair up x (even positions) with y (odd positions).
        points = zip(coords[0::2], coords[1::2])
        return 'M %s z' % ' L '.join(['%s,%s' % point for point in points])

    if area_type in ('circ', 'circle'):
        # Draw a circle as two arcs. Coordinates are centre x, centre y, radius.
        if len(coords) != 3 or coords[2] < 0:
            # Wrong number of coordinates, or a negative radius that would
            # render as "--r" in the path data.
            return ''

        return 'M %(x)s,%(y)s m -%(r)s,0 a %(r)s,%(r)s 0 1,0 %(d)s,0 a %(r)s,%(r)s 0 1,0 -%(d)s,0' % {
            'x': coords[0],
            'y': coords[1],
            'r': coords[2],
            'd': 2 * coords[2],
        }

    # 'default', 'rect' or anything unrecognised:
    # Draw a rectangle as four line segments (three specified plus one
    # close-path command).
    if len(coords) != 4:
        return ''  # Wrong number of coordinates

    # HTML orders rectangle coordinates as left, top, right, bottom.
    return 'M %(xa)s,%(ya)s L %(xb)s,%(ya)s L %(xb)s,%(yb)s L %(xa)s,%(yb)s z' % {
        'xa': coords[0],
        'ya': coords[1],
        'xb': coords[2],
        'yb': coords[3],
    }
|
|
|
def main(input_filename='', map_id=''):
    """
    Convert the image map with ID `map_id` in the HTML file `input_filename`
    into an SVG file written next to it as '<input_filename>.svg'.

    One <path> element is produced per distinct `class` value among the map's
    children; children without a class each get their own 'area_<n>' path.
    Children that do not yield any path data are skipped.

    Writes a message to stderr and exits with status 1 when no element with
    the requested ID exists.
    """
    input_html = parse_html(input_filename)

    # Pass the ID as an XPath variable so that quotes inside it cannot break
    # (or alter) the expression.
    matches = input_html.xpath('//*[@id=$map_id]', map_id=map_id)
    if not matches:
        sys.stderr.write('No element with id "%s" found in %s\n'
                         % (map_id, input_filename))
        sys.exit(1)
    input_map = matches[0]

    # Collect the path strings for each class of <area> element in the
    # specified map, remembering first-seen order so the output is stable.
    class_order = []
    paths_by_class = {}
    placeholder_counter = 0
    for child in input_map:
        path_string = area_to_path_string(child)
        if not path_string:
            # Comments, unsupported children and malformed areas would only
            # contribute empty path data.
            continue

        area_class = child.get('class')
        if area_class is None:
            area_class = 'area_%s' % placeholder_counter
            placeholder_counter += 1

        if area_class not in paths_by_class:
            paths_by_class[area_class] = []
            class_order.append(area_class)
        paths_by_class[area_class].append(path_string)

    # Build a <path> element for each class and append it to the SVG tree.
    svg_root = etree.Element('svg', xmlns='http://www.w3.org/2000/svg')
    for area_class in class_order:
        area_path = etree.Element('path')
        area_path.set('d', ' '.join(paths_by_class[area_class]))
        area_path.set('id', area_class)

        area_path.set('fill', 'none')
        area_path.set('stroke', 'black')
        area_path.set('stroke-width', '1')

        svg_root.append(area_path)

    output_svg = etree.ElementTree(svg_root)
    # The serialiser emits UTF-8 encoded bytes, so the file is opened in
    # binary mode; the context manager closes it even if writing fails.
    with open('%s.svg' % input_filename, 'wb') as output_file:
        output_svg.write(output_file, encoding='utf-8', pretty_print=True,
                         xml_declaration=True)
|
|
|
if __name__ == '__main__':
    # Exactly two positional arguments are expected: the HTML file to read
    # and the ID of the image map inside it.
    cli_args = sys.argv[1:]
    if len(cli_args) == 2:
        main(*cli_args)
    else:
        sys.stderr.write("Usage: %s input_html map_id\n" % sys.argv[0])
        sys.exit(1)
This is the version I needed in order to troubleshoot and process a map I had; the map somehow had different attribute values, so changes were needed to get it going.
Exec: python convert.py file.html map1
from itertools import islice, izip
from lxml import etree, html
import sys
"""
Takes a specified HTML, reads the client-side image map at the specified ID and
generates an SVG of all of those areas.
"""
def parse_html(input_filename):
    """
    Takes the filename of an HTML file and reads its contents as raw bytes.

    NOTE(review): as excerpted here the function stops after reading the
    file -- nothing is parsed or returned, although the original docstring
    promised an ElementTree representation. The excerpt looks truncated;
    confirm against the complete script.
    """
    # The context manager closes the handle even if read() raises.
    with open(input_filename, 'rb') as input_file:
        input_contents = input_file.read()
def group_by(input_list, n):
    """
    Group the items of `input_list` into consecutive n-tuples.

    Any iterable is accepted, including one-shot iterators. Trailing items
    that do not fill a complete group are dropped. Returns an iterator that
    yields tuples of length `n`.
    """
    # Thanks to http://code.activestate.com/recipes/
    # 303060-group-a-list-into-sequential-n-tuples/ for this technique.
    # Every argument to zip() is the *same* iterator, so each output tuple
    # consumes n consecutive items from it.
    shared = iter(input_list)
    # iter() keeps the return value a one-shot iterator on Python 2 as well,
    # where zip() builds a list.
    return iter(zip(*[shared] * n))
def area_to_path_string(area):
    """
    Read the `shape` and `coords` attributes of an <area> element and report
    when either one is missing.

    Returns '' (after printing a diagnostic line) when `shape` or `coords`
    is absent or empty.

    NOTE(review): the excerpt ends after this check, so an area that has
    both attributes falls through and yields None. The conversion logic
    appears to have been cut off; confirm against the complete script.
    """
    area_type = area.get('shape')
    coords_string = area.get('coords')
    if not area_type or not coords_string:
        # Parenthesised form prints the same text on Python 2 and also
        # parses on Python 3.
        print("# This isn't good.")
        return ''  # This isn't good.
def main(input_filename='', map_id=''):
    """
    Create the root <svg> element for the converted image map.

    NOTE(review): as excerpted here the function only builds the empty root
    element; neither `input_filename` nor `map_id` is used and nothing is
    written. The rest of the body appears to have been cut off; confirm
    against the complete script.
    """
    svg_root = etree.Element('svg', xmlns='http://www.w3.org/2000/svg')
# Script entry point. The double underscores of __name__ / '__main__' had been
# stripped (bare `name` is an undefined variable), so they are restored here.
if __name__ == '__main__':
    if len(sys.argv) != 3:
        # Expect exactly the HTML file name and the map ID.
        sys.stderr.write("Usage: %s input_html map_id\n" % sys.argv[0])
        sys.exit(1)
    else:
        main(*sys.argv[1:])