Skip to content

Instantly share code, notes, and snippets.

@un1tz3r0
Created June 19, 2021 17:47
Show Gist options
  • Save un1tz3r0/280ef1fbd74ad28b5ea8ea102df85b36 to your computer and use it in GitHub Desktop.
Save un1tz3r0/280ef1fbd74ad28b5ea8ea102df85b36 to your computer and use it in GitHub Desktop.
Misc python stuff for searching and comparing SVGs for similarity
import re
def parsesvg(svg_file):
    """Parse the SVG file at ``svg_file`` into an ``xml.dom.minidom`` document.

    The file is read as bytes so the XML parser decodes it according to the
    document's own encoding declaration instead of the platform's default
    text encoding.

    Returns:
        The parsed document, or ``None`` when the file cannot be read or
        parsed (a ``[warning]`` line is printed in that case).
    """
    from xml.dom import minidom

    try:
        with open(svg_file, "rb") as f:
            svg_bytes = f.read()
        return minidom.parseString(svg_bytes)
    except Exception as err:
        # Deliberately best-effort: callers scan many files and skip bad ones.
        print(f"[warning] error while parsing xml in svg file {repr(svg_file)}: {err}")
        return None
def makesvgsymbol(filename, symbol_id):
    """Return the SVG file's root element serialised as a ``<symbol>`` element.

    The document element of ``filename`` is renamed to ``symbol`` and given
    ``id=symbol_id``; its other attributes and children are left untouched.

    Args:
        filename: path of the SVG file to read.
        symbol_id: value for the ``id`` attribute of the resulting symbol.

    Returns:
        Pretty-printed XML text of the renamed root element.

    Raises:
        Any error from opening the file or parsing its XML propagates to the
        caller unchanged.
    """
    from xml.dom import minidom

    # Bytes in, so the parser honours the file's own encoding declaration.
    with open(filename, "rb") as f:
        svg = minidom.parseString(f.read())
    root = svg.documentElement
    root.tagName = "symbol"
    root.setAttribute("id", symbol_id)
    return root.toprettyxml()
# Lookup table of lower-case colour keyword -> [red, green, blue], each channel
# an integer in 0..255.  Both the "gray" and "grey" spellings are present.
# The values are plain lists, so code that hands one out shares the same
# mutable object with this table.
colornames = {
    'aliceblue': [240, 248, 255],
    'antiquewhite': [250, 235, 215],
    'aqua': [0, 255, 255],
    'aquamarine': [127, 255, 212],
    'azure': [240, 255, 255],
    'beige': [245, 245, 220],
    'bisque': [255, 228, 196],
    'black': [0, 0, 0],
    'blanchedalmond': [255, 235, 205],
    'blue': [0, 0, 255],
    'blueviolet': [138, 43, 226],
    'brown': [165, 42, 42],
    'burlywood': [222, 184, 135],
    'cadetblue': [95, 158, 160],
    'chartreuse': [127, 255, 0],
    'chocolate': [210, 105, 30],
    'coral': [255, 127, 80],
    'cornflowerblue': [100, 149, 237],
    'cornsilk': [255, 248, 220],
    'crimson': [220, 20, 60],
    'cyan': [0, 255, 255],
    'darkblue': [0, 0, 139],
    'darkcyan': [0, 139, 139],
    'darkgoldenrod': [184, 134, 11],
    'darkgray': [169, 169, 169],
    'darkgreen': [0, 100, 0],
    'darkgrey': [169, 169, 169],
    'darkkhaki': [189, 183, 107],
    'darkmagenta': [139, 0, 139],
    'darkolivegreen': [85, 107, 47],
    'darkorange': [255, 140, 0],
    'darkorchid': [153, 50, 204],
    'darkred': [139, 0, 0],
    'darksalmon': [233, 150, 122],
    'darkseagreen': [143, 188, 143],
    'darkslateblue': [72, 61, 139],
    'darkslategray': [47, 79, 79],
    'darkslategrey': [47, 79, 79],
    'darkturquoise': [0, 206, 209],
    'darkviolet': [148, 0, 211],
    'deeppink': [255, 20, 147],
    'deepskyblue': [0, 191, 255],
    'dimgray': [105, 105, 105],
    'dimgrey': [105, 105, 105],
    'dodgerblue': [30, 144, 255],
    'firebrick': [178, 34, 34],
    'floralwhite': [255, 250, 240],
    'forestgreen': [34, 139, 34],
    'fuchsia': [255, 0, 255],
    'gainsboro': [220, 220, 220],
    'ghostwhite': [248, 248, 255],
    'gold': [255, 215, 0],
    'goldenrod': [218, 165, 32],
    'gray': [128, 128, 128],
    'green': [0, 128, 0],
    'greenyellow': [173, 255, 47],
    'grey': [128, 128, 128],
    'honeydew': [240, 255, 240],
    'hotpink': [255, 105, 180],
    'indianred': [205, 92, 92],
    'indigo': [75, 0, 130],
    'ivory': [255, 255, 240],
    'khaki': [240, 230, 140],
    'lavender': [230, 230, 250],
    'lavenderblush': [255, 240, 245],
    'lawngreen': [124, 252, 0],
    'lemonchiffon': [255, 250, 205],
    'lightblue' : [173, 216, 230],
    'lightcoral': [240, 128, 128],
    'lightcyan': [224, 255, 255],
    'lightgoldenrodyellow': [250, 250, 210],
    'lightgray': [211, 211, 211],
    'lightgreen': [144, 238, 144],
    'lightgrey': [211, 211, 211],
    'lightpink': [255, 182, 193],
    'lightsalmon': [255, 160, 122],
    'lightseagreen': [32, 178, 170],
    'lightskyblue': [135, 206, 250],
    'lightslategray': [119, 136, 153],
    'lightslategrey': [119, 136, 153],
    'lightsteelblue': [176, 196, 222],
    'lightyellow': [255, 255, 224],
    'lime': [0, 255, 0],
    'limegreen': [50, 205, 50],
    'linen': [250, 240, 230],
    'magenta': [255, 0, 255],
    'maroon': [128, 0, 0],
    'mediumaquamarine': [102, 205, 170],
    'mediumblue': [0, 0, 205],
    'mediumorchid': [186, 85, 211],
    'mediumpurple': [147, 112, 219],
    'mediumseagreen': [60, 179, 113],
    'mediumslateblue': [123, 104, 238],
    'mediumspringgreen': [0, 250, 154],
    'mediumturquoise': [72, 209, 204],
    'mediumvioletred': [199, 21, 133],
    'midnightblue': [25, 25, 112],
    'mintcream': [245, 255, 250],
    'mistyrose': [255, 228, 225],
    'moccasin': [255, 228, 181],
    'navajowhite': [255, 222, 173],
    'navy': [0, 0, 128],
    'oldlace': [253, 245, 230],
    'olive': [128, 128, 0],
    'olivedrab': [107, 142, 35],
    'orange': [255, 165, 0],
    'orangered': [255, 69, 0],
    'orchid': [218, 112, 214],
    'palegoldenrod': [238, 232, 170],
    'palegreen': [152, 251, 152],
    'paleturquoise': [175, 238, 238],
    'palevioletred': [219, 112, 147],
    'papayawhip': [255, 239, 213],
    'peachpuff': [255, 218, 185],
    'peru': [205, 133, 63],
    'pink': [255, 192, 203],
    'plum': [221, 160, 221],
    'powderblue': [176, 224, 230],
    'purple': [128, 0, 128],
    'red': [255, 0, 0],
    'rosybrown': [188, 143, 143],
    'royalblue': [65, 105, 225],
    'saddlebrown': [139, 69, 19],
    'salmon': [250, 128, 114],
    'sandybrown': [244, 164, 96],
    'seagreen': [46, 139, 87],
    'seashell': [255, 245, 238],
    'sienna': [160, 82, 45],
    'silver': [192, 192, 192],
    'skyblue': [135, 206, 235],
    'slateblue': [106, 90, 205],
    'slategray': [112, 128, 144],
    'slategrey': [112, 128, 144],
    'snow': [255, 250, 250],
    'springgreen': [0, 255, 127],
    'steelblue': [70, 130, 180],
    'tan': [210, 180, 140],
    'teal': [0, 128, 128],
    'thistle': [216, 191, 216],
    'tomato': [255, 99, 71],
    'turquoise': [64, 224, 208],
    'violet': [238, 130, 238],
    'wheat': [245, 222, 179],
    'white': [255, 255, 255],
    'whitesmoke': [245, 245, 245],
    'yellow': [255, 255, 0],
    'yellowgreen': [154, 205, 50]
}
def parsestyle(text, debug=False):
    """Extract stroke width, stroke colour and fill colour from inline CSS.

    ``text`` is the content of a ``style`` attribute, for example
    ``"color: hsl(320deg, 10%, 80%); stroke: #fe0; stroke-width: 10; fill: rgb(100, 250, 25)"``.

    Only the ``color``, ``fill``, ``stroke`` and ``stroke-width`` declarations
    are looked at.  ``fill`` sets the fill, ``stroke`` sets the stroke and
    ``color`` sets both.  Colour values may be a ``#`` hex hash, an
    ``rgb``/``hsl`` style function or a keyword from ``colornames``; anything
    else produces a ``[warning]`` line and is ignored.

    Args:
        text: CSS declaration list.
        debug: when true, print a trace line on entry.

    Returns:
        ``[stroke_width, stroke, fill]``.  Entries are ``None`` when the style
        does not set them.  Colours are sequences of ``r, g, b`` (0..255) with
        an alpha channel appended for hash and function notations; keyword
        colours carry only the three channels stored in ``colornames``.
    """
    import binascii
    import colorsys

    import tinycss2

    fill = None
    stroke = None
    stroke_width = None
    if debug:
        print(f"[debug] entered parsestyle({repr(text)})")

    def parsefuncdecl(functok):
        # Keep only the component tokens: drop whitespace and comma separators.
        args = [
            tok for tok in functok.arguments
            if tok.type != 'whitespace' and not (tok.type == 'literal' and tok.value == ',')
        ]
        is_hsl = functok.lower_name.startswith('hsl')
        is_rgb = functok.lower_name.startswith('rgb')
        if not (is_hsl or is_rgb):
            print(f"[warning] unknown color function {repr(functok)} in css color property value")
            return None
        if len(args) < 3:
            print(f"[warning] color function {repr(functok)} has fewer than 3 components")
            return None
        alpha = (args[3].value * 255) if len(args) > 3 else 255
        if is_hsl:
            h = args[0].value / 360.0
            s = args[1].value / 100.0
            l = args[2].value / 100.0
            # colorsys orders the components hue, lightness, saturation.
            r, g, b = colorsys.hls_to_rgb(h, l, s)
        else:
            r = args[0].value / (100.0 if args[0].type == 'percent' else 255.0)
            g = args[1].value / (100.0 if args[1].type == 'percent' else 255.0)
            b = args[2].value / (100.0 if args[2].type == 'percent' else 255.0)
        return (r * 255, g * 255, b * 255, alpha)

    def parsehash(hashtok):
        s = hashtok.value
        if len(s) in (3, 4):
            # Short form: every digit stands for a doubled pair (#fe0 -> #ffee00).
            s = "".join(ch + ch for ch in s)
        if len(s) == 6:
            s = s + "ff"
        if len(s) != 8:
            print(f"[warning] parse css color hash {repr(s)} has wrong length (expecting 8)")
            return None
        try:
            return list(binascii.a2b_hex(s))
        except ValueError:
            print(f"[warning] css color hash {repr(s)} is not valid hexadecimal")
            return None

    for decl in tinycss2.parse_declaration_list(text):
        if decl.type != 'declaration':
            continue
        name = decl.lower_name
        vals = [tok for tok in decl.value if tok.type != 'whitespace']
        if name in ('color', 'fill', 'stroke'):
            for val in vals:
                color = None
                if val.type == 'function':
                    color = parsefuncdecl(val)
                elif val.type == 'hash':
                    color = parsehash(val)
                elif val.type == 'ident':
                    if val.lower_value in colornames:
                        # Copy so callers cannot mutate the shared table entry.
                        color = list(colornames[val.lower_value])
                    else:
                        print(f"[warning] Unknown identifier {repr(val.lower_value)} in css color property decl")
                else:
                    print(f"[warning] Unknown value type {repr(val.type)} for css color property decl {repr(decl)}")
                if color is not None:
                    # Two independent checks so that `color` reaches both.
                    if name in ('fill', 'color'):
                        fill = color
                    if name in ('stroke', 'color'):
                        stroke = color
        elif name == 'stroke-width':
            for val in vals:
                stroke_width = val.value
    return [stroke_width, stroke, fill]
def parsecolor(svg_value):
    """Convert an SVG colour attribute value to an ``(r, g, b, a)`` tuple.

    Recognised forms (matched case-insensitively):

    * ``#rgb``, ``#rgba``, ``#rrggbb``, ``#rrggbbaa``
    * ``rgb()/rgba()`` with plain (0..255) or percentage components
    * ``hsl()/hsla()`` and ``hsv()/hsva()``: hue in degrees (optional ``deg``
      suffix), the other two components as percentages
    * ``gray()/grey()`` with a single 0..255 or percentage level
    * keywords listed in ``colornames``

    A fourth function component is an alpha value, either 0..1 or a
    percentage.

    Returns:
        A tuple of four integers in 0..255, or ``None`` for ``"none"`` and for
        any value that cannot be interpreted.
    """
    import binascii
    import colorsys
    import re

    s = svg_value.strip().lower()
    if s == "none":
        return None

    if s.startswith("#"):
        h = s[1:]
        if len(h) in (3, 4):
            # Short form: every digit stands for a doubled pair.
            h = "".join(ch + ch for ch in h)
        if len(h) == 6:
            h = h + "ff"
        if len(h) != 8:
            return None
        try:
            return tuple(binascii.a2b_hex(h))
        except ValueError:
            return None

    m = re.match(r"(rgba?|hs[lv]a?|gr[ea]y)\s*\((.*)\)", s)
    if m is not None:
        colorspace = m.group(1)
        is_hs = colorspace.startswith("hs")
        pieces = [p.strip() for p in m.group(2).split(",")]
        params = []
        try:
            for i, piece in enumerate(p for p in pieces if p):
                if is_hs and i == 0:
                    # Hue: degrees, scaled to the 0..1 range colorsys expects.
                    if piece.endswith("deg"):
                        piece = piece[:-3]
                    params.append(float(piece) / 360.0)
                elif piece.endswith("%"):
                    params.append(float(piece[:-1]) / 100.0)
                elif i > 2:
                    # Alpha is already a 0..1 fraction.
                    params.append(float(piece))
                elif is_hs:
                    params.append(float(piece) / 100.0)
                else:
                    params.append(float(piece) / 255.0)
        except ValueError:
            return None

        def to8(fraction):
            # Round rather than truncate so x/255*255 comes back as x, and
            # clamp out-of-range input to a valid channel value.
            return max(0, min(255, int(round(fraction * 255))))

        alpha = to8(params[3]) if len(params) > 3 else 255
        if colorspace.startswith("gr") and len(params) > 0:
            level = to8(params[0])
            return (level, level, level, 255)
        if colorspace.startswith("rgb") and len(params) >= 3:
            return (to8(params[0]), to8(params[1]), to8(params[2]), alpha)
        if is_hs and len(params) >= 3:
            if colorspace.startswith("hsv"):
                rgb = colorsys.hsv_to_rgb(params[0], params[1], params[2])
            else:
                # colorsys orders the components hue, lightness, saturation.
                rgb = colorsys.hls_to_rgb(params[0], params[2], params[1])
            return (to8(rgb[0]), to8(rgb[1]), to8(rgb[2]), alpha)
        return None

    if s in colornames:
        r, g, b = colornames[s]
        return (r, g, b, 255)
    return None
def rgb8_to_hsl(rgb8):
    """Convert an 8-bit RGB(A) colour to ``(hue, saturation, lightness, alpha)``.

    Args:
        rgb8: sequence of at least three channel values in 0..255; an
            optional fourth entry is the alpha channel, also 0..255.

    Returns:
        A tuple ``(h, s, l, a)`` with hue in degrees (0..360) and the other
        three as percentages (0..100).  Alpha defaults to 100 when ``rgb8``
        has no fourth entry.  Returns ``None`` and prints a ``[warning]``
        line when ``rgb8`` is ``None`` or has fewer than three entries.
    """
    import colorsys

    if rgb8 is None or len(rgb8) < 3:
        print(f"[warning] rgb8_to_hsl: bad rgb8 value ({repr(rgb8)})!")
        return None
    if len(rgb8) > 3:
        af = int(rgb8[3])/255.0
    else:
        af = 1.0
    # colorsys returns hue, lightness, saturation, in that order.
    hf, lf, sf = colorsys.rgb_to_hls(rgb8[0]/255.0, rgb8[1]/255.0, rgb8[2]/255.0)
    return (hf*360.0, sf*100.0, lf*100.0, af*100.0)
def analyzesvg(svg_dom, debug=True):
    """Collect the stroke and fill colours used by every element of an SVG.

    Each element's ``stroke``, ``stroke-width`` and ``fill`` presentation
    attributes are read first; values found in its ``style`` attribute then
    override them, regardless of the order the attributes appear in.

    Args:
        svg_dom: a parsed ``xml.dom.minidom`` document, or ``None``.
        debug: when true, print a trace of every element and attribute plus
            the document metadata that was gathered (title, desc, size,
            ids, classes).

    Returns:
        ``[strokes, fills]``.  ``strokes`` holds one ``(width, colour)`` tuple
        per element that sets a stroke width or colour (the missing part is
        ``None``); ``fills`` holds one colour per element that sets a fill.
        Colours are whatever ``rgb8_to_hsl`` returns.  When ``svg_dom`` is
        ``None`` the result is the tuple ``([], [])``.
    """
    from xml.dom import Node

    if svg_dom is None:
        return [], []

    root = svg_dom.documentElement
    # getAttribute returns "" for a missing attribute; normalise that to None.
    docviewbox = root.getAttribute('viewBox') or None
    docwidth = root.getAttribute('width') or None
    docheight = root.getAttribute('height') or None
    doctitle, docdesc = "", ""
    docids, docclasses = [], []
    strokes = []
    fills = []
    if debug:
        print(f"svg viewbox={docviewbox}")

    def textof(node):
        # nodeType is an integer constant, not a string.
        return "".join(ch.nodeValue for ch in node.childNodes if ch.nodeType == Node.TEXT_NODE)

    # process elements
    for el in svg_dom.getElementsByTagName("*"):
        if debug:
            print(f" element {repr(el)}")
        tag = (el.localName or el.tagName).lower()
        if tag == "title":
            doctitle = textof(el)
        if tag == "desc":
            docdesc = textof(el)
        el_stroke = None
        el_fill = None
        el_stroke_width = None
        style_value = None
        # process attributes
        for attr, value in el.attributes.items():
            if debug:
                print(f" attribute {repr(attr)}={repr(value)}")
            if attr == 'class':
                docclasses.extend(re.findall("[^\\s]+", value))
            elif attr == 'id':
                docids.append(value)
            elif attr == 'stroke':
                el_stroke = parsecolor(value)
            elif attr == 'stroke-width':
                try:
                    el_stroke_width = float(value)
                except ValueError:
                    # e.g. "2px" or "10%": not a bare number, so skip it.
                    print(f"[warning] ignoring non-numeric stroke-width {repr(value)}")
            elif attr == 'fill':
                el_fill = parsecolor(value)
            elif attr == 'style':
                style_value = value
        # Apply the style last so it always wins over presentation attributes.
        if style_value is not None:
            st_stroke_width, st_stroke, st_fill = parsestyle(style_value)
            if st_stroke is not None:
                el_stroke = st_stroke
            if st_stroke_width is not None:
                el_stroke_width = st_stroke_width
            if st_fill is not None:
                el_fill = st_fill
        if el_stroke_width is not None or el_stroke is not None:
            # A width without a colour is recorded with colour None.
            stroke_hsl = rgb8_to_hsl(el_stroke) if el_stroke is not None else None
            strokes.append((el_stroke_width, stroke_hsl))
        if el_fill is not None:
            fills.append(rgb8_to_hsl(el_fill))
    if debug:
        print(f"svg title={doctitle!r} desc={docdesc!r} width={docwidth} height={docheight} ids={docids} classes={docclasses}")
    return [strokes, fills]
def analyzeallsvgs(pattern, debug=False):
    """Run ``analyzesvg`` over every file matching a glob pattern.

    Args:
        pattern: glob pattern; ``**`` matches recursively.
        debug: when true, print per-file progress and results, and pass the
            flag on to ``analyzesvg``.

    Returns:
        ``[strokes, fills]``: the per-file results of ``analyzesvg``
        concatenated in the order the files were found.
    """
    from glob import glob

    strokes = []
    fills = []
    for filename in glob(pattern, recursive=True):
        if debug:
            print(f">>> Analyzing file {repr(filename)}...")
        filestrokes, filefills = analyzesvg(parsesvg(filename), debug=debug)
        if debug:
            print(f"<<< strokes in file: {filestrokes}")
            print(f"<<< fills in file: {filefills}")
        strokes.extend(filestrokes)
        fills.extend(filefills)
    return [strokes, fills]
# -------------------------------------------------------------------------------
# a very basic k-means/elbow clustering
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# "wrote this up one day while sitting on the terlet. i do some of my best
# thinking there" - Author - Victor M. Condino - Wednesday, Feb. 3rd, 2020
#
# this is some code to find the optimal clustering of points in an n-dimensional
# dataset when the number of clusters is not known, using the elbow method
# -------------------------------------------------------------------------------
from itertools import zip_longest
from math import sqrt
import math
from wrapt import decorator
from itertools import permutations
def isstr(val):
    ''' true when val is a text string or a byte string '''
    stringtypes = (str, bytes)
    return isinstance(val, stringtypes)
def isseq(val):
    ''' true when val can be iterated over and is not a string (see isstr) '''
    from collections.abc import Iterable
    if not isinstance(val, Iterable):
        return False
    return not isstr(val)
@decorator
def elementwise(wrapped, instance, args, kwargs):
    ''' decorate a function so that when it is passed one or more array arguments, it is
    called once for each combination of their elements (the cartesian product, with the
    rightmost argument varying fastest). calling the decorated function returns a
    generator of the results. keyword arguments are passed through unchanged.
        @elementwise
        def product(x, y):
            return x * y
        >> list(product([1,2,3],[0.5, 2.0]))
        --> [0.5, 2.0, 1.0, 4.0, 1.5, 6.0]
    also works with non-array parameters, which are treated as one-element arrays:
        >> list(product(-10.0, [0.0, -3.0, 9.9]))
        --> [-0.0, 30.0, -99.0]
    '''
    # imported under another name so it cannot be confused with a decorated
    # function called `product`, as in the example above
    from itertools import product as cartesian_product
    argseqs = [list(arg) if isseq(arg) else [arg] for arg in args]
    for combo in cartesian_product(*argseqs):
        yield wrapped(*combo, **kwargs)
def toseq(val):
    ''' wrap val so it can always be iterated: None becomes an empty list, a
    sequence (see isseq) is returned as-is, and anything else becomes a
    one-element list '''
    # identity test: `== None` would invoke a custom __eq__ on val
    if val is None:
        return []
    if isseq(val):
        return val
    return [val]
def fromseq(val):
    ''' the reverse of toseq: an empty sequence becomes None, a one-element
    sequence is unwrapped (repeatedly, if it is nested), and everything
    else is returned unchanged '''
    if isseq(val):
        size = len(val)
        if size == 0:
            return None
        if size == 1:
            return fromseq(val[0])
    return val
def avg(values):
    ''' the mean of values. for a list of points the result is a list holding the
    mean of each coordinate; for a list of plain numbers it is a single number '''
    try:
        means = []
        for column in zip_longest(*values):
            means.append(sum(column) / len(column))
        return means
    except TypeError:
        # the values are not iterable (plain numbers), or the points do not all
        # have the same number of dimensions
        total = sum(values)
        return total / len(values)
def distance(a, b):
    ''' the euclidean distance between points a and b. both may be plain numbers, in
    which case the result is their absolute difference, or coordinate sequences of
    the same length. raises TypeError for any other combination '''
    a_is_seq = isseq(a)
    b_is_seq = isseq(b)
    if not (a_is_seq or b_is_seq):
        return (a - b) if (a > b) else (b - a)
    if not (a_is_seq and b_is_seq) or len(a) != len(b):
        raise TypeError("a and b must be sequences of same length")
    total = 0
    for i in range(len(a)):
        delta = a[i] - b[i]
        total += delta * delta
    return math.sqrt(total)
def minindex(a):
    ''' return a tuple (index, value) for the smallest element of the given array. when
    the minimum occurs more than once the first occurrence wins.
    raises TypeError if a is not a sequence and ValueError if it is empty '''
    if not isseq(a):
        raise TypeError("not a sequence")
    if len(a) < 1:
        raise ValueError("cannot find min index of empty sequence")
    minv = a[0]
    mini = 0
    for i in range(1, len(a)):
        # strict comparison keeps the earliest of equal minima
        if a[i] < minv:
            mini = i
            minv = a[i]
    return mini, minv
def assignlabels(pts, centroids):
    ''' label every point in pts with the index of the centroid it lies closest to.
    returns a list of labels in the same order as pts '''
    labels = []
    for pt in pts:
        gaps = [distance(pt, centroids[c]) for c in range(0, len(centroids))]
        nearest, _ = minindex(gaps)
        labels.append(nearest)
    return labels
def calccentroids(pts, labels):
    ''' generator yielding, for each distinct value in labels, the average (see avg) of
    the points in pts that carry that label. labels[i] is the label of pts[i] '''
    for label in set(labels):
        members = []
        for i in range(0, len(pts)):
            if labels[i] == label:
                members.append(pts[i])
        yield avg(members)
def samelabels(newlabels, oldlabels):
    ''' compares two arrays of point-cluster labels, returning True when every point
    has the same label in both. used to detect that the k-means iteration has stopped
    changing anything.
    raises ValueError if the arrays are of different lengths '''
    if len(newlabels) != len(oldlabels):
        raise ValueError("number of elements is inconsistent")
    return all(new == old for new, old in zip(newlabels, oldlabels))
def kmeans(pts, k):
    ''' k-means clustering of the points in pts into (at most) k groups.
    the starting centroids are up to k distinct points taken from pts, which must
    therefore be hashable. points are then repeatedly relabelled with their nearest
    centroid and the centroids recomputed as the average of their points, until a
    pass leaves every label unchanged.
    returns a tuple (labels, centroids): labels has one entry per point in pts and
    each entry is an index into centroids, which is kept in sorted order. '''
    distinct = list(set(pts))
    seeds = sorted(distinct[0:k])
    labels = assignlabels(pts, seeds)
    while True:
        newcentroids = sorted(calccentroids(pts, labels))
        newlabels = assignlabels(pts, newcentroids)
        if samelabels(newlabels, labels):
            return newlabels, newcentroids
        labels = newlabels
def sqr(x):
    ''' x multiplied by itself '''
    squared = x * x
    return squared
def abs(x):
    ''' the magnitude of x. note that this replaces the builtin of the same name
    for all code in this module '''
    if x >= 0:
        return x
    return -x
def wcss(pts, labels, centroids):
    ''' within-cluster sum of squares error function.
    adds up the squared distance from each point to the centroid of the cluster it is
    labelled with (centroids[labels[i]] for pts[i]). used to measure distortion at
    different values of k when the number of clusters is not known in advance '''
    total = 0
    for i in range(0, len(pts)):
        total += sqr(distance(pts[i], centroids[labels[i]]))
    return total
def elbow(kwcsspts):
    ''' where kwcsspts is an array of N 2-dimensional (x, y) pairs, draw a line from
    kwcsspts[0] to kwcsspts[N-1] and return the point which is farthest from that
    line (the first such point if several tie). this is the 'elbow', and if y is the
    wcss of k-means where k is x, then the elbow is the optimal value of k.
    when the first and last points coincide (for instance when there is only one
    point) there is no line to measure from, so the point farthest from that single
    location is returned instead.
    raises IndexError if kwcsspts is empty '''
    x1, y1 = kwcsspts[0]
    x2, y2 = kwcsspts[-1]
    dx = x2 - x1
    dy = y2 - y1
    chord = math.sqrt(dx * dx + dy * dy)

    def offset(pt):
        x0, y0 = pt
        if chord == 0:
            # degenerate line: fall back to the distance from its only point
            return math.sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1))
        # perpendicular distance from pt to the line through the end points
        return abs(dx * (y1 - y0) - (x1 - x0) * dy) / chord

    # max() keeps the first of several equally distant points
    farthest = max(range(len(kwcsspts)), key=lambda i: offset(kwcsspts[i]))
    return kwcsspts[farthest]
def optimizek(pts, mink, maxk):
    ''' select the best k for k-means clustering of the data in pts.
    k-means is run for every k in range(mink, maxk), so maxk itself is not tried, and
    the k at the elbow of the resulting wcss curve is chosen.
    returns a tuple (k, wcss, centroids, labels) for the chosen k '''
    runs = {}
    for k in range(mink, maxk):
        labs, cens = kmeans(pts, k)
        runs[k] = (wcss(pts, labs, cens), cens, labs)
    curve = [(tried, run[0]) for tried, run in runs.items()]
    k, err = elbow(curve)
    _, bestcentroids, bestlabels = runs[k]
    return k, err, bestcentroids, bestlabels
# -------------------------------------------------------------------
# Lab-colorspace perceptual colorimetric comparison support functions
# -------------------------------------------------------------------
def hsl_to_lab(h, s, l):
    ''' convert an hsl colour to Lab using colormath, going hsl -> xyz (via sRGB) -> lab.
    h is in degrees and is wrapped into the range 0..360; s and l are percentages and
    are divided by 100 before being handed to colormath.
    returns the value tuple of the resulting LabColor '''
    from colormath.color_conversions import convert_color
    from colormath.color_objects import HSLColor, LabColor, XYZColor, sRGBColor
    hue = (h%360.0+360.0)%360.0
    source = HSLColor(hue, s/100.0, l/100.0)
    intermediate = convert_color(source, XYZColor, through_rgb_type=sRGBColor)
    return convert_color(intermediate, LabColor).get_value_tuple()
def lab_to_hsl(l, a, b):
    ''' convert a Lab colour to hsl using colormath, going lab -> xyz -> hsl (via sRGB).
    returns the value tuple of the resulting HSLColor exactly as colormath reports it.
    NOTE(review): unlike hsl_to_lab, nothing here rescales s and l by 100, so the
    two functions may not use the same units - confirm against colormath '''
    from colormath.color_conversions import convert_color
    from colormath.color_objects import HSLColor, LabColor, XYZColor, sRGBColor
    source = LabColor(l, a, b)
    intermediate = convert_color(source, XYZColor)
    return convert_color(intermediate, HSLColor, through_rgb_type=sRGBColor).get_value_tuple()
def hsl_distance(hsla, hslb):
    ''' the euclidean distance between two hsl colours after both have been converted
    to Lab with hsl_to_lab. only the first three entries of each colour are used '''
    laba = hsl_to_lab(hsla[0], hsla[1], hsla[2])
    labb = hsl_to_lab(hslb[0], hslb[1], hslb[2])
    return distance(laba, labb)
def hsl_interp(hsla, hslb, f=0.5):
    ''' blend two hsl colours by mixing their Lab coordinates. f is the weight given
    to hsla and (1-f) the weight given to hslb, so f=1 gives hsla and f=0 gives hslb.
    the result is converted back with lab_to_hsl '''
    laba = hsl_to_lab(hsla[0], hsla[1], hsla[2])
    labb = hsl_to_lab(hslb[0], hslb[1], hslb[2])
    mixed = [laba[i]*f + labb[i]*(1-f) for i in range(3)]
    return lab_to_hsl(mixed[0], mixed[1], mixed[2])
def hsl_average(*hsls):
    ''' average any number of hsl colours by taking the mean of their Lab coordinates
    (see hsl_to_lab and avg) and converting the result back with lab_to_hsl. only the
    first three entries of each colour are used.
    raises ValueError if called without any colours '''
    if not hsls:
        raise ValueError("hsl_average requires at least one colour")
    meanlab = avg([hsl_to_lab(hsli[0], hsli[1], hsli[2]) for hsli in hsls])
    return lab_to_hsl(meanlab[0], meanlab[1], meanlab[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment