Skip to content

Instantly share code, notes, and snippets.

@keewon
Last active June 21, 2017 17:07
Show Gist options
  • Save keewon/c0062e60e00f5c3ac46112abeabe33eb to your computer and use it in GitHub Desktop.
Save keewon/c0062e60e00f5c3ac46112abeabe33eb to your computer and use it in GitHub Desktop.
Pubchem 2D structure json to SVG
import os, sys, json, math
svg_width = 512
svg_height = 512
margin_x = svg_width / 10.0
margin_y = svg_height / 10.0
font_size = 16
charge_stroke_width=2.0
line_stroke_width = 3.0
wedge_down_stroke_width = 3.0
atom_stroke_width = 1.0
wedge_down_count = 3
atom_circle = True
atom_text = False
atom_aid = False
output_html = False
output_svg = True
D2 = 3 # for double bond
D3 = 5 # for triple bond
DW = 8 # wavy
def get_min_max(coord):
min_v = coord[0]
max_v = coord[0]
for v in coord:
if min_v > v:
min_v = v
if max_v < v:
max_v = v
return (min_v, max_v)
scale_factor = 1
pan_x = 0
pan_y = 0
def cx(x):
return margin_x + (x + pan_x) * scale_factor
def cy(y):
return margin_y + (y + pan_y) * scale_factor
def element_to_text(element):
x = " H CNO"
if 1 <= element <= 8:
if x[element] != ' ':
return x[element]
if element == 16:
return "S"
return " " # TODO
def element_to_color(element):
if element == 1:
return 'lightgray'
if element == 6:
return 'black'
if element == 7:
return 'blue'
if element == 8:
return 'red'
if element == 16:
return 'yellow'
return 'black'
def aid_index_in_coords_aid(aid, coords_aid1):
l = len(coords_aid1)
for i in range(0, l):
if coords_aid1[i] == aid:
return i
return -1
def get_multiple_line_coord(x1, y1, x2, y2, D):
l = math.sqrt( (x2-x1) * (x2-x1) + (y2-y1) * (y2-y1) )
dx = -(y2-y1) * D / l
dy = (x2-x1) * D / l
return (dx, dy)
fn = sys.argv[1]
f = open(fn, 'rb')
j = json.loads(f.read())
atoms = j['PC_Compounds'][0]['atoms']
bonds = j['PC_Compounds'][0]['bonds']
coords = j['PC_Compounds'][0]['coords']
atoms_aid = atoms['aid'] # list of int, id of each atom
atoms_element = atoms['element'] # list of int, 1=H, 6=C, 7=N, 8=O, 16=S, 17=Cl
atoms_charge = {}
try:
atoms_charge1 = atoms['charge'] # list of { 'aid':int, 'value':+1/0/-1 }
for kv in atoms_charge1:
atoms_charge[ kv['aid'] ] = kv['value']
except KeyError:
pass
bonds_aid1 = bonds['aid1']
bonds_aid2 = bonds['aid2']
bonds_order = bonds['order']
coords_aid = coords[0]['aid']
coords_conformers_x = coords[0]['conformers'][0]['x']
coords_conformers_y = coords[0]['conformers'][0]['y']
coords_style = []
try:
coords_style = coords[0]['conformers'][0]['style']
except KeyError:
pass
atoms_count = len(atoms_aid)
bonds_count = len(bonds_aid1)
# flip y
for i in range(0, len(coords_conformers_y)):
coords_conformers_y[i] = -coords_conformers_y[i]
(min_x, max_x) = get_min_max(coords_conformers_x)
(min_y, max_y) = get_min_max(coords_conformers_y)
pan_x = -min_x
pan_y = -min_y
if max_x - min_x > max_y - min_y:
diff = max_x - min_x
#scale_factor = (svg_width - margin_x * 2) / diff
scale_factor = 30
pan_y = pan_y + ((svg_height - margin_y * 2) - (max_y - min_y) * scale_factor) / 2 / scale_factor
else:
diff = max_y - min_y
#scale_factor = (svg_height - margin_y * 2) / diff
scale_factor = 30
pan_x = pan_x + ((svg_width - margin_x * 2) - (max_x - min_x) * scale_factor) / 2 / scale_factor
def get_element_by_aid(aid):
for i in range(0, atoms_count):
if atoms_aid[i] == aid:
return atoms_element[i]
return None
def get_bond_style(aid1, aid2):
if coords_style:
style_aid1 = coords_style['aid1']
style_aid2 = coords_style['aid2']
for si in range(0, len(style_aid1)):
if (style_aid1[si] == aid1 and style_aid2[si] == aid2):
style = coords_style['annotation'][si]
return (style, False)
if (style_aid1[si] == aid2 and style_aid2[si] == aid1):
style = coords_style['annotation'][si]
return (style, True)
return (None, False)
def aromatic_bond_classifier(x1, y1, x2, y2, aid1, aid2):
a1_neighbors = []
a2_neighbors = []
for i in range(0, bonds_count):
aid1_i = bonds_aid1[i]
aid2_i = bonds_aid2[i]
bond_style, _ = get_bond_style(aid1_i, aid2_i)
if bond_style == 8:
if aid1_i == aid1:
a1_neighbors.append(aid2_i)
elif aid2_i == aid1:
a1_neighbors.append(aid1_i)
elif aid1_i == aid2:
a2_neighbors.append(aid2_i)
elif aid2_i == aid2:
a2_neighbors.append(aid1_i)
if aid2 in a1_neighbors:
a1_neighbors.remove(aid2)
if aid1 in a2_neighbors:
a2_neighbors.remove(aid1)
for a in a1_neighbors[:]:
if get_element_by_aid(a) == 1:
a1_neighbors.remove(a)
for a in a2_neighbors[:]:
if get_element_by_aid(a) == 1:
a2_neighbors.remove(a)
result = 0
for neighbors in [a1_neighbors, a2_neighbors]:
for a in neighbors:
i = aid_index_in_coords_aid(a, coords_aid)
x = coords_conformers_x[i]
y = coords_conformers_y[i]
v1 = (x2-x1, y2-y1)
v2 = (x-x1, y-y1)
t = v1[0] * v2[1] - v1[1] * v2[0]
if t > 0:
result = result + 1
elif t < 0:
result = result - 1
if result < 0:
return -1
if result > 0:
return 1
return result
def get_angle(x1, y1, x2, y2):
if y2 == y1 and x2 >= x1:
return 0
if x1 == x2 and y2 > y1:
return math.pi / 2
if y1 == y2 and x2 < x1:
return math.pi
if x1 == x2 and y2 < y1:
return math.pi * 3 / 2
v1 = (1, 0)
v2 = (x2-x1, y2-y1)
cos_theta = ((v1[0] * v2[0] + v1[1] * v2[1]) /
(math.sqrt(v1[0] * v1[0] + v1[1] * v1[1]) *
math.sqrt(v2[0] * v2[0] + v2[1] * v2[1])))
theta = math.acos(cos_theta)
cp = v1[0] * v2[1] - v1[1] * v2[0]
#print(theta, cp)
if cp > 0:
return theta
else:
return -theta
def get_best_pos_for_charge(aid):
i1 = aid_index_in_coords_aid(aid, coords_aid)
x1 = coords_conformers_x[i1]
y1 = coords_conformers_y[i1]
angles = []
neighbors_xy = []
for i in range(0, bonds_count):
aid1_i = bonds_aid1[i]
aid2_i = bonds_aid2[i]
a = None
if aid1_i == aid:
a = aid2_i
elif aid2_i == aid:
a = aid1_i
if a:
i2 = aid_index_in_coords_aid(a, coords_aid)
x2 = coords_conformers_x[i2]
y2 = coords_conformers_y[i2]
#print(aid, a, x1, y1, x2, y2)
angle = get_angle(x1, y1, x2, y2)
angles.append(angle)
#print(angles)
if len(angles) == 0:
return math.pi * 3 / 2
elif len(angles) == 1:
angle1 = angles[0]
angle1 = angle1 + math.pi
if angle1 > math.pi * 2:
angle1 = angle1 - math.pi * 2
#print('len=1', angle1)
return angle1
else:
angles.sort()
angles.append(angles[0])
max_angle_diff = 0
max_angle_i = 0
for i in range(0, len(angles)-1):
angle_diff = angles[i+1] - angles[i]
if angle_diff < 0:
angle_diff = angle_diff + math.pi * 2
if angle_diff > max_angle_diff:
max_angle_diff = angle_diff
max_angle_i = i
angle1 = angles[max_angle_i]
angle2 = angles[max_angle_i+1]
diff = (angle2 - angle1) /2
if diff < 0:
diff = diff + math.pi * 2
if diff >= math.pi * 2:
diff = diff - math.pi * 2
#print(diff, max_angle_i)
return angle1 + diff
result = []
for i in range(0, bonds_count):
aid1 = bonds_aid1[i]
aid2 = bonds_aid2[i]
order = bonds_order[i]
i1 = aid_index_in_coords_aid(aid1, coords_aid)
i2 = aid_index_in_coords_aid(aid2, coords_aid)
x1 = coords_conformers_x[i1]
x2 = coords_conformers_x[i2]
y1 = coords_conformers_y[i1]
y2 = coords_conformers_y[i2]
style = None # 1:Cross, 3:Wavy, 5:Wedge-up, 6:Wedge-down, 8:Aromatic
exchange_coord = False
style, exchange_coord = get_bond_style(aid1, aid2)
if (order == 1 and (style == None or style == 8)) or order == 3:
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % (
cx(x1), cy(y1), cx(x2), cy(y2), line_stroke_width
)
result.append(tag)
elif (order == 1 and style == 5):
cx1 = cx(x1)
cy1 = cy(y1)
cx2 = cx(x2)
cy2 = cy(y2)
if exchange_coord:
cx1, cy1, cx2, cy2 = cx2, cy2, cx1, cy1
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, D3)
tag = """<path d="M %f %f L %f %f %f %f %f %f Z" stroke="black" stroke-width="%d" fill="black" stroke-linecap="round" />""" % (
cx1, cy1, cx2+dx, cy2+dy, cx2-dx, cy2-dy, cx1, cy1, line_stroke_width
)
result.append(tag)
elif (order == 1 and style == 6):
cx1 = cx(x1)
cy1 = cy(y1)
cx2 = cx(x2)
cy2 = cy(y2)
if exchange_coord:
cx1, cy1, cx2, cy2 = cx2, cy2, cx1, cy1
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, D3)
dlist = []
for k in range(0, wedge_down_count):
cx3 = cx2 - (cx2 - cx1) * k / wedge_down_count
cy3 = cy2 - (cy2 - cy1) * k / wedge_down_count
dlist.append("M %f %f L %f %f" % (
cx3-dx*(wedge_down_count-k)/wedge_down_count,
cy3-dy*(wedge_down_count-k)/wedge_down_count,
cx3+dx*(wedge_down_count-k)/wedge_down_count,
cy3+dy*(wedge_down_count-k)/wedge_down_count)
)
d = " ".join(dlist)
d = d
tag = """<path d="%s" stroke="black" stroke-width="%d" stroke-linecap="round" fill="black" />""" % (d, wedge_down_stroke_width)
result.append(tag)
elif (order == 1 and style == 3):
cx1 = cx(x1)
cy1 = cy(y1)
cx2 = cx(x2)
cy2 = cy(y2)
if exchange_coord:
cx1, cy1, cx2, cy2 = cx2, cy2, cx1, cy1
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, DW)
dlist = []
dlist.append("M %f %f" % (cx2, cy2))
I = 16
for k in range(1, I+1, 2):
cx3 = cx2 - (cx2 - cx1) * k / I
cy3 = cy2 - (cy2 - cy1) * k / I
cx4 = cx2 - (cx2 - cx1) * (k+1) / I
cy4 = cy2 - (cy2 - cy1) * (k+1) / I
if k == 1:
dlist.append("Q %f %f %f %f" % (cx3-dx/2, cy3-dy/2, cx4, cy4))
elif k == I-1:
dlist.append("Q %f %f %f %f" % (cx3+dx/2, cy3+dy/2, cx4, cy4))
elif k % 4 == 1:
dlist.append("Q %f %f %f %f" % (cx3-dx, cy3-dy, cx4, cy4))
#dlist.append("M %f %f" % (cx4, cy4))
else:
dlist.append("Q %f %f %f %f" % (cx3+dx, cy3+dy, cx4, cy4))
#dlist.append("M %f %f" % (cx4, cy4))
d = " ".join(dlist)
d = d
tag = """<path d="%s" stroke="black" stroke-width="%d" stroke-linecap="round" fill="none" />""" % (d, line_stroke_width)
result.append(tag)
elif (order == 1):
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d" />""" % (
cx(x1), cy(y1), cx(x2), cy(y2), line_stroke_width
)
result.append(tag)
if order == 2:
cx1 = cx(x1)
cy1 = cy(y1)
cx2 = cx(x2)
cy2 = cy(y2)
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, D2)
x11 = cx1 + dx
x12 = cx1 - dx
x21 = cx2 + dx
x22 = cx2 - dx
y11 = cy1 + dy
y12 = cy1 - dy
y21 = cy2 + dy
y22 = cy2 - dy
if style == 8:
ac = aromatic_bond_classifier(x1, y1, x2, y2, aid1, aid2)
color1 = 'black'
color2 = 'black'
if ac == 1:
#1 is shorter
x11 = cx1 + 2 * dx
x12 = cx1
x21 = cx2 + 2 * dx
x22 = cx2
y11 = cy1 + 2 * dy
y12 = cy1
y21 = cy2 + 2 * dy
y22 = cy2
x11 = x11 + (x21-x11) * 0.2
y11 = y11 + (y21-y11) * 0.2
x21 = x21 - (x21-x11) * (2.0/8.0)
y21 = y21 - (y21-y11) * (2.0/8.0)
elif ac == -1:
#2 is shorter
x11 = cx1
x12 = cx1 - 2 * dx
x21 = cx2
x22 = cx2 - 2 * dx
y11 = cy1
y12 = cy1 - 2 * dy
y21 = cy2
y22 = cy2 - 2 * dy
x12 = x12 + (x22-x12) * 0.2
y12 = y12 + (y22-y12) * 0.2
x22 = x22 - (x22-x12) * (2.0/8.0)
y22 = y22 - (y22-y12) * (2.0/8.0)
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:%s;stroke-width:%d;stroke-linecap=round" />""" % (
(x11), (y11), (x21), (y21), color1, line_stroke_width
)
result.append(tag)
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:%s;stroke-width:%d;stroke-linecap=round" />""" % (
(x12), (y12), (x22), (y22), color2, line_stroke_width
)
result.append(tag)
elif style == 1:
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % (
(x11), (y11), (x22), (y22), line_stroke_width
)
result.append(tag)
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % (
(x12), (y12), (x21), (y21), line_stroke_width
)
result.append(tag)
else:
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % (
(x11), (y11), (x21), (y21), line_stroke_width
)
result.append(tag)
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % (
(x12), (y12), (x22), (y22), line_stroke_width
)
result.append(tag)
if order == 3:
(dx, dy) = get_multiple_line_coord(x1, y1, x2, y2, D3)
x11 = x1 + dx
x12 = x1 - dx
x21 = x2 + dx
x22 = x2 - dx
y11 = y1 + dy
y12 = y1 - dy
y21 = y2 + dy
y22 = y2 - dy
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % (
(x11), (y11), (x21), (y21), line_stroke_width
)
result.append(tag)
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % (
(x12), (y12), (x22), (y22), line_stroke_width
)
result.append(tag)
for i in range(0, atoms_count):
aid = atoms_aid[i]
element = atoms_element[i]
charge = None
if aid in atoms_charge:
charge = atoms_charge[aid]
stroke_color = fill_color = element_to_color(element)
i2 = aid_index_in_coords_aid(aid, coords_aid)
x = coords_conformers_x[i2]
y = coords_conformers_y[i2]
if atom_circle:
tag = """<circle cx="%f" cy="%f" r="5" stroke="%s" stroke-width="%d" fill="%s" />""" % (
cx(x), cy(y), stroke_color,
atom_stroke_width,
fill_color
)
result.append(tag)
# charge
if charge != None and charge != 0:
charge_text = '+'
if charge < 0:
charge_text = '-'
angle = get_best_pos_for_charge(aid)
dx = math.cos(angle) * font_size / 1.2
dy = math.sin(angle) * font_size / 1.2
#tag = """<!-- aid=%d angle=%f dx=%f dy=%f -->""" % (aid, angle, dx, dy)
#result.append(tag)
#tag = """<text x="%f" y="%f" fill="%s">%s</text>""" % (
# cx(x) - font_size/4 + dx, cy(y) + font_size/3 + dy, 'black', charge_text
# )
if charge_text == '+':
tag = """<path d="M %f %f L %f %f M %f %f L %f %f" stroke="black" stroke-width="%d" />""" % (
cx(x) + dx - font_size / 3, cy(y) + dy,
cx(x) + dx + font_size / 3, cy(y) + dy,
cx(x) + dx, cy(y) + dy - font_size / 3,
cx(x) + dx, cy(y) + dy+ font_size / 3,
charge_stroke_width
)
elif charge_text == '-':
tag = """<path d="M %f %f L %f %f" stroke="black" stroke-width="%d" />""" % (
cx(x) + dx - font_size / 3, cy(y) + dy,
cx(x) + dx + font_size / 3, cy(y) + dy,
charge_stroke_width
)
result.append(tag)
# atom
if atom_text:
tag = """<text x="%f" y="%f" fill="%s">%s</text>""" % (
cx(x) - font_size/4, cy(y) + font_size/3, stroke_color, element_to_text(element)
)
result.append(tag)
if atom_aid:
tag = """<text x="%f" y="%f" fill="%s">%s</text>""" % (
cx(x) - font_size/4, cy(y) + font_size/3, stroke_color, aid
)
result.append(tag)
svg = "\n".join(result)
if output_html:
form = """<!DOCTYPE html>
<html>
<head>
<style type="text/css">
<!--
body {
font-family:Monaco;
font-size:%dpx;
}
//-->
</style>
</head>
<body>
<svg width="512" height="512">
%s
</svg>
</body>
</html>"""
print(form % (font_size, svg))
elif output_svg:
form = """<?xml version="1.0" encoding="UTF-8" ?>
<svg xmlns="http://www.w3.org/2000/svg" version="1.1">
%s
</svg>"""
print(form % (svg))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment