Last active
June 21, 2017 17:07
-
-
Save keewon/c0062e60e00f5c3ac46112abeabe33eb to your computer and use it in GitHub Desktop.
Pubchem 2D structure json to SVG
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys, json, math | |
svg_width = 512 | |
svg_height = 512 | |
margin_x = svg_width / 10.0 | |
margin_y = svg_height / 10.0 | |
font_size = 16 | |
charge_stroke_width=2.0 | |
line_stroke_width = 3.0 | |
wedge_down_stroke_width = 3.0 | |
atom_stroke_width = 1.0 | |
wedge_down_count = 3 | |
atom_circle = True | |
atom_text = False | |
atom_aid = False | |
output_html = False | |
output_svg = True | |
D2 = 3 # for double bond | |
D3 = 5 # for triple bond | |
DW = 8 # wavy | |
def get_min_max(coord): | |
min_v = coord[0] | |
max_v = coord[0] | |
for v in coord: | |
if min_v > v: | |
min_v = v | |
if max_v < v: | |
max_v = v | |
return (min_v, max_v) | |
scale_factor = 1 | |
pan_x = 0 | |
pan_y = 0 | |
def cx(x): | |
return margin_x + (x + pan_x) * scale_factor | |
def cy(y): | |
return margin_y + (y + pan_y) * scale_factor | |
def element_to_text(element): | |
x = " H CNO" | |
if 1 <= element <= 8: | |
if x[element] != ' ': | |
return x[element] | |
if element == 16: | |
return "S" | |
return " " # TODO | |
def element_to_color(element): | |
if element == 1: | |
return 'lightgray' | |
if element == 6: | |
return 'black' | |
if element == 7: | |
return 'blue' | |
if element == 8: | |
return 'red' | |
if element == 16: | |
return 'yellow' | |
return 'black' | |
def aid_index_in_coords_aid(aid, coords_aid1): | |
l = len(coords_aid1) | |
for i in range(0, l): | |
if coords_aid1[i] == aid: | |
return i | |
return -1 | |
def get_multiple_line_coord(x1, y1, x2, y2, D): | |
l = math.sqrt( (x2-x1) * (x2-x1) + (y2-y1) * (y2-y1) ) | |
dx = -(y2-y1) * D / l | |
dy = (x2-x1) * D / l | |
return (dx, dy) | |
fn = sys.argv[1] | |
f = open(fn, 'rb') | |
j = json.loads(f.read()) | |
atoms = j['PC_Compounds'][0]['atoms'] | |
bonds = j['PC_Compounds'][0]['bonds'] | |
coords = j['PC_Compounds'][0]['coords'] | |
atoms_aid = atoms['aid'] # list of int, id of each atom | |
atoms_element = atoms['element'] # list of int, 1=H, 6=C, 7=N, 8=O, 16=S, 17=Cl | |
atoms_charge = {} | |
try: | |
atoms_charge1 = atoms['charge'] # list of { 'aid':int, 'value':+1/0/-1 } | |
for kv in atoms_charge1: | |
atoms_charge[ kv['aid'] ] = kv['value'] | |
except KeyError: | |
pass | |
bonds_aid1 = bonds['aid1'] | |
bonds_aid2 = bonds['aid2'] | |
bonds_order = bonds['order'] | |
coords_aid = coords[0]['aid'] | |
coords_conformers_x = coords[0]['conformers'][0]['x'] | |
coords_conformers_y = coords[0]['conformers'][0]['y'] | |
coords_style = [] | |
try: | |
coords_style = coords[0]['conformers'][0]['style'] | |
except KeyError: | |
pass | |
atoms_count = len(atoms_aid) | |
bonds_count = len(bonds_aid1) | |
# flip y | |
for i in range(0, len(coords_conformers_y)): | |
coords_conformers_y[i] = -coords_conformers_y[i] | |
(min_x, max_x) = get_min_max(coords_conformers_x) | |
(min_y, max_y) = get_min_max(coords_conformers_y) | |
pan_x = -min_x | |
pan_y = -min_y | |
if max_x - min_x > max_y - min_y: | |
diff = max_x - min_x | |
#scale_factor = (svg_width - margin_x * 2) / diff | |
scale_factor = 30 | |
pan_y = pan_y + ((svg_height - margin_y * 2) - (max_y - min_y) * scale_factor) / 2 / scale_factor | |
else: | |
diff = max_y - min_y | |
#scale_factor = (svg_height - margin_y * 2) / diff | |
scale_factor = 30 | |
pan_x = pan_x + ((svg_width - margin_x * 2) - (max_x - min_x) * scale_factor) / 2 / scale_factor | |
def get_element_by_aid(aid): | |
for i in range(0, atoms_count): | |
if atoms_aid[i] == aid: | |
return atoms_element[i] | |
return None | |
def get_bond_style(aid1, aid2): | |
if coords_style: | |
style_aid1 = coords_style['aid1'] | |
style_aid2 = coords_style['aid2'] | |
for si in range(0, len(style_aid1)): | |
if (style_aid1[si] == aid1 and style_aid2[si] == aid2): | |
style = coords_style['annotation'][si] | |
return (style, False) | |
if (style_aid1[si] == aid2 and style_aid2[si] == aid1): | |
style = coords_style['annotation'][si] | |
return (style, True) | |
return (None, False) | |
def aromatic_bond_classifier(x1, y1, x2, y2, aid1, aid2): | |
a1_neighbors = [] | |
a2_neighbors = [] | |
for i in range(0, bonds_count): | |
aid1_i = bonds_aid1[i] | |
aid2_i = bonds_aid2[i] | |
bond_style, _ = get_bond_style(aid1_i, aid2_i) | |
if bond_style == 8: | |
if aid1_i == aid1: | |
a1_neighbors.append(aid2_i) | |
elif aid2_i == aid1: | |
a1_neighbors.append(aid1_i) | |
elif aid1_i == aid2: | |
a2_neighbors.append(aid2_i) | |
elif aid2_i == aid2: | |
a2_neighbors.append(aid1_i) | |
if aid2 in a1_neighbors: | |
a1_neighbors.remove(aid2) | |
if aid1 in a2_neighbors: | |
a2_neighbors.remove(aid1) | |
for a in a1_neighbors[:]: | |
if get_element_by_aid(a) == 1: | |
a1_neighbors.remove(a) | |
for a in a2_neighbors[:]: | |
if get_element_by_aid(a) == 1: | |
a2_neighbors.remove(a) | |
result = 0 | |
for neighbors in [a1_neighbors, a2_neighbors]: | |
for a in neighbors: | |
i = aid_index_in_coords_aid(a, coords_aid) | |
x = coords_conformers_x[i] | |
y = coords_conformers_y[i] | |
v1 = (x2-x1, y2-y1) | |
v2 = (x-x1, y-y1) | |
t = v1[0] * v2[1] - v1[1] * v2[0] | |
if t > 0: | |
result = result + 1 | |
elif t < 0: | |
result = result - 1 | |
if result < 0: | |
return -1 | |
if result > 0: | |
return 1 | |
return result | |
def get_angle(x1, y1, x2, y2): | |
if y2 == y1 and x2 >= x1: | |
return 0 | |
if x1 == x2 and y2 > y1: | |
return math.pi / 2 | |
if y1 == y2 and x2 < x1: | |
return math.pi | |
if x1 == x2 and y2 < y1: | |
return math.pi * 3 / 2 | |
v1 = (1, 0) | |
v2 = (x2-x1, y2-y1) | |
cos_theta = ((v1[0] * v2[0] + v1[1] * v2[1]) / | |
(math.sqrt(v1[0] * v1[0] + v1[1] * v1[1]) * | |
math.sqrt(v2[0] * v2[0] + v2[1] * v2[1]))) | |
theta = math.acos(cos_theta) | |
cp = v1[0] * v2[1] - v1[1] * v2[0] | |
#print(theta, cp) | |
if cp > 0: | |
return theta | |
else: | |
return -theta | |
def get_best_pos_for_charge(aid): | |
i1 = aid_index_in_coords_aid(aid, coords_aid) | |
x1 = coords_conformers_x[i1] | |
y1 = coords_conformers_y[i1] | |
angles = [] | |
neighbors_xy = [] | |
for i in range(0, bonds_count): | |
aid1_i = bonds_aid1[i] | |
aid2_i = bonds_aid2[i] | |
a = None | |
if aid1_i == aid: | |
a = aid2_i | |
elif aid2_i == aid: | |
a = aid1_i | |
if a: | |
i2 = aid_index_in_coords_aid(a, coords_aid) | |
x2 = coords_conformers_x[i2] | |
y2 = coords_conformers_y[i2] | |
#print(aid, a, x1, y1, x2, y2) | |
angle = get_angle(x1, y1, x2, y2) | |
angles.append(angle) | |
#print(angles) | |
if len(angles) == 0: | |
return math.pi * 3 / 2 | |
elif len(angles) == 1: | |
angle1 = angles[0] | |
angle1 = angle1 + math.pi | |
if angle1 > math.pi * 2: | |
angle1 = angle1 - math.pi * 2 | |
#print('len=1', angle1) | |
return angle1 | |
else: | |
angles.sort() | |
angles.append(angles[0]) | |
max_angle_diff = 0 | |
max_angle_i = 0 | |
for i in range(0, len(angles)-1): | |
angle_diff = angles[i+1] - angles[i] | |
if angle_diff < 0: | |
angle_diff = angle_diff + math.pi * 2 | |
if angle_diff > max_angle_diff: | |
max_angle_diff = angle_diff | |
max_angle_i = i | |
angle1 = angles[max_angle_i] | |
angle2 = angles[max_angle_i+1] | |
diff = (angle2 - angle1) /2 | |
if diff < 0: | |
diff = diff + math.pi * 2 | |
if diff >= math.pi * 2: | |
diff = diff - math.pi * 2 | |
#print(diff, max_angle_i) | |
return angle1 + diff | |
result = [] | |
for i in range(0, bonds_count): | |
aid1 = bonds_aid1[i] | |
aid2 = bonds_aid2[i] | |
order = bonds_order[i] | |
i1 = aid_index_in_coords_aid(aid1, coords_aid) | |
i2 = aid_index_in_coords_aid(aid2, coords_aid) | |
x1 = coords_conformers_x[i1] | |
x2 = coords_conformers_x[i2] | |
y1 = coords_conformers_y[i1] | |
y2 = coords_conformers_y[i2] | |
style = None # 1:Cross, 3:Wavy, 5:Wedge-up, 6:Wedge-down, 8:Aromatic | |
exchange_coord = False | |
style, exchange_coord = get_bond_style(aid1, aid2) | |
if (order == 1 and (style == None or style == 8)) or order == 3: | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % ( | |
cx(x1), cy(y1), cx(x2), cy(y2), line_stroke_width | |
) | |
result.append(tag) | |
elif (order == 1 and style == 5): | |
cx1 = cx(x1) | |
cy1 = cy(y1) | |
cx2 = cx(x2) | |
cy2 = cy(y2) | |
if exchange_coord: | |
cx1, cy1, cx2, cy2 = cx2, cy2, cx1, cy1 | |
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, D3) | |
tag = """<path d="M %f %f L %f %f %f %f %f %f Z" stroke="black" stroke-width="%d" fill="black" stroke-linecap="round" />""" % ( | |
cx1, cy1, cx2+dx, cy2+dy, cx2-dx, cy2-dy, cx1, cy1, line_stroke_width | |
) | |
result.append(tag) | |
elif (order == 1 and style == 6): | |
cx1 = cx(x1) | |
cy1 = cy(y1) | |
cx2 = cx(x2) | |
cy2 = cy(y2) | |
if exchange_coord: | |
cx1, cy1, cx2, cy2 = cx2, cy2, cx1, cy1 | |
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, D3) | |
dlist = [] | |
for k in range(0, wedge_down_count): | |
cx3 = cx2 - (cx2 - cx1) * k / wedge_down_count | |
cy3 = cy2 - (cy2 - cy1) * k / wedge_down_count | |
dlist.append("M %f %f L %f %f" % ( | |
cx3-dx*(wedge_down_count-k)/wedge_down_count, | |
cy3-dy*(wedge_down_count-k)/wedge_down_count, | |
cx3+dx*(wedge_down_count-k)/wedge_down_count, | |
cy3+dy*(wedge_down_count-k)/wedge_down_count) | |
) | |
d = " ".join(dlist) | |
d = d | |
tag = """<path d="%s" stroke="black" stroke-width="%d" stroke-linecap="round" fill="black" />""" % (d, wedge_down_stroke_width) | |
result.append(tag) | |
elif (order == 1 and style == 3): | |
cx1 = cx(x1) | |
cy1 = cy(y1) | |
cx2 = cx(x2) | |
cy2 = cy(y2) | |
if exchange_coord: | |
cx1, cy1, cx2, cy2 = cx2, cy2, cx1, cy1 | |
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, DW) | |
dlist = [] | |
dlist.append("M %f %f" % (cx2, cy2)) | |
I = 16 | |
for k in range(1, I+1, 2): | |
cx3 = cx2 - (cx2 - cx1) * k / I | |
cy3 = cy2 - (cy2 - cy1) * k / I | |
cx4 = cx2 - (cx2 - cx1) * (k+1) / I | |
cy4 = cy2 - (cy2 - cy1) * (k+1) / I | |
if k == 1: | |
dlist.append("Q %f %f %f %f" % (cx3-dx/2, cy3-dy/2, cx4, cy4)) | |
elif k == I-1: | |
dlist.append("Q %f %f %f %f" % (cx3+dx/2, cy3+dy/2, cx4, cy4)) | |
elif k % 4 == 1: | |
dlist.append("Q %f %f %f %f" % (cx3-dx, cy3-dy, cx4, cy4)) | |
#dlist.append("M %f %f" % (cx4, cy4)) | |
else: | |
dlist.append("Q %f %f %f %f" % (cx3+dx, cy3+dy, cx4, cy4)) | |
#dlist.append("M %f %f" % (cx4, cy4)) | |
d = " ".join(dlist) | |
d = d | |
tag = """<path d="%s" stroke="black" stroke-width="%d" stroke-linecap="round" fill="none" />""" % (d, line_stroke_width) | |
result.append(tag) | |
elif (order == 1): | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d" />""" % ( | |
cx(x1), cy(y1), cx(x2), cy(y2), line_stroke_width | |
) | |
result.append(tag) | |
if order == 2: | |
cx1 = cx(x1) | |
cy1 = cy(y1) | |
cx2 = cx(x2) | |
cy2 = cy(y2) | |
(dx, dy) = get_multiple_line_coord(cx1, cy1, cx2, cy2, D2) | |
x11 = cx1 + dx | |
x12 = cx1 - dx | |
x21 = cx2 + dx | |
x22 = cx2 - dx | |
y11 = cy1 + dy | |
y12 = cy1 - dy | |
y21 = cy2 + dy | |
y22 = cy2 - dy | |
if style == 8: | |
ac = aromatic_bond_classifier(x1, y1, x2, y2, aid1, aid2) | |
color1 = 'black' | |
color2 = 'black' | |
if ac == 1: | |
#1 is shorter | |
x11 = cx1 + 2 * dx | |
x12 = cx1 | |
x21 = cx2 + 2 * dx | |
x22 = cx2 | |
y11 = cy1 + 2 * dy | |
y12 = cy1 | |
y21 = cy2 + 2 * dy | |
y22 = cy2 | |
x11 = x11 + (x21-x11) * 0.2 | |
y11 = y11 + (y21-y11) * 0.2 | |
x21 = x21 - (x21-x11) * (2.0/8.0) | |
y21 = y21 - (y21-y11) * (2.0/8.0) | |
elif ac == -1: | |
#2 is shorter | |
x11 = cx1 | |
x12 = cx1 - 2 * dx | |
x21 = cx2 | |
x22 = cx2 - 2 * dx | |
y11 = cy1 | |
y12 = cy1 - 2 * dy | |
y21 = cy2 | |
y22 = cy2 - 2 * dy | |
x12 = x12 + (x22-x12) * 0.2 | |
y12 = y12 + (y22-y12) * 0.2 | |
x22 = x22 - (x22-x12) * (2.0/8.0) | |
y22 = y22 - (y22-y12) * (2.0/8.0) | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:%s;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x11), (y11), (x21), (y21), color1, line_stroke_width | |
) | |
result.append(tag) | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:%s;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x12), (y12), (x22), (y22), color2, line_stroke_width | |
) | |
result.append(tag) | |
elif style == 1: | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x11), (y11), (x22), (y22), line_stroke_width | |
) | |
result.append(tag) | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x12), (y12), (x21), (y21), line_stroke_width | |
) | |
result.append(tag) | |
else: | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x11), (y11), (x21), (y21), line_stroke_width | |
) | |
result.append(tag) | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x12), (y12), (x22), (y22), line_stroke_width | |
) | |
result.append(tag) | |
if order == 3: | |
(dx, dy) = get_multiple_line_coord(x1, y1, x2, y2, D3) | |
x11 = x1 + dx | |
x12 = x1 - dx | |
x21 = x2 + dx | |
x22 = x2 - dx | |
y11 = y1 + dy | |
y12 = y1 - dy | |
y21 = y2 + dy | |
y22 = y2 - dy | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x11), (y11), (x21), (y21), line_stroke_width | |
) | |
result.append(tag) | |
tag = """<line x1="%f" y1="%f" x2="%f" y2="%f" style="stroke:black;stroke-width:%d;stroke-linecap=round" />""" % ( | |
(x12), (y12), (x22), (y22), line_stroke_width | |
) | |
result.append(tag) | |
for i in range(0, atoms_count): | |
aid = atoms_aid[i] | |
element = atoms_element[i] | |
charge = None | |
if aid in atoms_charge: | |
charge = atoms_charge[aid] | |
stroke_color = fill_color = element_to_color(element) | |
i2 = aid_index_in_coords_aid(aid, coords_aid) | |
x = coords_conformers_x[i2] | |
y = coords_conformers_y[i2] | |
if atom_circle: | |
tag = """<circle cx="%f" cy="%f" r="5" stroke="%s" stroke-width="%d" fill="%s" />""" % ( | |
cx(x), cy(y), stroke_color, | |
atom_stroke_width, | |
fill_color | |
) | |
result.append(tag) | |
# charge | |
if charge != None and charge != 0: | |
charge_text = '+' | |
if charge < 0: | |
charge_text = '-' | |
angle = get_best_pos_for_charge(aid) | |
dx = math.cos(angle) * font_size / 1.2 | |
dy = math.sin(angle) * font_size / 1.2 | |
#tag = """<!-- aid=%d angle=%f dx=%f dy=%f -->""" % (aid, angle, dx, dy) | |
#result.append(tag) | |
#tag = """<text x="%f" y="%f" fill="%s">%s</text>""" % ( | |
# cx(x) - font_size/4 + dx, cy(y) + font_size/3 + dy, 'black', charge_text | |
# ) | |
if charge_text == '+': | |
tag = """<path d="M %f %f L %f %f M %f %f L %f %f" stroke="black" stroke-width="%d" />""" % ( | |
cx(x) + dx - font_size / 3, cy(y) + dy, | |
cx(x) + dx + font_size / 3, cy(y) + dy, | |
cx(x) + dx, cy(y) + dy - font_size / 3, | |
cx(x) + dx, cy(y) + dy+ font_size / 3, | |
charge_stroke_width | |
) | |
elif charge_text == '-': | |
tag = """<path d="M %f %f L %f %f" stroke="black" stroke-width="%d" />""" % ( | |
cx(x) + dx - font_size / 3, cy(y) + dy, | |
cx(x) + dx + font_size / 3, cy(y) + dy, | |
charge_stroke_width | |
) | |
result.append(tag) | |
# atom | |
if atom_text: | |
tag = """<text x="%f" y="%f" fill="%s">%s</text>""" % ( | |
cx(x) - font_size/4, cy(y) + font_size/3, stroke_color, element_to_text(element) | |
) | |
result.append(tag) | |
if atom_aid: | |
tag = """<text x="%f" y="%f" fill="%s">%s</text>""" % ( | |
cx(x) - font_size/4, cy(y) + font_size/3, stroke_color, aid | |
) | |
result.append(tag) | |
svg = "\n".join(result) | |
if output_html: | |
form = """<!DOCTYPE html> | |
<html> | |
<head> | |
<style type="text/css"> | |
<!-- | |
body { | |
font-family:Monaco; | |
font-size:%dpx; | |
} | |
//--> | |
</style> | |
</head> | |
<body> | |
<svg width="512" height="512"> | |
%s | |
</svg> | |
</body> | |
</html>""" | |
print(form % (font_size, svg)) | |
elif output_svg: | |
form = """<?xml version="1.0" encoding="UTF-8" ?> | |
<svg xmlns="http://www.w3.org/2000/svg" version="1.1"> | |
%s | |
</svg>""" | |
print(form % (svg)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment