Created
July 21, 2021 15:38
-
-
Save Luthaf/0798757483390aae0bb73e245604eeb9 to your computer and use it in GitHub Desktop.
Transform chemiscope input with NaN to the new sparse environments format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Transform chemiscope input with NaN to the new sparse environments format.

The script reads a gzip-compressed JSON dataset, drops every environment
whose entry in the first atom-targeted property is NaN/infinite, filters all
atom-targeted properties with the same mask, remaps the pinned environment
indices stored in the settings, and writes the result back as gzipped JSON.
"""
import gzip
import json

import numpy as np

input_path = "./app/examples/Zeolites.json.gz"
output_path = "Zeolites.json.gz"


def remove_nan_environments(data):
    """Drop environments (and matching atom property values) that are not finite.

    The first property whose ``"target"`` is ``"atom"`` is used as the mask:
    an environment is kept only when the corresponding value in that property
    is finite (neither NaN nor infinite).

    Args:
        data: The decoded dataset, a dict with a ``"properties"`` mapping and,
            when atom properties are present, an ``"environments"`` list.
            ``data["settings"]["pinned"]`` is updated when it exists.

    Returns:
        The same ``data`` dict, modified in place.

    Raises:
        ValueError: If the mask does not have exactly one entry per
            environment, since filtering would then corrupt the dataset.
    """
    properties = data["properties"]
    atom_properties = [
        name for name, prop in properties.items() if prop["target"] == "atom"
    ]
    if not atom_properties:
        # Without any atom property there is no mask to apply.
        return data

    # only take the non-NaN entries
    valid = np.isfinite(properties[atom_properties[0]]["values"])

    old_environments = data["environments"]
    if valid.shape != (len(old_environments),):
        raise ValueError(
            f"property '{atom_properties[0]}' has shape {valid.shape}, "
            f"expected one value for each of the {len(old_environments)} "
            "environments"
        )

    # Map each kept environment's old position to its position in the
    # filtered list; this is what lets pinned indices be translated directly.
    new_index_of = {}
    environments = []
    for old_index, (environment, keep) in enumerate(zip(old_environments, valid)):
        if keep:
            new_index_of[old_index] = len(environments)
            environments.append(environment)
    data["environments"] = environments

    if "settings" in data and "pinned" in data["settings"]:
        # Pinned entries pointing at a removed environment are dropped.
        data["settings"]["pinned"] = [
            new_index_of[pinned]
            for pinned in data["settings"]["pinned"]
            if pinned in new_index_of
        ]

    for name in atom_properties:
        prop = properties[name]
        prop["values"] = np.array(prop["values"])[valid].tolist()

    return data


def main():
    """Read ``input_path``, remove the NaN environments, write ``output_path``."""
    with gzip.open(input_path) as fd:
        dataset = json.load(fd)

    dataset = remove_nan_environments(dataset)

    with gzip.open(output_path, "wb") as fd:
        fd.write(json.dumps(dataset).encode("utf8"))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment