Created
November 16, 2022 14:45
-
-
Save sneumann/6c814c5357bb35a948cd8e3c8b57fca1 to your computer and use it in GitHub Desktop.
Import Metadata from Chemotion to FIZ-OAI provider
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Import molecule metadata from the Chemotion repository into a FIZ-OAI provider.
##
## 1) Get the paginated list of molecules from Chemotion
## 2) Extract the internal Chemotion ID and the DOI of each molecule with `jq`
## 3) Query each molecule and convert it with `jq` to (bio)schemas JSON
## 4) Embed the JSON in a CDATA section of a minimal XML container
## 5) POST the XML to the FIZ-OAI backend running at $BACKEND, e.g. localhost
##
## Requires: wget, jq, curl.

# Honour a BACKEND already provided by the caller; otherwise use a local instance.
export BACKEND="${BACKEND:-http://localhost:8081/oai-backend}"

# Base URL of the public Chemotion repository API.
CHEMOTION_API=https://www.chemotion-repository.net/api/v1/public

# jq program mapping one Chemotion molecule document to a bioschemas
# MolecularEntity. Defined once (single-quoted, so the shell never touches
# jq's "\(...)" string interpolation) instead of being rebuilt per molecule.
# NOTE(review): the "identifier" value is a placeholder carried over unchanged.
MOLECULE_FILTER='
{ "@context": "https://schema.org/",
  "@type": "MolecularEntity",
  "dct:conformsTo": { "http://purl.org/dc/terms/conformsTo": {
      "@id": "https://bioschemas.org/profiles/MolecularEntity/0.5-RELEASE",
      "@type": "CreativeWork" }
  },
  "@id": .molecule.tag.taggable_data.chemotion.doi,
  name: .molecule.iupac_name,
  url: "http://chemotion-repository.net/home/publications/molecules/\(.molecule.id)",
  identifier: "chemotion ID ?",
  iupacName:.molecule.iupac_name,
  smiles: .molecule.cano_smiles,
  inChI: .molecule.inchistring,
  inChIKey: .molecule.inchikey,
  molecularFormula: .molecule.sum_formular,
  molecularWeight: { value: .molecule.molecular_weight }
}
'

# The first ten result pages with 100 molecules each are imported.
for I in 1 2 3 4 5 6 7 8 9 10; do
  # Emit one "<id><TAB><doi>" line per molecule of this page.
  wget -O- "$CHEMOTION_API/molecules.json?page=$I&per_page=100" \
    | jq --raw-output '.molecules | map([.id , .tag.taggable_data.chemotion.doi] | join("\t") ) | join("\n")' \
    | while read -r ID DOI; do
        # An empty page yields a blank line and a molecule without DOI yields
        # an empty second field; neither can be registered under an identifier.
        if [ -z "$ID" ] || [ -z "$DOI" ]; then
          printf 'page %s: skipping entry without ID or DOI (ID="%s")\n' "$I" "$ID" >&2
          continue
        fi

        # Fetch and convert first, so a failed download is never posted as an
        # empty container. The URL is quoted: "?" is a glob character.
        RECORD=$(wget -q -O- "$CHEMOTION_API/molecule.json?id=$ID" | jq "$MOLECULE_FILTER")
        if [ -z "$RECORD" ]; then
          printf 'molecule %s: download or conversion failed, skipping\n' "$ID" >&2
          continue
        fi

        # Let jq build the item descriptor so the DOI is JSON-escaped.
        ITEM=$(jq -cn --arg doi "$DOI" \
          '{identifier: $doi, deleteFlag: false, ingestFormat: "json_container"}')

        # Wrap the JSON in the XML container and stream it to the backend as
        # the "content" part of a multipart form (curl reads it from stdin).
        printf '%s\n' \
          '<json xmlns="http://denbi.de/schemas/json-container">' \
          '<![CDATA[' \
          "$RECORD" \
          ']]>' \
          '</json>' \
          | curl -v -X POST -H 'Content-Type: multipart/form-data' \
              -i "$BACKEND/item" \
              -F "item=$ITEM;type=application/json" \
              -F content=@-
      done # while read
done # for
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment