Last active
April 26, 2023 22:24
-
-
Save timdecode/a00e2ee1e5fd9f643d7656dee51a9baa to your computer and use it in GitHub Desktop.
cellPACK C++ Parser (for an Objective-C project, hence the Objective-C++ code)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// (Objective-C header) | |
// CellPackParser.h | |
// Frameworks | |
// | |
// Created by Timothy Davison on 2022-05-11. | |
// | |
#pragma once | |
#import <Foundation/Foundation.h> | |
#import <simd/simd.h> | |
NS_ASSUME_NONNULL_BEGIN | |
typedef NS_ENUM(NSUInteger, CellPackType) { | |
lipid, | |
protein | |
}; | |
@interface CellPackAssembly: NSObject | |
@property NSString * name; | |
@property NSString * entityName; | |
@property NSString * entityDescription; | |
@property CellPackType type; | |
- (instancetype) initWithName: (NSString*) name; | |
- (simd_float4*) atoms; | |
- (size_t) atomsCount; | |
- (simd_float4x4*) instanceTransforms; | |
- (size_t) instanceTransformsCount; | |
@end | |
@interface CellPackParser: NSObject | |
- (instancetype) initWithCIFPath:(NSString*) path; | |
- (NSArray<CellPackAssembly*>*) assemblies; | |
@end | |
NS_ASSUME_NONNULL_END | |
// | |
// CellPackParser.mm | |
// PDBParser | |
// | |
// Created by Timothy Davison on 2022-05-11. | |
// | |
#import <Foundation/Foundation.h> | |
#import "CellPackParser.h" | |
#include <gemmi/cif.hpp> | |
#include <gemmi/numb.hpp> // for as_number | |
#include <gemmi/mmread.hpp> | |
#include <gemmi/gz.hpp> | |
#include <gemmi/select.hpp> | |
#include <vector> | |
#include <fstream> | |
#include <map> | |
#include <optional> | |
namespace cif = gemmi::cif; | |
@interface CellPackAssembly () | |
{ | |
std::vector<simd_float4> _atoms; | |
std::vector<simd_float4x4> _instanceTransforms; | |
} | |
@property std::vector<simd_float4> cppAtoms; | |
@property std::vector<simd_float4x4> cppTransforms; | |
@end | |
@implementation CellPackAssembly | |
- (instancetype) initWithName:(NSString *)name { | |
if( self = [super init] ) { | |
self.name = name; | |
self.entityName = @""; | |
self.entityDescription = @""; | |
self.type = CellPackType::protein; | |
} | |
return self; | |
} | |
- (simd_float4*) atoms { | |
return &_cppAtoms[0]; | |
} | |
- (size_t) atomsCount { | |
return _cppAtoms.size(); | |
} | |
- (simd_float4x4*) instanceTransforms { | |
return &_cppTransforms[0]; | |
} | |
- (size_t) instanceTransformsCount { | |
return _cppTransforms.size(); | |
} | |
@end | |
@interface CellPackParser () | |
+ (NSString*) fixupForGemmi:(NSString*) path; | |
@end; | |
@implementation CellPackParser | |
{ | |
cif::Document doc; | |
gemmi::Structure structure; | |
} | |
- (instancetype)initWithCIFPath:(NSString*) path { | |
self = [super init]; | |
NSString * fixedFile = [CellPackParser fixupForGemmi:path]; | |
NSLog(@"%@", fixedFile); | |
std::string stdPath = std::string([fixedFile UTF8String]); | |
doc = cif::read_file(stdPath); | |
structure = gemmi::make_structure(doc); | |
const auto selector = gemmi::Selection(); | |
return self; | |
} | |
+ (NSString*) fixupForGemmi:(NSString*)path { | |
/* | |
loop_ | |
_atom_site.group_PDB | |
_atom_site.id | |
_atom_site.type_symbol | |
_atom_site.label_atom_id | |
_atom_site.label_alt_id | |
_atom_site.label_comp_id | |
_atom_site.label_asym_id | |
_atom_site.label_entity_id | |
_atom_site.label_seq_id | |
_atom_site.pdbx_PDB_ins_code | |
_atom_site.Cartn_x | |
_atom_site.Cartn_y | |
_atom_site.Cartn_z | |
_atom_site.occupancy | |
_atom_site.B_iso_or_equiv | |
_atom_site.pdbx_formal_charge | |
_atom_site.auth_seq_id | |
_atom_site.auth_comp_id | |
_atom_site.auth_asym_id | |
_atom_site.auth_atom_id | |
_atom_site.pdbx_PDB_model_num | |
comp_id seq_id | |
| asym_id | comp_id | |
| | entity_id | | asym_id | |
| | | sed_id | | | | |
| | | | | | | | |
ATOM 1 N N . GLN NG 683 . . -11.289 60.909 45.412 1.00 0.00 . GLN NG 683 N 1 | |
| | | |
Should be: | | | |
ATOM 1 N N . GLN NG 683 . . -11.289 60.909 45.412 1.00 0.00 . . GLN NG N 1 | |
We need to rewrite: | |
auth_seq_id to . | |
auth_comp_id to label_comp_id | |
auth_asym_id to label_asym_id | |
*/ | |
enum class atomSiteKeys: int { | |
label_comp_id = 0, | |
label_asym_id, | |
auth_seq_id, | |
auth_comp_id, | |
auth_asym_id, | |
count | |
}; | |
std::array<int, size_t(atomSiteKeys::count)> atomSiteIndices; | |
std::string stdPath = std::string([path UTF8String]); | |
std::ifstream infile(stdPath); | |
std::string line; | |
// search for | |
// loop_ | |
// _atom_site | |
bool inAtomSite = false; | |
int column = 0; | |
NSUUID * uuid = [NSUUID UUID]; | |
NSString * outPath = [NSTemporaryDirectory() stringByAppendingPathComponent: uuid.description]; | |
std::string stdOutPath = std::string([outPath UTF8String]); | |
std::ofstream outfile(stdOutPath); | |
std::vector<std::string> lineTokens; | |
while( std::getline(infile, line) ) { | |
if( line.rfind("_atom_site") == 0 && !inAtomSite ) { | |
inAtomSite = true; | |
column = 0; | |
} | |
if( line.rfind("_atom_site") != 0 ) { | |
inAtomSite = false; | |
} | |
// Rewrite atoms | |
if( inAtomSite ) { | |
if( line.rfind("_atom_site.label_comp_id") == 0 ) { | |
atomSiteIndices[int(atomSiteKeys::label_comp_id)] = column; | |
} | |
else if( line.rfind("_atom_site.label_asym_id") == 0 ) { | |
atomSiteIndices[int(atomSiteKeys::label_asym_id)] = column; | |
} | |
else if( line.rfind("_atom_site.auth_seq_id") == 0 ) { | |
atomSiteIndices[int(atomSiteKeys::auth_seq_id)] = column; | |
} | |
else if( line.rfind("_atom_site.auth_comp_id") == 0 ) { | |
atomSiteIndices[int(atomSiteKeys::auth_comp_id)] = column; | |
} | |
else if( line.rfind("_atom_site.auth_asym_id") == 0 ) { | |
atomSiteIndices[int(atomSiteKeys::auth_asym_id)] = column; | |
} | |
column += 1; | |
} | |
// Rewrite the atoms | |
if( line.rfind("ATOM") == 0 ) { | |
// tokenize and rewrite the line | |
{ | |
auto lineStream = std::istringstream{line}; | |
auto token = std::string{}; | |
lineTokens.clear(); | |
while( lineStream >> token ) { | |
lineTokens.push_back(token); | |
} | |
// rewrite the line | |
lineTokens[atomSiteIndices[int(atomSiteKeys::auth_seq_id)]] = "."; | |
lineTokens[atomSiteIndices[int(atomSiteKeys::auth_comp_id)]] = lineTokens[atomSiteIndices[int(atomSiteKeys::label_comp_id)]]; | |
lineTokens[atomSiteIndices[int(atomSiteKeys::auth_asym_id)]] = lineTokens[atomSiteIndices[int(atomSiteKeys::label_asym_id)]]; | |
} | |
// output the line | |
{ | |
auto outLine = std::ostringstream{}; | |
for( auto& token : lineTokens ) { | |
outLine << token << " "; | |
} | |
outfile << outLine.str() << std::endl; | |
} | |
} | |
// Append the line unadulaterated | |
else { | |
outfile << line << std::endl; | |
} | |
} | |
outfile.close(); | |
return outPath; | |
} | |
- (NSArray<CellPackAssembly *> *)assemblies { | |
// Ludo's Python Gemmi code for CellPack | |
// import gemmi | |
// f = "C:\\Users\\ludov\\Downloads\\cellpack_atom_instances_test.cif"; | |
// st = gemmi.read_structure(f);st | |
// cellpack_assembly = st.assemblies[0] | |
// for gen in cellpack_assembly.generators : | |
// print (gen.subchains) | |
// print (len(gen.operators)) | |
// #selct model 1 with joined subchain | |
// #/models/chains/residues/atoms | |
// sel = gemmi.Selection("/1/"+",".join(gen.subchains)+"/") | |
// print (sel) | |
// model = sel.first(st)[0] | |
// print('Model', model.name) | |
// chains = sel.chains(model) | |
// for chain in chains: | |
// print('-', chain.name) | |
// for residue in chain: | |
// for atom in residue: | |
// print (residue.name, atom.name, atom.pos) | |
// for oper in gen.operators : | |
// print(oper.transform.vec) #position | |
// print(oper.transform.mat) #rotation | |
NSMutableArray<CellPackAssembly*>* result = [[NSMutableArray<CellPackAssembly*> alloc] init]; | |
// Entity columns so we can set the assemblies CellPack name | |
auto entityIds = doc.sole_block().find_loop("_entity.id"); | |
auto entityDescriptions = doc.sole_block().find_loop("_entity.pdbx_description"); | |
// Create our string > entity lookup table | |
std::map<std::string, int> entities; | |
for( int i = 0; i < entityIds.length(); ++i ) { | |
auto entityId = entityIds[i]; | |
entities[entityId] = i; | |
} | |
// Parse the assemblies | |
for( auto& assembly : structure.assemblies ) { | |
for( auto& generator : assembly.generators ) { | |
std::vector<simd_float4> atomsOut; | |
std::vector<simd_float4x4> transformsOut; | |
// Build the structure (the regular Gemmi model code doesn't work) | |
std::string query = "/1/"; | |
for( int i = 0; i < generator.subchains.size(); ++i ) { | |
const auto name = generator.subchains[i]; | |
if( i == 0 ) { | |
query += name; | |
} else { | |
query += "," + name; | |
} | |
} | |
const auto selection = gemmi::Selection(query); | |
CellPackType type { ::protein }; | |
std::optional<int> entityIndex; | |
auto chains = selection.chains(*selection.first(structure).first); | |
// Get the atoms | |
for( auto& chain : chains ) { | |
for( auto& residue : chain.residues ) { | |
type = residue.name == "LIP" ? CellPackType::lipid : CellPackType::protein; | |
entityIndex = entities[residue.entity_id]; | |
for( auto& atom : residue.atoms ) { | |
float radius = gemmi::Element(atom.element).vdw_r(); | |
simd_float4 simAtom = simd_make_float4(float(atom.pos.x), | |
float(atom.pos.y), | |
float(atom.pos.z), | |
radius); | |
atomsOut.push_back(simAtom); | |
} | |
} | |
} | |
// Get the transforms | |
for( auto op : generator.operators ) { | |
// The matrices are row-major | |
const auto rotation = op.transform.mat; | |
const auto translation = op.transform.vec; | |
// 2. Convert to column-major | |
simd_float4x4 transform; | |
// column-0 | |
transform.columns[0][0] = rotation[0][0]; | |
transform.columns[0][1] = rotation[1][0]; | |
transform.columns[0][2] = rotation[2][0]; | |
// column-1 | |
transform.columns[1][0] = rotation[0][1]; | |
transform.columns[1][1] = rotation[1][1]; | |
transform.columns[1][2] = rotation[2][1]; | |
// column-2 | |
transform.columns[2][0] = rotation[0][2]; | |
transform.columns[2][1] = rotation[1][2]; | |
transform.columns[2][2] = rotation[2][2]; | |
// translation-column-3 | |
transform.columns[3][0] = translation.x; | |
transform.columns[3][1] = translation.y; | |
transform.columns[3][2] = translation.z; | |
transformsOut.push_back(transform); | |
} | |
NSString * nsName = [NSString stringWithUTF8String:assembly.name.c_str()]; | |
CellPackAssembly * cellPackAssembly = [[CellPackAssembly alloc] initWithName:nsName]; | |
cellPackAssembly.cppAtoms = atomsOut; | |
cellPackAssembly.cppTransforms = transformsOut; | |
cellPackAssembly.type = type; | |
if( entityIndex.has_value() ) { | |
const auto name = entityIds[entityIndex.value()]; | |
auto description = entityDescriptions[entityIndex.value()]; | |
// unquote our description | |
{ | |
auto first = description.find("'"); | |
if( first != std::string::npos ) { | |
description.erase(0, 1); | |
} | |
auto last = description.rfind("'"); | |
if( last != std::string::npos ) { | |
description.erase(description.end() - 1); | |
} | |
} | |
cellPackAssembly.entityName = [NSString stringWithUTF8String:name.c_str()]; | |
cellPackAssembly.entityDescription = [NSString stringWithUTF8String:description.c_str()]; | |
} | |
[result addObject:cellPackAssembly]; | |
} | |
} | |
return result; | |
} | |
@end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment