Last active
April 19, 2022 15:35
-
-
Save louity/c7bb42fb577a0827aebed1c984e9098c to your computer and use it in GitHub Desktop.
Angular Fourier Series in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Python implementation of the Angular Fourier Series descriptors defined in the paper | |
'On representing chemical environments', DOI: https://doi.org/10.1103/PhysRevB.87.184115 | |
""" | |
import argparse | |
import os | |
import numpy as np | |
import scipy | |
import scipy.spatial as spatial | |
from mpl_toolkits.mplot3d import axes3d # noqa: f401 unused import | |
import matplotlib.pyplot as plt | |
try: | |
from tqdm import tqdm | |
except ImportError: tqdm = lambda x: x | |
def gaussian(x, mu, sig): | |
return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.))) | |
def compute_W_matrix(n_max, reg=0.): | |
""" W matrix involved in formula (25) section III.D (page 7) concerning | |
orthonomalization of the radial functions \phi. | |
W is defined as the square root of the scalar product matrix S(i,j) = <phi_i, phi_j>. | |
Beacause phi functions are correlated, large n_max may cause negative | |
eigenvalues that yield nans in sqrt matrix, so we added a reg term on the diagonal. | |
Note that scipy.linalg.sqrtm can also be used, but similarly, complex value appear for large | |
n_max. | |
""" | |
overlapp_matrix = np.zeros((n_max, n_max)) | |
for alpha in range(1, n_max+1): | |
for beta in range(1, n_max+1): | |
overlapp_matrix[alpha-1, beta-1] = np.sqrt((5 + 2*alpha)*(5 + 2*beta)) / (5 + alpha + beta) | |
if alpha == beta and reg > 0: | |
overlapp_matrix[alpha-1, beta-1] += reg | |
eigvals, eigvecs = scipy.linalg.eigh(overlapp_matrix) | |
if (eigvals < 0).sum(): | |
print('Negative eigenvalues in matrix W: {}'.format(eigvals)) | |
W_matrix = np.dot(np.dot(eigvecs, np.diag(1. / np.sqrt(eigvals))), eigvecs.transpose()) | |
return W_matrix | |
def periodize_configuration(configuration, r_cut, dimensions): | |
"""Periodically replicate the atoms in a rectangular box that are distance <= r_cut of the faces. | |
Parameters | |
configuration: np.array of shape (n_atoms, 3) | |
coordinates of the atoms to be periodized | |
r_cut: float | |
cutoff radius | |
dimensions: np.array of shape (3,) (or list length 3) | |
dimensions of the periodic rectangle | |
Returns | |
periodized_configuration: np.array of shape (n_atoms_periodized, 3) | |
initial_atom_ids: np.array of shape (n_atoms_periodized, ) | |
ids of the periodized atoms in the initial configuration | |
""" | |
periodized_configuration = [] | |
initial_atom_ids = [] | |
x_translation = np.array([[dimensions[0], 0, 0]], dtype=configuration.dtype) | |
y_translation = np.array([[0, dimensions[1], 0]], dtype=configuration.dtype) | |
z_translation = np.array([[0, 0, dimensions[2]]], dtype=configuration.dtype) | |
mask_true = np.ones(configuration.shape[0], dtype=bool) | |
for i_x, mask_x in [(-1., configuration[:, 0] > (dimensions[0] - r_cut)), (0., mask_true), (1., configuration[:, 0] < r_cut)]: | |
for i_y, mask_y in [(-1., configuration[:, 1] > (dimensions[1] - r_cut)), (0., mask_true), (1., configuration[:, 1] < r_cut)]: | |
for i_z, mask_z in [(-1., configuration[:, 2] > (dimensions[2] - r_cut)), (0., mask_true), (1., configuration[:, 2] < r_cut)]: | |
mask = mask_x * mask_y * mask_z | |
initial_atom_ids.append(np.nonzero(mask)[0]) | |
periodized_configuration.append(configuration[mask] + i_x*x_translation + i_y*y_translation + i_z*z_translation) | |
periodized_configuration = np.concatenate(periodized_configuration, axis=0) | |
initial_atom_ids = np.concatenate(initial_atom_ids, axis=0) | |
return periodized_configuration, initial_atom_ids | |
def compute_AFS_descriptors(configurations, n_max, l_max, r_cut, dimensions, | |
radial_function_type='g_functions', reg_eigenvalues=0., | |
neighbors_in_r_cut=False, radial_tensor_product=False): | |
"""Implementation of the formula given in section III.G (page 9). | |
The indices i and i' in the sum are interpreted as the neighbor indices of a central atom. | |
If neighbors_in_r_cut=True, we add the constraint that the neighbors i and i' must be at a distance <= r_cut | |
""" | |
assert radial_function_type in ['g_function', 'gaussian'], f'invalid radial function type {radial_function_type}' | |
l_values = np.arange(l_max+1).reshape(1, 1, -1) | |
if radial_function_type == 'g_function': | |
W_matrix = compute_W_matrix(n_max, reg=reg_eigenvalues) | |
alphas = np.arange(1, n_max+1).astype('float64').reshape((1, -1)) | |
exponents = alphas + 2 | |
normalizing_constants = np.sqrt(2*alphas+5) / np.power(r_cut, alphas+2.5) | |
elif radial_function_type == 'gaussian': | |
centers = np.linspace(0, r_cut, n_max, endpoint=False).reshape((1, -1)) | |
sigma = 0.5 * centers[0, 1] | |
if radial_tensor_product: | |
AFS_descriptors = np.zeros((configurations.shape[0], configurations.shape[1], n_max**2, l_max+1)) | |
else: | |
AFS_descriptors = np.zeros((configurations.shape[0], configurations.shape[1], n_max, l_max+1)) | |
for i_config in tqdm(range(configurations.shape[0])): | |
configuration = configurations[i_config] | |
periodized_configuration, initial_atom_ids = periodize_configuration(configuration, r_cut, dimensions) | |
point_tree = spatial.cKDTree(periodized_configuration) | |
for i_atom in range(configuration.shape[0]): | |
atom = configuration[i_atom:i_atom+1] | |
neighbors_indices = [n_id for n_id in point_tree.query_ball_point(configuration[i_atom], r_cut) if initial_atom_ids[n_id] != i_atom] | |
neighbors = periodized_configuration[neighbors_indices] | |
r_vectors = neighbors - atom | |
r_norms = np.linalg.norm(r_vectors, axis=1, keepdims=True) | |
if radial_function_type == 'g_function': | |
phi_functions = normalizing_constants * (r_cut - r_norms)**exponents | |
radial_functions = np.dot(phi_functions, W_matrix) | |
elif radial_function_type == 'gaussian': | |
radial_functions = gaussian(r_norms, centers, sigma) | |
r_normalized = r_vectors / r_norms | |
cos_angles = np.dot(r_normalized, r_normalized.transpose()) | |
# triangular-upper mask corresponding to pairs (i,j) with i<j, i.e. pair of different atoms | |
n_neighbors = neighbors.shape[0] | |
triu_mask = np.arange(n_neighbors)[:,None] < np.arange(n_neighbors) | |
if neighbors_in_r_cut: | |
neighbors_indices_pdist_matrix = spatial.distance.squareform(spatial.distance.pdist(neighbors)) | |
neighbors_in_r_cut_mask = neighbors_indices_pdist_matrix < r_cut | |
triu_mask *= neighbors_in_r_cut_mask | |
# triangular-upper indices correspond to pairs (i,j) with i<j, i.e. pair of different atoms | |
cos_angles = np.clip(cos_angles, -1, 1) | |
angles_triu = np.arccos(cos_angles[triu_mask]).reshape(-1, 1, 1) | |
cos_l_angles_triu = np.cos(l_values * angles_triu) | |
if radial_tensor_product: | |
radial_functions_product_triu = np.tensordot(radial_functions.transpose(), radial_functions, axes=0).transpose(1, 2, 0, 3)[triu_mask, :, :].reshape(-1, n_max**2, 1) | |
else: | |
radial_functions_product_triu = np.stack([np.dot(radial_functions[:, n:n+1], radial_functions[:, n:n+1].transpose())[triu_mask] for n in range(n_max)], axis=-1)[:, :, np.newaxis] | |
AFS_descriptors[i_config, i_atom] += (radial_functions_product_triu * cos_l_angles_triu).sum(axis=0) | |
return AFS_descriptors | |
def compute_AFS_descriptors_method_2(configurations, n_max, l_max, r_cut, dimensions, radial_function_type='g_functions', reg_eigenvalues=0.): | |
"""Implementation of the formula given in section III.G (page 9). | |
The index i in the sum is interpreted as the neighbor indices of a central atom and the index i' is interpreted as the neighbors of i. | |
""" | |
assert radial_function_type in ['g_function', 'gaussian'], 'invalid radial function type {}'.format(radial_function_type) | |
l_values = np.arange(l_max+1)[np.newaxis, np.newaxis, :] | |
if radial_function_type == 'g_function': | |
W_matrix = compute_W_matrix(n_max, reg=reg_eigenvalues) | |
alphas = np.arange(1, n_max+1).astype('float64').reshape((1, -1)) | |
exponents = alphas + 2 | |
normalizing_constants = np.sqrt(2*alphas+5) / np.power(r_cut, alphas+2.5) | |
elif radial_function_type == 'gaussian': | |
centers = np.linspace(0, r_cut, n_max, endpoint=False).reshape((1, -1)) | |
sigma = 0.5 * centers[0, 1] | |
AFS_descriptors = np.zeros((configurations.shape[0], configurations.shape[1], n_max, l_max+1)) | |
for i_config in tqdm(range(configurations.shape[0])): | |
configuration = configurations[i_config] | |
periodized_configuration, initial_atom_ids = periodize_configuration(configuration, r_cut, dimensions) | |
point_tree = spatial.cKDTree(periodized_configuration) | |
neighbors_indices_periodized = [] | |
neighbors_indices_initial = [] | |
neighbors_radius_vectors = [] | |
neighbors_radial_functions = [] | |
for i_atom in range(configuration.shape[0]): | |
indices = point_tree.query_ball_point(configuration[i_atom], r_cut) | |
indices = np.array([ind for ind in indices if initial_atom_ids[ind] != i_atom]) | |
neighbors_indices_periodized.append(indices) | |
neighbors_indices_initial.append(initial_atom_ids[indices]) | |
neighbors = periodized_configuration[indices] | |
atom = configuration[i_atom:i_atom+1] | |
radius_vectors = neighbors - atom | |
r_norms = np.linalg.norm(radius_vectors, axis=1, keepdims=True) | |
neighbors_radius_vectors.append(radius_vectors / r_norms) | |
if radial_function_type == 'g_function': | |
phi_functions = normalizing_constants * (r_cut - r_norms)**exponents | |
neighbors_radial_functions.append(np.dot(phi_functions, W_matrix)) | |
elif radial_function_type == 'gaussian': | |
neighbors_radial_functions.append(gaussian(r_norms, centers, sigma)) | |
for i_atom in range(configuration.shape[0]): | |
for j_, j_atom in enumerate(neighbors_indices_initial[i_atom]): | |
r_ij_vector = neighbors_radius_vectors[i_atom][j_][:, np.newaxis] | |
cos_angles_jk = np.dot(neighbors_radius_vectors[j_atom], r_ij_vector) | |
cos_angles_jk = np.clip(cos_angles_jk, -1, 1) | |
angles_jk = np.arccos(cos_angles_jk)[:, :, np.newaxis] | |
g_r_ij = neighbors_radial_functions[i_atom][j_][:, np.newaxis] | |
g_r_jk = neighbors_radial_functions[j_atom][:, :, np.newaxis] | |
cos_l_angles_jk = np.cos(l_values * angles_jk) | |
AFS_descriptors[i_config, i_atom] += 0.5 * (g_r_ij * (g_r_jk * cos_l_angles_jk).sum(axis=0) - g_r_ij**2) | |
return AFS_descriptors | |
def read_poscar_files(path): | |
poscar_filepathes = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)) and os.path.splitext(f)[1] == '.poscar'] | |
poscar_filepathes.sort() | |
configurations = [] | |
for file_path in poscar_filepathes: | |
f = open(os.path.join(path, file_path)) | |
f.readline() # ignore first line " 111 1 Fe 26 0 0 0" | |
f.readline() # ignore second line " 1.0000000 " | |
lattice = np.array([[float(x) for x in f.readline().split()] for _ in range(3)]) | |
n_atoms = int(f.readline()) | |
f.readline() # ignore coordinate type line " Cartesian " | |
atom_positions = np.zeros((n_atoms, 3)) | |
for i_atom in range(n_atoms): | |
atom_positions[i_atom] = np.array([float(x) for x in f.readline().split()])[:3] | |
f.close() | |
configurations.append(atom_positions) | |
return np.stack(configurations, axis=0), lattice | |
def plot_config(configuration): | |
fig = plt.figure() | |
ax = fig.add_subplot(111, projection='3d') | |
x, y, z = configuration[:, 0].copy(), configuration[:, 1].copy(), configuration[:, 2].copy() | |
ax.scatter(x, y, z, marker='o') | |
ax.set_xlabel('X') | |
ax.set_ylabel('Y') | |
ax.set_zlabel('Z') | |
plt.show() | |
return None | |
if __name__ == '__main__': | |
parser = argparse.ArgumentParser('Compute AFS descriptor') | |
parser.add_argument('--poscar_paths', nargs='+', help="paths of poscar files", required=True) | |
parser.add_argument('--r_cut', type=float, help="cutoff radius", required=True) | |
parser.add_argument('--n_max', type=int, help="n radial functions", required=True) | |
parser.add_argument('--l_max', type=int, help="n angular functions", required=True) | |
parser.add_argument('--output_paths', nargs='+', help="path of output np array", default=[]) | |
parser.add_argument('--radial_function_type', choices=['gaussian', 'g_function'], help='radial function type', required=True) | |
parser.add_argument('--reg_eigenvalues', type=float, default=1e-15, help="reg for eigvalues calculations") | |
parser.add_argument('--neighbors_in_r_cut', action='store_true', help="constraint ") | |
parser.add_argument('--radial_tensor_product', action='store_true', help="tensor product of radial") | |
args = parser.parse_args() | |
for i, poscar_path in enumerate(args.poscar_paths): | |
print(f'reading poscar files in {poscar_path}...') | |
configurations, lattice = read_poscar_files(poscar_path) | |
dimensions = np.linalg.norm(lattice, axis=1) | |
print('...done, configurations {} , lattice dimensions {}'.format(configurations.shape, dimensions)) | |
n_max, l_max, r_cut = args.n_max, args.l_max, args.r_cut | |
AFS_desc = compute_AFS_descriptors(configurations, n_max, l_max, r_cut, dimensions, | |
args.radial_function_type, args.reg_eigenvalues, | |
neighbors_in_r_cut=args.neighbors_in_r_cut, | |
radial_tensor_product=args.radial_tensor_product) | |
print(f'AFS descriptor of shape {AFS_desc.shape} computed.') | |
if i < len(args.output_paths): | |
print(f'saving to file {args.output_paths[i]}') | |
np.save(args.output_paths[i], AFS_desc.sum(axis=1)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# commande pour lancer le calcul des descripteurs | |
python AFS.py --poscar_paths data/bulk/ data/DB_92_002/ data/DB_92_003/ data/DB_92_004/ data/DB_92_400/ --output_paths AFS_20_10_5_gfunc_bulk.npy AFS_20_10_5_gfunc_92_002.npy AFS_20_10_5_gfunc_92_003.npy AFS_20_10_5_gfunc_92_004.npy AFS_20_10_5_gfunc_92_400.npy --n_max 20 --l_max 10 --r_cut 5.0 --reg_eigenvalues 1e-15 --radial_function_type g_function | |
# commande pour lancer la regression lineaire | |
python linear_regression.py --bulk_desc_file AFS_20_10_5_gfunc_bulk.npy --thermo_files data/92_002_normalized.data data/92_003_normalized.data data/92_004_normalized.data data/92_400_normalized.data --desc_files AFS_20_10_5_gfunc_92_002.npy AFS_20_10_5_gfunc_92_003.npy AFS_20_10_5_gfunc_92_004.npy AFS_20_10_5_gfunc_92_400.npy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import numpy as np | |
from sklearn import linear_model, model_selection | |
from sklearn.metrics import mean_squared_error, mean_absolute_error | |
parser = argparse.ArgumentParser('Linear regression with descriptors') | |
parser.add_argument('--thermo_files', nargs='+', help="paths of thermo files", required=True) | |
parser.add_argument('--desc_files', nargs='+', help="path of descriptors files", required=True) | |
parser.add_argument('--bulk_desc_file', help="path of bulk desc files", required=True) | |
args = parser.parse_args() | |
assert len(args.thermo_files) == len(args.desc_files), f'number of themo files must be equal to number of desc files' | |
bulk_descriptor = np.load(args.bulk_desc_file).reshape((1, -1)) | |
X, y = [], [] | |
for i_file, thermo_file in enumerate(args.thermo_files): | |
thermo_data = np.loadtxt(thermo_file) | |
sorted_ids = thermo_data[:,0].argsort() | |
thermo_data = thermo_data[sorted_ids] | |
config_ids = (thermo_data[:, 0] - 10001).astype('int') | |
entropies = thermo_data[:, 3] | |
y.append(entropies) | |
missing_entropy_ids = [i for i in np.arange(1, config_ids[-1]+1) if i not in config_ids] | |
print('missing_entropy_ids {}'.format(missing_entropy_ids)) | |
descriptors = np.load(args.desc_files[i_file]) | |
descriptors = descriptors.reshape((descriptors.shape[0], -1))[config_ids] - bulk_descriptor | |
X.append(descriptors) | |
X = np.concatenate(X) | |
y = np.concatenate(y) | |
print('X shape {}, y shape {} y min {} y max {} '.format(X.shape,y.shape, y.min(), y.max())) | |
clf = linear_model.BayesianRidge(compute_score=True, fit_intercept=False) | |
clf.fit(X, y) | |
y_predict_bayes = clf.predict(X) | |
train_rmse = np.sqrt(mean_squared_error(y, y_predict_bayes)) | |
train_mae = mean_absolute_error(y, y_predict_bayes) | |
print(f'BayesianRidge train mae {train_mae:.4f} kB, train rmse {train_rmse:.4f}kB') | |
n_crossval_folds = 5 | |
y_prediction = model_selection.cross_val_predict(clf, X=X, y=y, cv=n_crossval_folds) | |
cv_mae = mean_absolute_error(y_prediction, y) | |
cv_rmse = np.sqrt(mean_squared_error(y_prediction, y)) | |
print(f'BayesianRidge {n_crossval_folds} folds cross val mae {cv_mae:.4f} kB, rmse {cv_rmse:.4f} kB') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment