Source code for npl.descriptors.local_environment_feature_classifier

import numpy as np
from sklearn.cluster import KMeans
import copy


class LocalEnvironmentFeatureClassifier:
    """Base class that labels every atom of a particle with an integer feature.

    Subclasses supply the actual classification by implementing
    ``compute_n_features`` and ``predict_atom_feature``. This class provides
    the bookkeeping around them: it stores each atom's label on the particle
    and tallies the labels into a count vector.

    Attributes:
        local_environment_calculator: Object used to recompute local
            environments. This class itself only calls its
            ``compute_local_environments(particle)`` method.
        feature_key: Key under which atom features and the feature vector
            are read from / written to the particle. ``None`` until a
            subclass or ``set_feature_key`` assigns it.
    """

    def __init__(self, local_environment_calculator):
        """Store the calculator and start without a feature key."""
        self.feature_key = None
        self.local_environment_calculator = local_environment_calculator

    def compute_atom_features(self, particle, recompute_local_environments=False):
        """Assign a feature to every atom index of ``particle``.

        Args:
            particle: Object providing ``get_indices()`` and
                ``get_atom_features(key)``.
            recompute_local_environments: When true, all local environments
                are recomputed in bulk before the atoms are classified.
        """
        if recompute_local_environments:
            self.local_environment_calculator.compute_local_environments(particle)

        # NOTE(review): the flag is forwarded to every per-atom call as well,
        # so a subclass may recompute each environment a second time after
        # the bulk pass above - confirm whether that is intended.
        for idx in particle.get_indices():
            self.compute_atom_feature(particle, idx, recompute_local_environments)

    def compute_feature_vector(self, particle, recompute_atom_features=True,
                               recompute_local_environments=False):
        """Count how often each feature occurs and store the counts on ``particle``.

        The result is not returned; it is handed to
        ``particle.set_feature_vector(self.feature_key, vector)``.

        Args:
            particle: The particle to describe.
            recompute_atom_features: When true, atom features are refreshed
                via ``compute_atom_features`` before counting.
            recompute_local_environments: Forwarded to
                ``compute_atom_features``.
        """
        if recompute_atom_features:
            self.compute_atom_features(particle, recompute_local_environments)

        counts = np.zeros(self.compute_n_features(particle))
        labels = particle.get_atom_features(self.feature_key)
        for idx in particle.get_indices():
            # Each atom's label is used directly as a position in the vector.
            counts[labels[idx]] += 1

        particle.set_feature_vector(self.feature_key, counts)

    def compute_atom_feature(self, particle, atom_index, recompute_local_environment=False):
        """Predict the feature of one atom and record it on ``particle``.

        The container returned by ``particle.get_atom_features`` is modified
        in place and never written back, so it is presumably held by
        reference inside the particle.
        """
        # The right-hand side is evaluated first, so the prediction still
        # happens before the feature container is fetched.
        particle.get_atom_features(self.feature_key)[atom_index] = self.predict_atom_feature(
            particle, atom_index, recompute_local_environment)

    def get_feature_key(self):
        """Return the key currently used to store features on a particle."""
        return self.feature_key

    def set_feature_key(self, feature_key):
        """Replace the key used to store features on a particle."""
        self.feature_key = feature_key

    def compute_n_features(self, particle):
        """Return the feature vector length; must be overridden by subclasses."""
        raise NotImplementedError

    def predict_atom_feature(self, particle, lattice_index, recompute_local_environment=False):
        """Return the feature of a single atom; must be overridden by subclasses."""
        raise NotImplementedError
class KMeansClassifier(LocalEnvironmentFeatureClassifier):
    """Label atoms by element and by the k-means cluster of their local environment.

    The feature of an atom is ``symbol_index * n_cluster + cluster``, where
    ``symbol_index`` is the position of the atom's symbol in the sorted list
    of symbols present in the particle and ``cluster`` is the label predicted
    by the fitted k-means model.

    Attributes:
        kMeans: Fitted ``sklearn.cluster.KMeans`` model, or ``None`` until
            ``kmeans_clustering`` has been called.
        n_cluster: Number of k-means clusters.
        feature_key: Key under which features are stored on a particle.
    """

    def __init__(self, n_cluster, local_environment_calculator, feature_key):
        """Store the configuration; the k-means model is fitted separately."""
        LocalEnvironmentFeatureClassifier.__init__(self, local_environment_calculator)
        self.kMeans = None
        self.n_cluster = n_cluster
        self.feature_key = feature_key

    def compute_n_features(self, particle):
        """Return ``n_cluster`` times the number of symbols in ``particle``."""
        n_elements = len(particle.get_all_symbols())
        return self.n_cluster * n_elements

    # TODO problematic: length of feature vector different for pure particles and bimetallic ones
    def predict_atom_feature(self, particle, atom_index, recompute_local_environment=False):
        """Return the combined element/cluster feature of one atom.

        Args:
            particle: Particle providing ``get_symbol``, ``get_all_symbols``
                and ``get_local_environment``.
            atom_index: Index of the atom to classify.
            recompute_local_environment: When true, the environment is taken
                fresh from the calculator's ``predict_local_environment``
                instead of the value stored on the particle.

        Raises:
            AttributeError: If ``kmeans_clustering`` has not been called yet
                (``self.kMeans`` is still ``None``).
        """
        symbol = particle.get_symbol(atom_index)
        symbols = sorted(particle.get_all_symbols())
        offset = symbols.index(symbol) * self.n_cluster

        if recompute_local_environment:
            local_environment = self.local_environment_calculator.predict_local_environment(
                particle, atom_index)
        else:
            local_environment = particle.get_local_environment(atom_index)

        # predict() takes a batch of samples; wrap the single environment
        # in a list and unwrap the single label.
        environment = self.kMeans.predict([local_environment])[0]
        return offset + environment

    def kmeans_clustering(self, training_set):
        """Fit the k-means model on the local environments of all training particles.

        Args:
            training_set: Iterable of particles; each must provide
                ``get_local_environments()`` returning a mapping whose values
                are the environments used as training samples.
        """
        local_environments = []
        for particle in training_set:
            # extend() appends in place; rebuilding the list with ``+`` for
            # every particle would re-copy all samples gathered so far.
            local_environments.extend(particle.get_local_environments().values())

        print("Starting kMeans")
        # random_state is fixed so repeated fits on the same data agree.
        self.kMeans = KMeans(n_clusters=self.n_cluster, random_state=0).fit(local_environments)
# TODO rename, rework with offsets etc.
class TopologicalEnvironmentClassifier(LocalEnvironmentFeatureClassifier):
    """Label atoms by element, coordination number and first environment entry.

    The feature of an atom is::

        symbol_index * 91 + cn * (cn + 1) / 2 + environment[0]

    where ``symbol_index`` is the position of the atom's symbol in the sorted
    ``symbols`` list and ``cn`` is the length of the atom's neighbor list.
    91 is the number of ``(cn, k)`` pairs with ``0 <= k <= cn <= 12``, i.e.
    the size of one element's block of features.
    ``environment[0]`` presumably counts the neighbors of one element type -
    TODO confirm against the local environment calculator.

    Attributes:
        symbols: Sorted copy of the symbols passed to the constructor.
        coordination_number_offsets: Start of each coordination number's
            slice inside an element block, for ``cn`` in 0..12.
        feature_key: Always ``'TEC'``.
    """

    # Number of features reserved per element (see class docstring).
    _FEATURES_PER_ELEMENT = 91
    # Largest coordination number that has an offset entry.
    _MAX_COORDINATION_NUMBER = 12

    def __init__(self, local_environment_calculator, symbols):
        """Store a sorted copy of ``symbols`` and precompute the offsets.

        Args:
            local_environment_calculator: Calculator used when an
                environment has to be recomputed.
            symbols: Iterable of element symbols; the caller's object is
                left unmodified.
        """
        LocalEnvironmentFeatureClassifier.__init__(self, local_environment_calculator)
        # sorted() returns a new list, so no explicit copy is needed and any
        # iterable (list, tuple, set) is accepted.
        self.symbols = sorted(symbols)
        self.coordination_number_offsets = [
            cn * (cn + 1) // 2 for cn in range(self._MAX_COORDINATION_NUMBER + 1)
        ]
        self.feature_key = 'TEC'

    def compute_n_features(self, particle):
        """Return the feature vector length.

        One block of 91 features is reserved per known symbol. The length
        never drops below 182 (two blocks), which keeps the vector size
        unchanged for classifiers built with one or two symbols, while a
        third or later symbol no longer produces out-of-range indices.
        """
        return max(2, len(self.symbols)) * self._FEATURES_PER_ELEMENT

    def predict_atom_feature(self, particle, atom_index, recompute_local_environment=False):
        """Return the feature index of one atom.

        Args:
            particle: Particle providing ``get_symbol``,
                ``get_local_environment`` and ``neighbor_list``.
            atom_index: Index of the atom to classify.
            recompute_local_environment: When true, the calculator refreshes
                the stored environment of this atom before it is read.

        Raises:
            ValueError: If the atom's symbol is not in ``self.symbols``.
            IndexError: If the atom has more than 12 neighbors.
        """
        symbol = particle.get_symbol(atom_index)
        element_offset = self.symbols.index(symbol) * self._FEATURES_PER_ELEMENT

        if recompute_local_environment:
            self.local_environment_calculator.compute_local_environment(particle, atom_index)

        environment = particle.get_local_environment(atom_index)
        coordination_number = len(particle.neighbor_list[atom_index])

        # Cast to a plain int so the result is usable as an array index
        # whatever numeric type the environment entries have.
        return int(element_offset
                   + self.coordination_number_offsets[coordination_number]
                   + environment[0])
# TODO rename
class CoordinationNumberClassifier(LocalEnvironmentFeatureClassifier):
    """Label atoms by occupancy (``'X'`` or not) and by their first environment entry.

    The first entry of an atom's local environment is treated as its
    coordination number ``cn``. The feature is::

        symbol_index * 91 + cn * (cn + 1) / 2 + cn

    with ``symbol_index`` equal to 1 for the symbol ``'X'`` and 0 otherwise.

    Attributes:
        coordination_number_offsets: ``cn * (cn + 1) / 2`` for ``cn`` in 0..12.
        feature_key: Always ``'TEC'``.
            NOTE(review): this is the same key TopologicalEnvironmentClassifier
            uses - confirm the two are never applied to the same particle.
    """

    def __init__(self, local_environment_calculator):
        """Precompute the coordination number offsets and set the feature key."""
        LocalEnvironmentFeatureClassifier.__init__(self, local_environment_calculator)
        self.coordination_number_offsets = [cn * (cn + 1) // 2 for cn in range(13)]
        self.feature_key = 'TEC'

    def compute_n_features(self, particle):
        """Return the fixed feature vector length: two blocks of 91 features."""
        return 182

    def predict_atom_feature(self, particle, atom_index, recompute_local_environment=False):
        """Return the feature index of one atom.

        Args:
            particle: Particle providing ``get_symbol`` and
                ``get_local_environment``.
            atom_index: Index of the atom to classify.
            recompute_local_environment: When true, the calculator refreshes
                the stored environment of this atom before it is read.

        Raises:
            IndexError: If the coordination number exceeds 12.
        """
        # TODO not robust, only works if 'X' as 'empty site' is second entry
        # TODO should specify index of non-vacancy element
        symbol_index = 1 if particle.get_symbol(atom_index) == 'X' else 0
        element_offset = symbol_index * 91

        if recompute_local_environment:
            self.local_environment_calculator.compute_local_environment(particle, atom_index)

        environment = particle.get_local_environment(atom_index)
        # Cast once to a plain int: the value is used as a list index below,
        # which fails for float-typed environment entries.
        coordination_number = int(environment[0])

        return (element_offset
                + self.coordination_number_offsets[coordination_number]
                + coordination_number)