Subsample a-HfO2 dataset and fit with ACE

Setup experiment

Load packages.

using AtomsBase, InteratomicPotentials, PotentialLearning
using Unitful, UnitfulAtomic
using LinearAlgebra, Random, DisplayAs

Define paths.

base_path = haskey(ENV, "BASE_PATH") ? ENV["BASE_PATH"] : "../../"
ds_path   = "$base_path/examples/data/a-HfO2/a-HfO2-300K-NVT-6000.extxyz"
res_path  = "$base_path/examples/DPP-ACE-aHfO2-1/results/";

Load utility functions.

include("$base_path/examples/utils/utils.jl");

Create experiment folder.

run(`mkdir -p $res_path`);

Load datasets

Load atomistic dataset: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)

ds = load_data(ds_path, uparse("eV"), uparse("Å"))[1:1000]; # Load first 1K samples.

Split atomistic dataset into training and test

n_train, n_test = 100, 50 # Few samples per dataset are used in this example.
conf_train, conf_test = split(ds, n_train, n_test)
(DataSet{num_configs = 100} 
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
	 ⋮
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}, DataSet{num_configs = 50} 
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
	 ⋮
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}})

Subsample dataset

Compute ACE descriptors for energies to be used as subsampling input.

basis = ACE(species           = [:Hf, :O],
            body_order        = 2,
            polynomial_degree = 3,
            rcutoff           = 5.0,
            wL                = 1.0,
            csp               = 1.0,
            r0                = 1.0)
e_descr = compute_local_descriptors(conf_train,
                                    basis,
                                    pbar = false);

Update subsampling dataset.

conf_train_kDPP = DataSet(conf_train .+ e_descr);

Create DPP subselector.

dataset_selector = kDPP(  conf_train_kDPP,
                          GlobalMean(),
                          DotProduct();
                          batch_size = 50)
kDPP(L-Ensemble.
Number of items in ground set : 100. Max. rank : 100. Rescaling constant α=1.7269532697470318e15
, 50)

Subsample trainig dataset.

inds = get_random_subset(dataset_selector)
conf_train = @views conf_train[inds];

Compute descriptors

Create ACE basis.

basis = ACE(species           = [:Hf, :O],
            body_order        = 3,
            polynomial_degree = 4,
            rcutoff           = 5.0,
            wL                = 1.0,
            csp               = 1.0,
            r0                = 1.0)
@save_var res_path basis;

Compute ACE descriptors for energy and forces based on the atomistic training configurations.

println("Computing energy descriptors of training dataset...")
e_descr_train = compute_local_descriptors(conf_train, basis;
                                          pbar=false)
println("Computing force descriptors of training dataset...")
f_descr_train = compute_force_descriptors(conf_train, basis;
                                          pbar=false)
50-element Vector{ForceDescriptors}:
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ⋮
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}
 ForceDescriptors{n = 96, d = 3}

Update training dataset by adding energy and force descriptors.

ds_train = DataSet(conf_train .+ e_descr_train .+ f_descr_train);

Learn coefficients

Learn ACE coefficients based on ACE descriptors and DFT data.

println("Learning energies and forces...")
lb = LBasisPotential(basis)
ws, int = [1.0, 1.0], false
learn!(lb, ds_train, ws, int)
@save_var res_path lb.β
@save_var res_path lb.β0
lb.β, lb.β0
([623.1064321157913, 347.1332782034459, 137.81193696582082, 27.57505320704419, -251737.29687868553, 74603.55914295642, 97988.1812646828, 37920.0230862549, 14713.817976087548, 8018.749019118775  …  144.91824189132498, 78.63895303117746, -11963.674336554115, -2425.9624224510344, 78.85349005853638, 1.0756412166049816e6, 554846.7628225728, 25240.41462350998, -1029.7600401155412, 25895.160756742054], [0.0])

Post-process results

Compute ACE descriptors for energy and forces based on the atomistic test configurations.

println("Computing energy descriptors of test dataset...")
e_descr_test = compute_local_descriptors(conf_test, basis;
                                         pbar = false)
println("Computing force descriptors of test dataset...")
f_descr_test = compute_force_descriptors(conf_test, basis;
                                         pbar = false);
Computing energy descriptors of test dataset...
Computing force descriptors of test dataset...

Update test dataset by adding energy and force descriptors.

ds_test = DataSet(conf_test .+ e_descr_test .+ f_descr_test);

Get and save true and predicted values for energies and forces.

n_atoms_train = length.(get_system.(ds_train))
n_atoms_test = length.(get_system.(ds_test))

e_train, e_train_pred = get_all_energies(ds_train) ./ n_atoms_train,
                        get_all_energies(ds_train, lb) ./ n_atoms_train
f_train, f_train_pred = get_all_forces(ds_train),
                        get_all_forces(ds_train, lb)
@save_var res_path e_train
@save_var res_path e_train_pred
@save_var res_path f_train
@save_var res_path f_train_pred

e_test, e_test_pred = get_all_energies(ds_test) ./ n_atoms_test,
                      get_all_energies(ds_test, lb) ./ n_atoms_test
f_test, f_test_pred = get_all_forces(ds_test),
                      get_all_forces(ds_test, lb)
@save_var res_path e_test
@save_var res_path e_test_pred
@save_var res_path f_test
@save_var res_path f_test_pred;

Compute and save training metrics.

e_train_metrics = get_metrics(e_train, e_train_pred,
                              metrics = [mae, rmse, rsq],
                              label = "e_train")
f_train_metrics = get_metrics(f_train, f_train_pred,
                              metrics = [mae, rmse, rsq, mean_cos],
                              label = "f_train")
train_metrics = merge(e_train_metrics, f_train_metrics)
@save_dict res_path train_metrics
train_metrics
OrderedCollections.OrderedDict{String, Float64} with 7 entries:
  "e_train_mae"      => 0.00183458
  "e_train_rmse"     => 0.00205321
  "e_train_rsq"      => 0.591606
  "f_train_mae"      => 0.172047
  "f_train_rmse"     => 0.219337
  "f_train_rsq"      => 0.849921
  "f_train_mean_cos" => 0.886787

Compute and save test metrics.

e_test_metrics = get_metrics(e_test, e_test_pred,
                             metrics = [mae, rmse, rsq],
                             label = "e_test")
f_test_metrics = get_metrics(f_test, f_test_pred,
                             metrics = [mae, rmse, rsq, mean_cos],
                             label = "f_test")
test_metrics = merge(e_test_metrics, f_test_metrics)
@save_dict res_path test_metrics
test_metrics
OrderedCollections.OrderedDict{String, Float64} with 7 entries:
  "e_test_mae"      => 0.000756613
  "e_test_rmse"     => 0.000914047
  "e_test_rsq"      => 0.92634
  "f_test_mae"      => 0.174149
  "f_test_rmse"     => 0.219437
  "f_test_rsq"      => 0.849902
  "f_test_mean_cos" => 0.887488

Plot and save energy results.

e_plot = plot_energy(e_train, e_train_pred,
                     e_test, e_test_pred)
@save_fig res_path e_plot
DisplayAs.PNG(e_plot)
Example block output

Plot and save force results.

f_plot = plot_forces(f_train, f_train_pred,
                     f_test, f_test_pred)
@save_fig res_path f_plot
DisplayAs.PNG(f_plot)
Example block output

Plot and save training force cosine.

e_train_plot = plot_energy(e_train, e_train_pred)
f_train_plot = plot_forces(f_train, f_train_pred)
f_train_cos  = plot_cos(f_train, f_train_pred)
@save_fig res_path e_train_plot
@save_fig res_path f_train_plot
@save_fig res_path f_train_cos
DisplayAs.PNG(f_train_cos)
Example block output

Plot and save test force cosine.

e_test_plot = plot_energy(e_test, e_test_pred)
f_test_plot = plot_forces(f_test, f_test_pred)
f_test_cos  = plot_cos(f_test, f_test_pred)
@save_fig res_path e_test_plot
@save_fig res_path f_test_plot
@save_fig res_path f_test_cos
DisplayAs.PNG(f_test_cos)
Example block output

This page was generated using Literate.jl.