Subsample Si dataset and fit with ACE

Setup experiment

Load packages.

using LinearAlgebra, Random, InvertedIndices
using Statistics, StatsBase, Distributions, Determinantal
using Unitful, UnitfulAtomic
using AtomsBase, InteratomicPotentials, PotentialLearning
using CSV, JLD, DataFrames

Define atomic type information.

# Element under study: `elname` is interpolated into output paths and file
# names, `elspec` is the species list handed to the ACE basis.
elname = "Si"
elspec = [:Si];

Define paths.

# Base directory the example paths are resolved against; taken from the
# BASE_PATH environment variable when it is set, otherwise a relative default.
base_path = get(ENV, "BASE_PATH", "../../")
# Directory scanned for the input dataset files.
inpath = string(base_path, "/examples/data/Si-3Body-LAMMPS/")
# Directory (per element) that receives the files saved by this example.
outpath = string(base_path, "/examples/DPP-ACE-Si/output/", elname, "/")
"/home/runner/work/PotentialLearning.jl/PotentialLearning.jl/docs/..//examples/DPP-ACE-Si/output/Si/"

Load utility functions.

# Pull in the helper functions that accompany this example.
include(string(base_path, "/examples/DPP-ACE-Si/subsampling_utils.jl"));

Load datasets

Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)

# Names of the "xyz" files that `readext` reports for the input directory.
file_arr = readext(inpath, "xyz")
nfile = length(file_arr)
# Load every file as extended-XYZ data, using the units eV and Å.
confs_arr = map(file_arr) do file
    load_data(inpath * file, ExtXYZ(u"eV", u"Å"))
end
# Merge the per-file datasets into a single one.
confs = concat_dataset(confs_arr)
DataSet{num_configs = 201} 
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
	 ⋮
	 Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}

Indices of the configurations belonging to each file.

# `n` is the running count of configurations already assigned; after the loop
# it holds the total number of configurations across all files.
n = 0
# confs_id[k] lists the positions, in the concatenated ordering, of the
# configurations that came from the k-th file.
confs_id = Vector{Vector{Int64}}(undef, nfile)
for (k, file_confs) in enumerate(confs_arr)
    global n
    nconf = length(file_confs)
    confs_id[k] = collect(n .+ (1:nconf))
    n += nconf
end

Subsample dataset

Create ACE basis.

# Basis hyper-parameters; `nbody` and `deg` are also reused further down when
# naming the output file.
nbody = 4
deg = 5
ace = ACE(;
    species = elspec,            # species
    body_order = nbody,          # n-body
    polynomial_degree = deg,     # degree of polynomials
    wL = 1.0,                    # default; see the ACE.jl documentation
    csp = 1.0,                   # default; see the ACE.jl documentation
    r0 = 1.0,                    # minimum distance between atoms
    rcutoff = 10.0,
);

Compute and save ACE descriptors for energies and forces.

# Make sure the output directory exists *before* the descriptor computation,
# so that the saves below cannot fail on a missing folder after the work has
# already been done. `mkpath` is a no-op when the directory is already there.
mkpath(outpath)

println("Computing local descriptors")
# Descriptors of every configuration in `confs` for the `ace` basis
# (progress bar disabled).
e_descr = compute_local_descriptors(confs, ace; pbar=false)
f_descr = compute_force_descriptors(confs, ace; pbar=false)
# Persist both descriptor sets as JLD files under `outpath`.
JLD.save(outpath*"$(elname)_energy_descriptors.jld", "e_descr", e_descr)
JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr)
Computing local descriptors

Update training dataset by adding energy and force descriptors.

# Combine each configuration with its energy and force descriptors
# (element-wise `+`) and wrap the result in a new DataSet.
ds = DataSet(@. confs + e_descr + f_descr)
ndata = length(ds);

Post-process results

Compute the cross-validation error on the training dataset for each DPP batch size, storing the selected indices and the condition numbers.

# DPP batch sizes to evaluate.
batch_size = [80, 40]
# Results of each run, keyed by batch size.
sel_ind = Dict{Int64, Vector}()
cond_num = Dict{Int64, Vector}()

for bs in batch_size
    println("=============== Starting batch size $bs ===============")
    indices, condnums = cross_validation_training(ds; ndiv=5, dpp_batch=bs)
    sel_ind[bs] = indices
    cond_num[bs] = condnums
    println("condnum: $(cond_num[bs])")
end

# Store both dictionaries in a single JLD file under `outpath`.
results_file = outpath * "$(elname)_ACE-$(nbody)-$(deg)_DPP_indices_and_condnum.jld"
JLD.save(results_file, "ind", sel_ind, "condnum", cond_num)
=============== Starting batch size 80 ===============
batch 1
batch 2
batch 3
batch 4
batch 5
LinearAlgebra.SingularException(18)
Linear system will be solved using pinv.
condnum: [1.9720680789816124e10, 1.837293213026608e10, 1.804373394439172e10, 1.9958444390060787e10, 1.976247074085906e10]
=============== Starting batch size 40 ===============
batch 1
batch 2
batch 3
batch 4
batch 5
condnum: [1.79172982810321e10, 1.7935491927640392e10, 1.9895906928284584e10, 1.7063741088236666e10, 1.862179734152307e10]

This page was generated using Literate.jl.