Subsample Si dataset and fit with ACE
Setup experiment
Load packages.
using LinearAlgebra, Random, InvertedIndices
using Statistics, StatsBase, Distributions, Determinantal
using Unitful, UnitfulAtomic
using AtomsBase, InteratomicPotentials, PotentialLearning
using CSV, JLD, DataFrames
Define atomic type information.
# Element label (used in output file names) and the species list handed to the basis.
elname = "Si"
elspec = [:Si];
Define paths.
# Root of the repository checkout; override through the BASE_PATH environment variable.
base_path = get(ENV, "BASE_PATH", "../../")
# Build paths with `joinpath` so a trailing separator in `base_path` does not yield
# "..//examples". The final "" keeps a trailing separator, because later code
# concatenates file names directly onto `inpath` and `outpath`.
inpath = joinpath(base_path, "examples", "data", "Si-3Body-LAMMPS", "")
outpath = joinpath(base_path, "examples", "DPP-ACE-Si", "output", elname, "")
# Make sure the output directory exists before any results are written into it.
mkpath(outpath)
"/home/runner/work/PotentialLearning.jl/PotentialLearning.jl/docs/..//examples/DPP-ACE-Si/output/Si/"
Load utility functions.
# `joinpath` avoids a doubled separator when `base_path` already ends with one.
include(joinpath(base_path, "examples", "DPP-ACE-Si", "subsampling_utils.jl"));
Load datasets
Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
# Collect the "xyz" files found under `inpath`.
file_arr = readext(inpath, "xyz")
nfile = length(file_arr)
# Load every file as an extended-XYZ dataset (energies in eV, lengths in Å).
confs_arr = map(file_arr) do file
    load_data(inpath * file, ExtXYZ(u"eV", u"Å"))
end
# Merge the per-file datasets into a single one.
confs = concat_dataset(confs_arr)
DataSet{num_configs = 201}
Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
⋮
Configuration{S, Energy, Forces, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}}
Indices of the configurations that belong to each file.
# For each file k, confs_id[k] holds the positions its configurations occupy in the
# concatenated dataset; n is the running count of configurations seen so far.
confs_id = Vector{Vector{Int64}}(undef, nfile)
n = 0
for k in 1:nfile
    global n  # top-level loop: assign to the global counter
    first_id = n + 1
    n += length(confs_arr[k])
    confs_id[k] = first_id:n
end
Subsample dataset
Create ACE basis.
# Basis hyperparameters; both are reused later when naming the output file.
nbody = 4   # body order
deg = 5     # polynomial degree
ace = ACE(;
    species = elspec,          # chemical species
    body_order = nbody,        # n-body terms
    polynomial_degree = deg,   # degree of the polynomials
    wL = 1.0,                  # default value, see the ACE.jl documentation
    csp = 1.0,                 # default value, see the ACE.jl documentation
    r0 = 1.0,                  # minimum distance between atoms
    rcutoff = 10.0,
);
Compute and save ACE descriptors for energies and forces.
println("Computing local descriptors")
# Descriptors for the energies and for the forces, with progress bars disabled.
e_descr = compute_local_descriptors(confs, ace; pbar = false)
f_descr = compute_force_descriptors(confs, ace; pbar = false)
# Store each descriptor set in its own JLD file under `outpath`.
JLD.save(string(outpath, elname, "_energy_descriptors.jld"), "e_descr", e_descr)
JLD.save(string(outpath, elname, "_force_descriptors.jld"), "f_descr", f_descr)
Computing local descriptors
Update training dataset by adding energy and force descriptors.
# Broadcast `+` over the configurations and both descriptor sets, then wrap the
# result in a `DataSet`. NOTE(review): presumably `+` attaches a descriptor to a
# configuration — confirm against PotentialLearning.jl.
ds = DataSet(confs .+ e_descr .+ f_descr)
# Number of entries in the combined dataset.
ndata = length(ds);
Post-process results
Compute the cross-validation error from the training dataset for each DPP batch size, recording the selected indices and the condition numbers.
# DPP batch sizes to try; results are keyed by batch size.
batch_size = [80, 40]
sel_ind = Dict{Int64, Vector}()
cond_num = Dict{Int64, Vector}()
foreach(batch_size) do bs
    println("=============== Starting batch size $bs ===============")
    inds, conds = cross_validation_training(ds; ndiv = 5, dpp_batch = bs)
    sel_ind[bs] = inds
    cond_num[bs] = conds
    println("condnum: $(cond_num[bs])")
end
# Persist the selected indices and condition numbers for all batch sizes in one file.
JLD.save(
    string(outpath, elname, "_ACE-", nbody, "-", deg, "_DPP_indices_and_condnum.jld"),
    "ind", sel_ind,
    "condnum", cond_num,
)
=============== Starting batch size 80 ===============
batch 1
batch 2
batch 3
batch 4
batch 5
LinearAlgebra.SingularException(18)
Linear system will be solved using pinv.
condnum: [1.9720680789816124e10, 1.837293213026608e10, 1.804373394439172e10, 1.9958444390060787e10, 1.976247074085906e10]
=============== Starting batch size 40 ===============
batch 1
batch 2
batch 3
batch 4
batch 5
condnum: [1.79172982810321e10, 1.7935491927640392e10, 1.9895906928284584e10, 1.7063741088236666e10, 1.862179734152307e10]
This page was generated using Literate.jl.