Subsample Si dataset and fit with ACE
Setup experiment
Load packages.
using LinearAlgebra, Random, InvertedIndices
using Statistics, StatsBase, Distributions, Determinantal
using Unitful, UnitfulAtomic
using AtomsBase, InteratomicPotentials, PotentialLearning
using CSV, JLD, DataFrames
Define atomic type information.
elname, elspec = "Si", [:Si];
Define paths.
# Root of the repository checkout; the BASE_PATH environment variable overrides the default.
base_path = get(ENV, "BASE_PATH", "../../")
# Directory the input files are read from.
inpath = "$base_path/examples/data/Si-3Body-LAMMPS/"
# Directory the results of this example are written to (one sub-directory per element name).
outpath = "$base_path/examples/DPP-ACE-Si/output/$elname/"
"/home/runner/work/PotentialLearning.jl/PotentialLearning.jl/docs/..//examples/DPP-ACE-Si/output/Si/"
Load utility functions.
include("$base_path/examples/DPP-ACE-Si/subsampling_utils.jl");
Load datasets
Load all atomistic datasets: atomistic configurations (atom positions, geometry, etc.) + DFT data (energies, forces, etc.)
# Collect the "xyz" files found in the input directory.
file_arr = readext(inpath, "xyz")
nfile = length(file_arr)
# Load each file with the ExtXYZ reader (constructed with eV and Å units), one dataset per file.
confs_arr = map(file_arr) do file
    load_data(inpath*file, ExtXYZ(u"eV", u"Å"))
end
# Merge the per-file datasets into a single dataset.
confs = concat_dataset(confs_arr)
DataSet{num_configs = 201}
Configuration{S, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}, Forces, Energy}
Configuration{S, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}, Forces, Energy}
⋮
Configuration{S, AtomsBase.FlexibleSystem{3, AtomsBase.Atom, Unitful.Quantity{Float64, 𝐋, Unitful.FreeUnits{(Å,), 𝐋, nothing}}}, Forces, Energy}
Id of configurations per file.
# Number of configurations in each per-file dataset, and the running totals across files.
counts = Int[length(confs_arr[k]) for k in 1:nfile]
stops = cumsum(counts)
# confs_id[k] holds the consecutive ids assigned to the configurations of file k,
# numbering configurations across files in order, starting from 1.
confs_id = Vector{Int64}[collect((stop - len + 1):stop) for (len, stop) in zip(counts, stops)]
# Total number of configurations counted over all files.
n = isempty(stops) ? 0 : last(stops)
Subsample dataset
Create ACE basis.
# Body order and polynomial degree of the basis; both are reused below when naming output files.
nbody = 4
deg = 5
# Build the ACE basis for the species listed in elspec.
ace = ACE(;
    species = elspec,           # species
    body_order = nbody,         # n-body
    polynomial_degree = deg,    # degree of polynomials
    wL = 1.0,                   # default value, see ACE.jl documentation
    csp = 1.0,                  # default value, see ACE.jl documentation
    r0 = 1.0,                   # minimum distance between atoms
    rcutoff = 10.0,
);
Compute and save ACE descriptors for energies and forces.
# Compute the ACE descriptors of every configuration (local descriptors for the energies,
# force descriptors for the forces) and store both under the output directory.
println("Computing local descriptors")
e_descr = compute_local_descriptors(confs, ace; pbar=false)
f_descr = compute_force_descriptors(confs, ace; pbar=false)
# Make sure the output directory exists so the saves below do not fail on a fresh checkout.
mkpath(outpath)
JLD.save(outpath*"$(elname)_energy_descriptors.jld", "e_descr", e_descr)
JLD.save(outpath*"$(elname)_force_descriptors.jld", "f_descr", f_descr)
Computing local descriptors
Update training dataset by adding energy and force descriptors.
# Combine each configuration with its energy and force descriptors, element by element,
# and wrap the result in a DataSet.
ds = DataSet(@. confs + e_descr + f_descr)
# Number of entries in the assembled dataset.
ndata = length(ds);
Post-process results
Compute cross validation error from training dataset.
# DPP batch sizes to try, and the per-batch-size results keyed by batch size.
batch_size = [80, 40]
sel_ind = Dict{Int64, Vector}()
cond_num = Dict{Int64, Vector}()
# Run cross_validation_training (ndiv = 5) once per batch size and record both of its results.
for bs in batch_size
    println("=============== Starting batch size $bs ===============")
    indices, condnums = cross_validation_training(ds; ndiv=5, dpp_batch=bs)
    sel_ind[bs] = indices
    cond_num[bs] = condnums
    println("condnum: $(cond_num[bs])")
end
# Store the selected indices and condition numbers in a single JLD file.
results_file = outpath*"$(elname)_ACE-$(nbody)-$(deg)_DPP_indices_and_condnum.jld"
JLD.save(results_file, "ind", sel_ind, "condnum", cond_num)
=============== Starting batch size 80 ===============
batch 1
batch 2
batch 3
batch 4
batch 5
condnum: [1.8250171327596577e10, 1.8834222892961536e10, 1.8371809106971245e10, 1.8999110129193974e10, 1.930632369850247e10]
=============== Starting batch size 40 ===============
batch 1
batch 2
batch 3
batch 4
batch 5
condnum: [2.0418578156265305e10, 1.9179722454348164e10, 2.015453871188599e10, 1.852346593259402e10, 1.848211189883588e10]
This page was generated using Literate.jl.