From 2825da848bdd3c088af561d892322a6deb86bf41 Mon Sep 17 00:00:00 2001
From: mtsch
Date: Sat, 11 Nov 2023 02:14:52 +1300
Subject: [PATCH] PDVec (#183)

* Move `fciqmc_step!` into Interfaces (?)
* Rework initiator dvec
* Move operator-vector function to DictVectors
* Add PDVecs and friends
* some fixes and reworking
* Fix MPI communication
* Fix mpi_recv_any signature
* Add tests
* Fix bugs with dot and initiators, add MPI tests
* Remove annihilation tracking workaround
* Benchmark PDVec instead
* Some cleaning in Hamiltonians, make PDVec the default.
* Fix error caused by moving things around
* Doc updates
* Move hacks to pdworkingmemory
* temp
* Fix up after rebase
* Change which project mpi_runtests is run from
* instantiate MPI deps before test
* Another attempt to fix MPI CI
* Attempt to fix MPI CI again
* using -> import
* Add debugging info to job
* Fix benchmark, do MPI first
* MPI tests without MPI
* Fix project paths
* Fix install of MPI deps
* Fix deps
* Fix Pkg stuff
* Fix deprecations in RMPI
* Restore ordering, fixing errors.
* Fix three-argument dotting issues
* Benchmark with threads=auto
* Attempt to add coverage for MPI
* Fix more depwarns in RMPI
* Fix even more depwarns; fix benchmark
* Fix depwarn
* Fix MPI probing
* Split testing into phases
* Fix yml
* Re-rework actions
* Change MPI coverage tracking
* Remove dead code
* Removed too much dead code
* Remove foreach calls that modify LHS.
* Reordering, more docs
* Fix the doctest
* Fix tests
* Fix bug in AllOverlaps
* Change example to regular code listing
* Minor fix in tests, bugfix
* skip some tests
* Improve coverage and fix bugs
* Remove folds from testset
* Add distributed dot tests, remove dead code, reenable threads in testing
* fix yml
* Fix randomness in dot tests
* Docstring fixup
* Alleviate type instability
* Make example numerically stable
* Refix type instability
* Fix doctests
* enable colors, don't test PDVec docstring
* Update src/DictVectors/communicators.jl
Co-authored-by: christofbradly
* Update src/strategies_and_params/replicastrategy.jl
Co-authored-by: christofbradly
* Change default vector choice, include it in docs.
* Rename functions
* Fix up docs
* Keep executor in similar, fix `length` for working memory
* Use PDVec in MPI example
* Add an extension for using Rimu with KrylovKit (#187)
* Add an extension for using Rimu with KrylovKit
* Add the actual extension
* Loosen the version requirement
* Add KrylovKit to [extras]
* Fix broken dot
* Use starting address that does not produce a vector that is orthogonal to the groundstate
* Remove method that provides starting vector.
* apply bitstring fix * `PDVec` with tuples (#188) * Replace vectors with tuples, remove executors * add fix * doc tweak * Remove the option of setting the number of segments * Fix bugs and tests * single-segment optimization * fix depwarn * remove rogue num_segments * improve test coverage * fix mpi test * fix some dots * Merge branch 'feature/pdvec' of github.com:joachimbrand/Rimu.jl into feature/pdvec * Fix equality for `DVec` * Fix constructor and tests * Make bangbang functions work properly * checkpoint * checkpoint * set version for release * Fixes after merge * Update Project.toml version * Fix KrylovKit extension * fix MPI tests * Fix mul with deterministic vectors * `add!` -> `dict_add!` * docstring for `dict_add!`, remove `propagators.jl` * Remove mentions of `OperatorMulPropagator` * attempt at fixing type instability * Introduce FrozenPDVec for faster projected energy * Small fixes * Fix diagonal operator spawning * fix some type instabilities * Rework benchmarks - trial run * Update Project.toml * Update tune file * re-update benchmarks * Remove type stability enforcing * Add see also to all DVec types * CI setup * Remove unused parameter * Fix compression bug * Fix the benchmark setup * Fix docs issues * Fix compression issues * Doc improvements * Fix bug in tests, docstring * Docstring fix * Remove unused dependency, clean up example script * Review comments * Add MPI docs, review comments * Apply suggestions from code review Co-authored-by: Joachim Brand Co-authored-by: christofbradly * Update MPI doc * add `PDVecKeys/Vals/Pairs` docstrings * filter(!), out-of-place map, fix potential bug in map! * Fix doc links * Fix new code * Another bugfix * One more * Fix bug in filter! * Fix wrong method call * Remove type from `f` in `filter` * Fix method errors * Update docs/src/mpi.md Co-authored-by: christofbradly * bump version * bump version --------- Co-authored-by: christofbradly Co-authored-by: Joachim Brand Co-authored-by: Joachim Brand --- Project.toml | 10 +- benchmark/benchmarks.jl | 18 +- docs/make.jl | 1 + docs/src/dictvectors.md | 19 + docs/src/index.md | 11 +- docs/src/mpi.md | 92 ++ ext/KrylovKitExt.jl | 49 ++ scripts/BHM-example-mpi.jl | 22 +- src/DictVectors/DictVectors.jl | 12 +- src/DictVectors/abstractdvec.jl | 34 +- src/DictVectors/communicators.jl | 313 +++++++ src/DictVectors/dvec.jl | 2 +- src/DictVectors/initiatordvec.jl | 9 +- src/DictVectors/initiators.jl | 19 +- src/DictVectors/pdvec.jl | 872 +++++++++++++++++++ src/DictVectors/pdworkingmemory.jl | 270 ++++++ src/DictVectors/projectors.jl | 5 +- src/Interfaces/Interfaces.jl | 1 + src/Interfaces/dictvectors.jl | 10 +- src/Interfaces/stochasticstyles.jl | 19 +- src/RMPI/RMPI.jl | 3 +- src/RMPI/mpidata.jl | 2 +- src/StochasticStyles/compression.jl | 21 +- src/StochasticStyles/spawning.jl | 2 +- src/lomc.jl | 17 +- src/strategies_and_params/replicastrategy.jl | 48 +- test/DictVectors.jl | 184 +++- test/Interfaces.jl | 3 + test/KrylovKit.jl | 27 + test/StochasticStyles.jl | 27 +- test/lomc.jl | 134 +-- test/mpi_runtests.jl | 193 +++- 32 files changed, 2243 insertions(+), 206 deletions(-) create mode 100644 docs/src/mpi.md create mode 100644 ext/KrylovKitExt.jl create mode 100644 src/DictVectors/communicators.jl create mode 100644 src/DictVectors/pdvec.jl create mode 100644 src/DictVectors/pdworkingmemory.jl diff --git a/Project.toml b/Project.toml index 8c5ef8275..864d99f01 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Rimu" uuid = "c53c40cc-bd84-11e9-2cf4-a9fde2b9386e" authors = 
["Joachim Brand "] -version = "0.9.1-dev" +version = "0.10.0" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" @@ -39,6 +39,12 @@ TerminalLoggers = "5d786b92-1e48-4d6f-9151-6b4477ca9bed" TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" +[weakdeps] +KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" + +[extensions] +KrylovKitExt = "KrylovKit" + [compat] Arrow = "1.5, 2" BSON = "0.3" @@ -50,6 +56,7 @@ Distributions = "0.25" FFTW = "1" Folds = "0.2" HypergeometricFunctions = "0.3" +KrylovKit = "0.6" MPI = "0.20" MacroTools = "0.5" Measurements = "2" @@ -72,6 +79,7 @@ VectorInterface = "0.2, 0.3, 0.4" julia = "1.7" [extras] +KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 0ec8ab84e..24685aec5 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -9,16 +9,16 @@ const SUITE = @benchmarkset "Rimu" begin M = 16 addr = FermiFS2C(M, 1 => 1, 2 => 1, 1 => -1, 2 => -1) ham = HubbardRealSpace(addr; geometry=PeriodicBoundaries(4, 4)) - dv = DVec(addr => 1.0) - eigsolve(ham, dv, 1, :SR; issymmetric=true, tol=1e-9) + dv = PDVec(addr => 1.0) + eigsolve(ham, dv, 1, :SR; tol=1e-9) end seconds=30 @case "Bose-Hubbard in momentum space" begin M = N = 10 addr = BoseFS(M, M ÷ 2 => N) ham = HubbardMom1D(addr; u=6.0) - dv = DVec(addr => 1.0) - eigsolve(ham, dv, 1, :SR; issymmetric=true, tol=1e-9) + dv = PDVec(addr => 1.0) + eigsolve(ham, dv, 1, :SR; tol=1e-9) end seconds=40 end @@ -28,7 +28,7 @@ const SUITE = @benchmarkset "Rimu" begin M = 20 addr = BoseFS(M, M÷2 => 10) ham = HubbardMom1D(addr; u=6.0) - dv1 = DVec(addr => 1.0) + dv1 = PDVec(addr => 1.0) dv2 = zerovector(dv1) mul!(dv2, ham, dv1) mul!(dv1, ham, dv2) @@ -42,7 +42,7 @@ const SUITE = @benchmarkset "Rimu" begin M = 30 addr = FermiFS2C(M, M÷2-1 => 1, M => 1, M÷2 => -1, M÷2+1 => -1) ham = Transcorrelated1D(addr) - dv1 = DVec(addr => 1.0) + dv1 = PDVec(addr => 1.0) dv2 = zerovector(dv1) mul!(dv2, ham, dv1) mul!(dv1, ham, dv2) @@ -56,7 +56,7 @@ const SUITE = @benchmarkset "Rimu" begin @case "(10, 20) Mom space with projected energy and initiator" begin addr = BoseFS(20, 10 => 10) ham = HubbardMom1D(addr, u=1.0) - dv = InitiatorDVec(addr => 1.0; style=IsDynamicSemistochastic()) + dv = PDVec(addr => 1.0; style=IsDynamicSemistochastic(), initiator=true) post_step = ProjectedEnergy(ham, dv) s_strat = DoubleLogUpdate(targetwalkers=40_000) @@ -66,7 +66,7 @@ const SUITE = @benchmarkset "Rimu" begin @case "(4+1, 11) 2C Mom space with G2Correlators" begin addr = BoseFS2C(ntuple(i -> ifelse(i == 5, 4, 0), 11), ntuple(==(5), 11)) ham = BoseHubbardMom1D2C(addr, v=0.1) - dv = DVec(addr => 1.0f0; style=IsDynamicSemistochastic{Float32}()) + dv = PDVec(addr => 1.0f0; style=IsDynamicSemistochastic{Float32}()) s_strat = DoubleLogUpdate(targetwalkers=10_000) replica = AllOverlaps(2, ntuple(i -> G2Correlator(i - 1), 7)) @@ -76,7 +76,7 @@ const SUITE = @benchmarkset "Rimu" begin @case "(50, 50) Real space" begin addr = near_uniform(BoseFS{50,50}) ham = HubbardReal1D(addr, u=6.0) - dv = DVec(addr => 1.0; style=IsDynamicSemistochastic()) + dv = PDVec(addr => 1.0; style=IsDynamicSemistochastic()) s_strat = DoubleLogUpdate(targetwalkers=50_000) lomc!(ham, dv; s_strat, dτ=1e-4, laststep=1000) diff --git a/docs/make.jl b/docs/make.jl index b21d0b4d8..6613cbe79 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -50,6 +50,7 @@ makedocs(; "Examples" => 
EXAMPLES_PAIRS[sortperm(EXAMPLES_NUMS)],
        "User documentation" => [
            "StatsTools" => "statstools.md",
+            "Using MPI" => "mpi.md",
        ],
        "Developer documentation" => [
            "Interfaces" => "interfaces.md",
diff --git a/docs/src/dictvectors.md b/docs/src/dictvectors.md
index 6a27ac252..4618b3318 100644
--- a/docs/src/dictvectors.md
+++ b/docs/src/dictvectors.md
@@ -10,6 +10,7 @@ AbstractDVec
```@docs
DVec
InitiatorDVec
+PDVec
```

## Interface functions
@@ -52,6 +53,8 @@ NormProjector
Norm2Projector
UniformProjector
Norm1ProjectorPPop
+Rimu.DictVectors.FrozenDVec
+Rimu.DictVectors.FrozenPDVec
```

## Initiator rules
@@ -70,6 +73,22 @@ Rimu.DictVectors.NonInitiator
Rimu.DictVectors.NonInitiatorValue
```

+## `PDVec` internals
+
+### Working memory
+
+```@autodocs
+Modules = [DictVectors]
+Pages = ["pdworkingmemory.jl"]
+```
+
+### Communicators
+
+```@autodocs
+Modules = [DictVectors]
+Pages = ["communicators.jl"]
+```
+
## Index
```@index
Pages = ["dictvectors.md"]
diff --git a/docs/src/index.md b/docs/src/index.md
index 262823787..359f2892e 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -25,7 +25,7 @@ The code supports parallelisation with MPI (harnessing [`MPI.jl`](https://github

**Concept:** Joachim Brand and Elke Pahl.

-**Contributors:** Joachim Brand, Elke Pahl, Mingrui Yang, Matija Cufar, Chris Bradly.
+**Contributors:** Joachim Brand, Elke Pahl, Mingrui Yang, Matija Čufar, Chris Bradly.

Discussions, help, and additional contributions are acknowledged by Ali Alavi, Didier
Adrien, Chris Scott (NeSI), Alexander Pletzer (NeSI).
@@ -110,10 +110,11 @@ on the same hardware makes use of 4 cores and the main part completes in
1.04 seconds, a speedup factor of 2.6. This seems reasonable, given that extra work
needs to be done for communicating between different processes.

-Using MPI parallelism with `Rimu` is easy. Enabling MPI for use in [`lomc!()`](@ref)
-is done by wrapping the primary data structures as [`MPIData`](@ref Main.Rimu.RMPI.MPIData). As a consequence, data will
-be stored in a distributed fashion among the MPI ranks and only communicated between ranks when
-necessary. The relevant functionality is provided by the module `Rimu.RMPI`.
+Using MPI parallelism with `Rimu` is easy. MPI is enabled automatically for
+[`lomc!()`](@ref) if [`PDVec`](@ref) is used to store a vector. In that case, data will
+be stored in a distributed fashion among the MPI ranks and only communicated between ranks
+when necessary. Additional MPI-related functionality is provided by the module [`RMPI`](@ref
+Rimu.RMPI).

## References
The code implements the FCIQMC algorithm originally described in
diff --git a/docs/src/mpi.md b/docs/src/mpi.md
new file mode 100644
index 000000000..734a984aa
--- /dev/null
+++ b/docs/src/mpi.md
@@ -0,0 +1,92 @@
+# Working with MPI
+
+If you are using [`PDVec`](@ref Main.DictVectors.PDVec)s to store your vectors, working with
+MPI should be fairly straightforward. Generally, [`PDVec`](@ref Main.DictVectors.PDVec) will
+work with MPI automatically, as long as MPI is set up correctly and a few common pitfalls
+are avoided.
+
+Rimu includes an unexported module [`RMPI`](@ref Main.Rimu.RMPI), which must be imported to access
+additional MPI-related functionality.
+
+## Configuring MPI
+
+When running on a cluster, ensure that MPI.jl is using the system binary. See [the MPI.jl
+documentation](https://juliaparallel.org/MPI.jl/latest/configuration/) for more information.
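+For example, one common way to switch MPI.jl to the system-provided library is via the
+MPIPreferences package (a sketch only; the exact configuration depends on your cluster, so
+consult the linked documentation):
+
+```julia
+using MPIPreferences
+MPIPreferences.use_system_binary()  # record the system MPI library in LocalPreferences.toml
+```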
+
+It is always a good idea to start your script with a quick test that ensures that MPI is
+set up correctly. One way to do this is to open with
+
+```julia
+using Rimu.RMPI
+mpi_allprintln("hello")
+```
+
+which should print something like
+
+```
+[ rank 0: hello
+[ rank 1: hello
+[ rank 2: hello
+[ rank 3: hello
+```
+
+If it prints `rank 0` several times, the code will run, but ranks will not communicate.
+
+## Using Slurm
+
+When using [`PDVec`](@ref Main.DictVectors.PDVec), the recommended setup is to use threads to
+parallelise the computation within a node, and to only use MPI for inter-node
+communication. In a Slurm script, this is done as follows:
+
+```bash
+...
+#SBATCH --ntasks-per-node=1
+#SBATCH --nodes=4                # replace 4 with the desired number of nodes
+#SBATCH --cpus-per-task=28       # replace 28 with the number of cores available in a node
+#SBATCH --hint=nomultithread     # don't use hyperthreading
+...
+
+srun julia --project -tauto script.jl
+```
+
+On some clusters, additional settings must be used with `srun`; for example, the CTCP
+cluster requires the following.
+
+```bash
+srun --mpi=pmi2 julia --project -tauto script.jl
+```
+
+## Common pitfalls with reducing functions
+
+### Using `@mpi_root`
+
+Take care not to use reducing functions (such as `length`, `sum`, `norm`, ...) inside
+[`@mpi_root`](@ref Main.Rimu.RMPI.@mpi_root) blocks. Doing so will initiate the
+distributed reduction on one rank only, which will cause the code to go out of sync and
+freeze. As an example, to report the current length of a vector, calculate the length before
+the [`@mpi_root`](@ref Main.Rimu.RMPI.@mpi_root) block:
+
+```julia
+len = length(pdvec)
+@mpi_root println("vector length is $len")
+```
+
+## Threaded operations and reductions
+
+When using functions that take anonymous functions, such as `map(!)`, `sum`, or
+`mapreduce`, it is important that the anonymous functions passed to them do not perform any
+MPI-reducing operations (`length`, `norm`, `sum`, etc.). These anonymous functions are
+executed on multiple threads, and initiating MPI communication from multiple threads may
+cause issues.
+
+As an example, suppose we want to scale a vector by its length by using `map!`. The correct
+way to write this code is as follows:
+
+```julia
+len = length(pdvec)
+map!(values(pdvec)) do x
+    x / len
+end
+```
+
+Similar to the previous example, `len` is calculated first and not within the body of
+`map!`. In this specific case, an even better option is to use the `scale!` function from
+[VectorInterface.jl](https://github.com/Jutho/VectorInterface.jl):
+
+```julia
+scale!(pdvec, 1 / length(pdvec))
+```
diff --git a/ext/KrylovKitExt.jl b/ext/KrylovKitExt.jl
new file mode 100644
index 000000000..4a3f28fad
--- /dev/null
+++ b/ext/KrylovKitExt.jl
@@ -0,0 +1,49 @@
+module KrylovKitExt
+
+using Rimu
+using KrylovKit
+using LinearAlgebra
+
+const U = Union{Symbol,EigSorter}
+
+"""
+    OperatorMultiplier
+
+A struct that holds the working memory for repeatedly multiplying vectors with an operator.
+"""
+struct OperatorMultiplier{H,W<:PDWorkingMemory}
+    hamiltonian::H
+    working_memory::W
+end
+function OperatorMultiplier(hamiltonian, vector::PDVec)
+    return OperatorMultiplier(hamiltonian, PDWorkingMemory(vector; style=IsDeterministic()))
+end
+
+function (o::OperatorMultiplier)(v)
+    result = zerovector(v)
+    return mul!(result, o.hamiltonian, v, o.working_memory)
+end
+
+function KrylovKit.eigsolve(
+    ham::AbstractHamiltonian, vec::PDVec, howmany::Int=1, which::U=:LR; kwargs...
+) + # Change the type of `vec` to float, if needed. + v = scale!!(vec, 1.0) + prop = OperatorMultiplier(ham, v) + return eigsolve( + prop, v, howmany, which; + ishermitian=ishermitian(ham), issymmetric=issymmetric(ham), kwargs... + ) +end + +# This method only exists to detect whether a Hamiltonian is Hermitian or not. +function KrylovKit.eigsolve( + ham::AbstractHamiltonian, vec::AbstractDVec, howmany::Int=1, which::U=:LR; kwargs... +) + return @invoke eigsolve( + ham::Any, vec::Any, howmany, which; + ishermitian=ishermitian(ham), issymmetric=issymmetric(ham), kwargs... + ) +end + +end diff --git a/scripts/BHM-example-mpi.jl b/scripts/BHM-example-mpi.jl index b30c8b9d1..2ac788a95 100644 --- a/scripts/BHM-example-mpi.jl +++ b/scripts/BHM-example-mpi.jl @@ -1,8 +1,8 @@ # # Example 2: Rimu with MPI -# In this example, we will demonstrate using Rimu with MPI. +# In this example, we will demonstrate using Rimu with MPI. -# A runnable script for this example is located +# A runnable script for this example is located # [here](https://github.com/joachimbrand/Rimu.jl/blob/develop/scripts/BHM-example-mpi.jl). # Run it with `mpirun julia BHM-example-mpi.jl`. @@ -16,17 +16,17 @@ using Rimu.RMPI # First, we define the Hamiltonian. We want to start from an address with zero momentum. -address = BoseFS((0, 0, 0, 0, 10, 0, 0, 0, 0, 0)) +address = BoseFS(10, 5 => 10) # We will set the interaction strength `u` to `6`. The hopping strength `t` defaults to `1.0`. hamiltonian = HubbardMom1D(address; u=6.0) -# Next, we construct the starting vector. Wrap a vector in `MPIData` to make it MPI -# distributed. We set the vector's style to [`IsDynamicSemistochastic`](@ref), which -# improves statistics and reduces the sign problem. +# Next, we construct the starting vector. We use a `PDVec`, which is automatically MPI +# distributed if MPI is available. We set the vector's stochastic style to +# [`IsDynamicSemistochastic`](@ref), which improves statistics and reduces the sign problem. -dvec = MPIData(DVec(address => 1.0; style=IsDynamicSemistochastic())) +dvec = PDVec(address => 1.0; style=IsDynamicSemistochastic()) # We set a reporting strategy. We will use [`ReportToFile`](@ref), which writes the reports # directly to a file. This is useful for reducing memory use in long-running jobs, as we @@ -43,7 +43,7 @@ s_strat = DoubleLogUpdate(targetwalkers=10_000) post_step = ProjectedEnergy(hamiltonian, dvec) # The `@mpi_root` macro performs an action on the root rank only, which is useful for printing. - + @mpi_root println("Running FCIQMC with ", mpi_size(), " rank(s).") # Finally, we can run the computation. @@ -54,6 +54,6 @@ using Test #hide @test isfile("result.arrow") #hide dfr = load_df("result.arrow") #hide qmcdata = last(dfr, 5000) #hide -(qmcShift,qmcShiftErr) = mean_and_se(qmcdata.shift) #hide -@test qmcShift ≈ -6.5 atol=0.5 #hide -rm("result.arrow", force=true) #hide \ No newline at end of file +qmc_shift, _ = mean_and_se(qmcdata.shift) #hide +@test qmc_shift ≈ -6.5 atol=0.5 #hide +rm("result.arrow", force=true) #hide diff --git a/src/DictVectors/DictVectors.jl b/src/DictVectors/DictVectors.jl index a11f3e1da..8c103fc51 100644 --- a/src/DictVectors/DictVectors.jl +++ b/src/DictVectors/DictVectors.jl @@ -2,12 +2,14 @@ Module that provides concrete implementations of the [`AbstractDVec`](@ref) interface. 
- [`DVec`](@ref): basic [`AbstractDVec`](@ref) +- [`PDVec`](@ref): parallel [`AbstractDVec`](@ref) with MPI and initiator support - [`InitiatorDVec`](@ref): allows storing information about initiator status See [`Interfaces`](@ref). """ module DictVectors +using Folds using LinearAlgebra using Random using VectorInterface @@ -18,12 +20,12 @@ using ..Interfaces using ..Hamiltonians using ..StochasticStyles -# TODO: ordering import ..Interfaces: deposit!, storage, StochasticStyle, default_style, freeze, localpart, working_memory, sort_into_targets! -export deposit!, storage, walkernumber, propagate!, dot_from_right -export DVec, InitiatorDVec +export deposit!, storage, walkernumber, dot_from_right +export DVec, InitiatorDVec, PDVec, PDWorkingMemory + export AbstractProjector, NormProjector, Norm2Projector, UniformProjector, Norm1ProjectorPPop @@ -57,8 +59,12 @@ include("abstractdvec.jl") include("projectors.jl") include("initiators.jl") +include("communicators.jl") include("dvec.jl") include("initiatordvec.jl") +include("pdvec.jl") +include("pdworkingmemory.jl") + end # module diff --git a/src/DictVectors/abstractdvec.jl b/src/DictVectors/abstractdvec.jl index edc016832..4fbb9dc15 100644 --- a/src/DictVectors/abstractdvec.jl +++ b/src/DictVectors/abstractdvec.jl @@ -38,7 +38,15 @@ Base.ndims(::AbstractDVec) = 1 Base.zero(v::AbstractDVec) = empty(v) VectorInterface.zerovector(v::AbstractDVec, ::Type{T}) where {T<:Number} = similar(v, T) VectorInterface.zerovector!(v::AbstractDVec) = empty!(v) -VectorInterface.zerovector!!(v::AbstractDVec) = zerovector!(v) +VectorInterface.zerovector!!(v::AbstractDVec{<:Any,T}) where {T<:Number} = zerovector!!(v, T) + +function VectorInterface.zerovector!!(v::AbstractDVec, ::Type{T}) where {T<:Number} + if scalartype(v) ≡ T + return zerovector!(v) + else + return zerovector(v, T) + end +end function Base.similar(dvec::AbstractDVec, args...; kwargs...) return sizehint!(empty(dvec, args...; kwargs...), length(dvec)) @@ -129,7 +137,14 @@ function VectorInterface.scale(v::AbstractDVec, α::Number) scale!(result, v, α) return result end -VectorInterface.scale!!(v::AbstractDVec, α::Number) = scale!(v, α) +function VectorInterface.scale!!(v::AbstractDVec, α::T) where {T<:Number} + U = scalartype(v) + if promote_type(U, T) == U + return scale!(v, α) + else + return scale(v, α) + end +end LinearAlgebra.mul!(w::AbstractDVec, v::AbstractDVec, α) = scale!(w, v, α) LinearAlgebra.lmul!(α, v::AbstractDVec) = scale!(v, α) @@ -141,9 +156,7 @@ Base.:*(x::AbstractDVec, α) = α * x @inline function VectorInterface.add!( w::AbstractDVec{K}, v::AbstractDVec{K}, α::Number=true, β::Number=true ) where {K} - if β ≠ one(β) - scale!(w, β) - end + scale!(w, β) for (key, val) in pairs(v) w[key] += α * val end @@ -159,9 +172,14 @@ function VectorInterface.add( end function VectorInterface.add!!( - v::AbstractDVec, w::AbstractDVec, α::Number=true, β::Number=true -) - add!(v, w, α, β) + v::AbstractDVec{K}, w::AbstractDVec{K}, α::Number=true, β::Number=true +) where {K} + T = promote_type(scalartype(v), scalartype(w), typeof(α), typeof(β)) + if T ≡ scalartype(v) + return add!(v, w, α, β) + else + return add(v, w, α, β) + end end Base.:+(v::AbstractDVec, w::AbstractDVec) = add(v, w) diff --git a/src/DictVectors/communicators.jl b/src/DictVectors/communicators.jl new file mode 100644 index 000000000..62273dccc --- /dev/null +++ b/src/DictVectors/communicators.jl @@ -0,0 +1,313 @@ +struct CommunicatorError <: Exception + msg::String +end +CommunicatorError(args...) 
= CommunicatorError(string(args...))
+
+function Base.showerror(io::IO, ex::CommunicatorError)
+    print(io, "CommunicatorError: ", ex.msg)
+end
+
+"""
+    abstract type Communicator
+
+Communicators are used to handle MPI communication when using [`PDVec`](@ref)s. Currently,
+two implementations are provided: [`NotDistributed`](@ref) and [`PointToPoint`](@ref). The
+communicator is picked automatically according to the number of MPI ranks available.
+
+When implementing a communicator, use [`local_segments`](@ref) and
+[`remote_segments`](@ref).
+
+# Interface
+
+* [`synchronize_remote!`](@ref)
+* [`mpi_rank`](@ref)
+* [`mpi_size`](@ref)
+* [`mpi_comm`](@ref)
+
+# Optional interface
+
+* [`is_distributed`](@ref): defaults to returning `true`.
+* [`merge_remote_reductions`](@ref): defaults to using `MPI.Allreduce`.
+* [`total_num_segments`](@ref): defaults to `n * mpi_size`.
+* [`target_segment`](@ref): defaults to using
+  [fastrange](https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/) to pick the segment.
+
+"""
+abstract type Communicator end
+
+"""
+    is_distributed(::Communicator)
+
+Return `true` if the communicator operates over MPI.
+"""
+is_distributed(::Communicator) = true
+
+"""
+    merge_remote_reductions(c::Communicator, op, x)
+
+Merge the results of reductions over MPI. By default, it uses `MPI.Allreduce`.
+"""
+merge_remote_reductions(c::Communicator, op, x) = MPI.Allreduce(x, op, mpi_comm(c))
+
+"""
+    total_num_segments(c::Communicator, n) -> Int
+
+Return the total number of segments, including the remote ones, where `n` is the number of
+local segments.
+"""
+total_num_segments(c::Communicator, n) = n * mpi_size(c)
+
+"""
+    target_segment(c::Communicator, k, num_segments) -> target, is_local
+
+This function is used to determine where in the [`PDVec`](@ref) a key should be stored.
+
+If the key is local (stored on the same MPI rank), return its segment index and `true`. If
+the key is non-local, return any value and `false`.
+"""
+function target_segment(c::Communicator, k, num_segments)
+    total_segments = num_segments * mpi_size(c)
+    result = fastrange_hash(k, total_segments) - mpi_rank(c) * num_segments
+    return result, 1 ≤ result ≤ num_segments
+end
+
+"""
+    mpi_rank(::Communicator) -> Int
+
+Return the MPI rank of the [`Communicator`](@ref).
+"""
+mpi_rank
+
+"""
+    mpi_size(::Communicator) -> Int
+
+Return the total number of MPI ranks in the [`Communicator`](@ref).
+"""
+mpi_size
+
+"""
+    mpi_comm(::Communicator) -> MPI.Comm
+
+Return the `MPI.Comm` that the [`Communicator`](@ref) operates on.
+"""
+mpi_comm
+
+"""
+    copy_to_local!([::Communicator,] w::PDWorkingMemory, t::PDVec) -> PDVec
+
+Copy pairs in `t` from all ranks and return them as a (possibly) new [`PDVec`](@ref),
+possibly using the [`PDWorkingMemory`](@ref) as temporary storage.
+"""
+copy_to_local!
+
+"""
+    synchronize_remote!([::Communicator,] ::PDWorkingMemory)
+
+Copy pairs from remote ranks to the local part of the [`PDWorkingMemory`](@ref).
+"""
+synchronize_remote!
+
+
+"""
+    NotDistributed <: Communicator
+
+This [`Communicator`](@ref) is used when MPI is not available.
+""" +struct NotDistributed <: Communicator end + +is_distributed(::NotDistributed) = false + +mpi_rank(::NotDistributed) = 0 +mpi_size(::NotDistributed) = 1 + +synchronize_remote!(::NotDistributed, w) = w + +function copy_to_local!(::NotDistributed, w, t) + return t +end + +merge_remote_reductions(::NotDistributed, _, x) = x + +total_num_segments(::NotDistributed, n) = n + +target_segment(::NotDistributed, k, num_segments) = fastrange_hash(k, num_segments), true + +""" + LocalPart <: Communicator + +When [`localpart`](@ref) is used, the vector's [`Communicator`](@ref) is replaced with this. +This allows iteration and local reductions. +""" +struct LocalPart{C} <: Communicator + communicator::C +end + +is_distributed(::LocalPart) = false +mpi_rank(lp::LocalPart) = mpi_rank(lp.communicator) +mpi_size(lp::LocalPart) = mpi_size(lp.communicator) + +function synchronize_remote!(::LocalPart, w) + throw(CommunicatorError("attemted to synchronize localpart")) +end + +merge_remote_reductions(::LocalPart, _, x) = x + +function target_segment(c::LocalPart, k, num_segments) + total_segments = num_segments * mpi_size(c) + result = fastrange_hash(k, total_segments) - mpi_rank(c) * num_segments + if 1 ≤ result ≤ num_segments + return result, true + else + throw(CommunicatorError("attempted to access non-local key $k")) + end +end + +""" + SegmentedBuffer + +Multiple vectors stored in a simple buffer with MPI communication. + +See [`replace_collections!`](@ref), [`mpi_send`](@ref), [`mpi_recv_any!`](@ref). +""" +struct SegmentedBuffer{T} <: AbstractVector{SubArray{T,1,Vector{T},Tuple{UnitRange{Int64}},true}} + offsets::Vector{Int} + buffer::Vector{T} +end +function SegmentedBuffer{T}() where {T} + return SegmentedBuffer(Int[], T[]) +end + +Base.size(buf::SegmentedBuffer) = size(buf.offsets) + +function Base.getindex(buf::SegmentedBuffer, i) + start_index = get(buf.offsets, i-1, 0) + 1 + end_index = buf.offsets[i] + return view(buf.buffer, start_index:end_index) +end + +""" + replace_collections!(buf::SegmentedBuffer, iters) + +Insert collections in `iters` into buffers. +""" +function replace_collections!(buf::SegmentedBuffer, iters) + resize!(buf.offsets, length(iters)) + resize!(buf.buffer, sum(length, iters)) + + # Compute offsets + curr = 0 + for (i, col) in enumerate(iters) + curr += length(col) + buf.offsets[i] = curr + end + + # Copy over the data + Folds.foreach(buf, iters) do dst, src + for (i, v) in enumerate(src) + dst[i] = v + end + end + return buf +end + +""" + mpi_send(buf::SegmentedBuffer, dest, comm) + +Send the buffers to `dest`. +""" +function mpi_send(buf::SegmentedBuffer, dest, comm) + MPI.Isend(buf.offsets, comm; dest, tag=0) + MPI.Isend(buf.buffer, comm; dest, tag=1) + return buf +end + +""" + mpi_recv_any!(buf::SegmentedBuffer, comm) -> Int + +Find a source that is ready to send a buffer and receive from it. Return the rank ID of the +sender. +""" +function mpi_recv_any!(buf::SegmentedBuffer, comm) + status = offset_status = MPI.Probe(MPI.ANY_SOURCE, 0, comm) + source = status.source + resize!(buf.offsets, MPI.Get_count(offset_status, Int)) + MPI.Recv!(buf.offsets, comm; source, tag=0) + + resize!(buf.buffer, last(buf.offsets)) + MPI.Recv!(buf.buffer, comm; source, tag=1) + return source +end + +""" + PointToPoint <: Communicator + +[`Communicator`](@ref) that uses circular communication using `MPI.Isend` and `MPI.Recv!`. 
+""" +struct PointToPoint{K,V} <: Communicator + send_buffers::Vector{SegmentedBuffer{Pair{K,V}}} + recv_buffer::SegmentedBuffer{Pair{K,V}} + mpi_comm::MPI.Comm + mpi_rank::Int + mpi_size::Int +end +function PointToPoint{K,V}( + ; + mpi_comm=MPI.COMM_WORLD, + mpi_rank=MPI.Comm_rank(mpi_comm), + mpi_size=MPI.Comm_size(mpi_comm), +) where {K,V} + return PointToPoint( + [SegmentedBuffer{Pair{K,V}}() for _ in 1:mpi_size-1], + SegmentedBuffer{Pair{K,V}}(), + mpi_comm, + mpi_rank, + mpi_size, + ) +end + +mpi_rank(ptp::PointToPoint) = ptp.mpi_rank +mpi_size(ptp::PointToPoint) = ptp.mpi_size +mpi_comm(ptp::PointToPoint) = ptp.mpi_comm + +function synchronize_remote!(ptp::PointToPoint, w) + # Asynchronously send all buffers. + for offset in 1:ptp.mpi_size - 1 + dst_rank = mod(ptp.mpi_rank + offset, ptp.mpi_size) + send_buffer = ptp.send_buffers[offset] + replace_collections!(send_buffer, remote_segments(w, dst_rank)) + mpi_send(send_buffer, dst_rank, ptp.mpi_comm) + end + + # Receive and insert from each rank. The order is first come first serve. + for _ in 1:ptp.mpi_size - 1 + mpi_recv_any!(ptp.recv_buffer, ptp.mpi_comm) + Folds.foreach(dict_add!, local_segments(w), ptp.recv_buffer) + end +end + +function copy_to_local!(ptp::PointToPoint, w, t) + # Same data sent to all ranks, so we can reuse the buffer. + send_buffer = first(ptp.send_buffers) + replace_collections!(send_buffer, t.segments) + + for offset in 1:ptp.mpi_size - 1 + dst_rank = mod(ptp.mpi_rank + offset, ptp.mpi_size) + mpi_send(send_buffer, dst_rank, ptp.mpi_comm) + end + + # We need all the data, including local in w. + Folds.foreach(copy!, local_segments(w), t.segments) + + # Receive and insert from each rank. The order is first come first serve. + for _ in 1:ptp.mpi_size - 1 + src_rank = mpi_recv_any!(ptp.recv_buffer, ptp.mpi_comm) + Folds.foreach(remote_segments(w, src_rank), ptp.recv_buffer) do dst, src + empty!(dst) + for (k, v) in src + dst[k] = valtype(dst)(v) + end + end + end + + # Pack the segments into a PDVec and return it. + return main_column(w) +end diff --git a/src/DictVectors/dvec.jl b/src/DictVectors/dvec.jl index 1b6965840..df59bc7f1 100644 --- a/src/DictVectors/dvec.jl +++ b/src/DictVectors/dvec.jl @@ -7,7 +7,7 @@ supports various linear algebra operations such as `norm` and `dot`. It has a [`StochasticStyle`](@ref) that is used to select an appropriate spawning strategy in the FCIQMC algorithm. -See also: [`AbstractDVec`](@ref). +See also: [`AbstractDVec`](@ref), [`InitiatorDVec`](@ref), [`PDVec`](@ref). ## Constructors diff --git a/src/DictVectors/initiatordvec.jl b/src/DictVectors/initiatordvec.jl index 5351230c0..013305eba 100644 --- a/src/DictVectors/initiatordvec.jl +++ b/src/DictVectors/initiatordvec.jl @@ -5,11 +5,13 @@ Dictionary-based vector-like data structure for use with [`lomc!`](@ref Main.lom [`KrylovKit.jl`](https://github.com/Jutho/KrylovKit.jl). See [`AbstractDVec`](@ref). Functionally identical to [`DVec`](@ref), but contains [`InitiatorValue`](@ref)s internally in order to facilitate initiator methods. Initiator methods for controlling the Monte Carlo -sign problem were first introduced in +sign problem were first introduced in [J. Chem. Phys. 132, 041103 (2010)](https://doi.org/10.1063/1.3302277). -How the initiators are handled is controlled by specifying an [`InitiatorRule`](@ref) with +How the initiators are handled is controlled by specifying an [`InitiatorRule`](@ref) with the `initiator` keyword argument (see below). +See also: [`AbstractDVec`](@ref), [`DVec`](@ref), [`PDVec`](@ref). 
+
+## Constructors
+
+* `InitiatorDVec(dict::AbstractDict[; style, initiator, capacity])`: create an
@@ -207,8 +209,7 @@ end
"""
    InitiatorIterator

-Iterator over pairs or values of an `InitiatorDVec`. Supports the `SplittablesBase`
-interface.
+Iterator over pairs or values of an `InitiatorDVec`.
"""
struct InitiatorIterator{T,D,I}
    iter::D
diff --git a/src/DictVectors/initiators.jl b/src/DictVectors/initiators.jl
index c2f2f42c8..7a0a51916 100644
--- a/src/DictVectors/initiators.jl
+++ b/src/DictVectors/initiators.jl
@@ -2,7 +2,8 @@
    abstract type AbstractInitiatorValue{V}

A value equipped with additional information that enables a variation of the initiator
-approximation. To be used with [`InitiatorDVec`](@ref) and [`InitiatorRule`](@ref)s.
+approximation. To be used with [`PDVec`](@ref), [`InitiatorDVec`](@ref) and
+[`InitiatorRule`](@ref)s.

Must define:
* `Base.zero`, `Base.:+`, `Base.:-`, `Base.:*`
@@ -47,7 +48,8 @@ Base.zero(::Type{InitiatorValue{V}}) where {V} = InitiatorValue{V}()
"""
    NonInitiatorValue{V}

-Value that does not contain any additional information - used with [`NonInitiator`](@ref).
+Value that does not contain any additional information - used with [`NonInitiator`](@ref),
+the default initiator rule for [`PDVec`](@ref).
"""
struct NonInitiatorValue{V} <: AbstractInitiatorValue{V}
    value::V
@@ -115,8 +117,8 @@ to_initiator_value
"""
    Initiator(threshold) <: InitiatorRule

-Initiator rule to be passed to [`InitiatorDVec`](@ref). An initiator is a configuration
-`add` with a coefficient with magnitude `abs(v[add]) > threshold`. Rules:
+Initiator rule to be passed to [`PDVec`](@ref) or [`InitiatorDVec`](@ref). An initiator is a
+configuration `add` with a coefficient with magnitude `abs(v[add]) > threshold`. Rules:

* Initiators can spawn anywhere.
* Non-initiators can spawn to initiators.
@@ -156,8 +158,8 @@ end
"""
    SimpleInitiator(threshold) <: InitiatorRule

-Initiator rule to be passed to [`InitiatorDVec`](@ref). An initiator is a configuration
-`add` with a coefficient with magnitude `abs(v[add]) > threshold`. Rules:
+Initiator rule to be passed to [`PDVec`](@ref) or [`InitiatorDVec`](@ref). An initiator is
+a configuration `add` with a coefficient with magnitude `abs(v[add]) > threshold`. Rules:

* Initiators can spawn anywhere.
* Non-initiators cannot spawn.
@@ -179,7 +181,7 @@ end
"""
    CoherentInitiator(threshold) <: InitiatorRule

-Initiator rule to be passed to [`InitiatorDVec`](@ref). An initiator is
+Initiator rule to be passed to [`PDVec`](@ref) or [`InitiatorDVec`](@ref). An initiator is
a configuration `add` with a coefficient with magnitude `abs(v[add]) > threshold`. Rules:

* Initiators can spawn anywhere.
@@ -208,7 +210,8 @@ end
"""
    NonInitiator{V} <: InitiatorRule{V}

-Initiator rule that disables the approximation.
+Initiator rule that disables the approximation. This is the default setting for
+[`PDVec`](@ref).

See [`InitiatorRule`](@ref).
"""
diff --git a/src/DictVectors/pdvec.jl b/src/DictVectors/pdvec.jl
new file mode 100644
index 000000000..82015a7dc
--- /dev/null
+++ b/src/DictVectors/pdvec.jl
@@ -0,0 +1,872 @@
+"""
+    fastrange_hash(k, n)
+
+Map `k` to a bucket in the range `1:n`. See [fastrange](https://github.com/lemire/fastrange).
+"""
+function fastrange_hash(k, n::Int)
+    h = hash(k)
+    return (((h % UInt128) * (n % UInt128)) >> 64) % Int + 1
+end
+
+"""
+    PDVec{K,V}(; kwargs...)
+    PDVec(iter; kwargs...)
+    PDVec(pairs...; kwargs...)
+
+Dictionary-based vector-like data structure for use with FCIQMC and
+[KrylovKit.jl](https://github.com/Jutho/KrylovKit.jl).
While mostly behaving like a `Dict`,
+it supports various linear algebra operations such as `norm` and `dot`, and the interface
+defined in [VectorInterface](https://github.com/Jutho/VectorInterface.jl).
+
+The P in `PDVec` stands for parallel. `PDVec`s perform `mapreduce`, `foreach`, and various
+linear algebra operations in a threaded manner. If MPI is available, these operations are
+automatically distributed as well. As such, it is not recommended to iterate over `pairs`,
+`keys`, or `values` directly unless iteration is explicitly performed on the
+[`localpart`](@ref) of the vector.
+
+See also: [`AbstractDVec`](@ref), [`DVec`](@ref), [`InitiatorDVec`](@ref).
+
+## Keyword arguments
+
+* `style = `[`default_style`](@ref)`(V)`: A [`StochasticStyle`](@ref) that is used to select
+  the spawning strategy in the FCIQMC algorithm.
+
+* `initiator = `[`NonInitiator`](@ref)`()`: An [`InitiatorRule`](@ref), used in FCIQMC to
+  remove the sign problem.
+
+* `communicator`: A [`Communicator`](@ref) that controls how operations are performed when
+  using MPI. The defaults are [`NotDistributed`](@ref) when not using MPI and
+  [`PointToPoint`](@ref) when using MPI.
+
+# Extended Help
+
+## Segmentation
+
+The vector is split into `Threads.nthreads()` subdictionaries called segments. Which
+dictionary a key-value pair is mapped to is determined by the hash of the key. The purpose
+of this segmentation is to allow parallel processing: functions such as `mapreduce`, `add!`
+or `dot` (full list below) process each subdictionary on a separate thread.
+
+### Example
+
+```julia
+julia> add = FermiFS2C((1,1,0,0), (0,0,1,1));
+
+julia> op = HubbardMom1D(add; t=4/π^2, u=4);
+
+julia> pv = PDVec(add => 1.0)
+1-element PDVec: style = IsDeterministic{Float64}()
+  fs"|↑↑↓↓⟩" => 1.0
+
+julia> pv = op * pv
+7-element PDVec: style = IsDeterministic{Float64}()
+  fs"|↑↓↑↓⟩" => 1.0
+  fs"|↑↑↓↓⟩" => 4.0
+  fs"|↓↑↓↑⟩" => 1.0
+  fs"|↓↑↑↓⟩" => -1.0
+  fs"|⇅⋅⋅⇅⟩" => 1.0
+  fs"|↑↓↓↑⟩" => -1.0
+  fs"|⋅⇅⇅⋅⟩" => 1.0
+
+julia> map!(x -> -x, values(pv)); pv
+7-element PDVec: style = IsDeterministic{Float64}()
+  fs"|↑↓↑↓⟩" => -1.0
+  fs"|↑↑↓↓⟩" => -4.0
+  fs"|↓↑↓↑⟩" => -1.0
+  fs"|↓↑↑↓⟩" => 1.0
+  fs"|⇅⋅⋅⇅⟩" => -1.0
+  fs"|↑↓↓↑⟩" => 1.0
+  fs"|⋅⇅⇅⋅⟩" => -1.0
+
+julia> dest = similar(pv)
+0-element PDVec: style = IsDeterministic{Float64}()
+
+julia> map!(x -> x + 2, dest, values(pv))
+7-element PDVec: style = IsDeterministic{Float64}()
+  fs"|↑↓↑↓⟩" => 1.0
+  fs"|↑↑↓↓⟩" => -2.0
+  fs"|↓↑↓↑⟩" => 1.0
+  fs"|↓↑↑↓⟩" => 3.0
+  fs"|⇅⋅⋅⇅⟩" => 1.0
+  fs"|↑↓↓↑⟩" => 3.0
+  fs"|⋅⇅⇅⋅⟩" => 1.0
+
+julia> sum(values(pv))
+-6.0
+
+julia> dot(dest, pv)
+10.0
+
+julia> dot(dest, op, pv)
+44.0
+```
+
+## MPI
+
+When MPI is active, all parallel reductions are automatically reduced across MPI ranks
+with a call to `MPI.Allreduce`.
+
+In a distributed setting, `PDVec` does not support iteration without first making explicit
+that the iteration is only to be performed on the local segments of the vector. This is
+done with [`localpart`](@ref). In general, even when not using MPI, it is best practice to
+use [`localpart`](@ref) when explicit iteration is required.
+
+## Use with KrylovKit
+
+`PDVec` is compatible with `eigsolve` from
+[KrylovKit.jl](https://github.com/Jutho/KrylovKit.jl). When used, the diagonalisation is
+performed in a threaded and distributed manner. Using multiple MPI ranks with this method
+does not distribute the memory load effectively, but does result in significant speedups.
+
+### Example
+
+```julia
+julia> using KrylovKit
+
+julia> add = BoseFS((0,0,5,0,0));
+
+julia> op = HubbardMom1D(add; u=6.0);
+
+julia> pv = PDVec(add => 1.0);
+
+julia> results = eigsolve(op, pv, 4, :SR);
+
+julia> results[1][1:4]
+4-element Vector{Float64}:
+ -3.4311156892322234
+  1.1821748602612363
+  3.7377753753082823
+  6.996390417443125
+```
+
+## Parallel functionality
+
+The following functions are threaded and MPI-compatible:
+
+* From Base: `mapreduce` and derivatives (`sum`, `prod`, `reduce`...), `all`,
+  `any`, `map!` (on `values` only), `+`, `-`, `*`
+
+* From LinearAlgebra: `rmul!`, `lmul!`, `mul!`, `axpy!`, `axpby!`, `dot`, `norm`,
+  `normalize`, `normalize!`
+
+* The full interface defined in
+  [VectorInterface](https://github.com/Jutho/VectorInterface.jl)
+
+"""
+struct PDVec{
+    K,V,N,S<:StochasticStyle{V},I<:InitiatorRule,C<:Communicator
+} <: AbstractDVec{K,V}
+    segments::NTuple{N,Dict{K,V}}
+    style::S
+    initiator::I
+    communicator::C
+end
+
+function PDVec{K,V}(args...; kwargs...) where {K,V}
+    return PDVec{K,V,Threads.nthreads()}(args...; kwargs...)
+end
+function PDVec{K,V,N}(
+    ; style=default_style(V),
+    initiator_threshold=0, initiator=initiator_threshold > 0,
+    communicator=nothing,
+) where {K,V,N}
+    W = eltype(style)
+    segments = ntuple(_ -> Dict{K,W}(), Val(N))
+
+    if initiator == false
+        irule = NonInitiator()
+    elseif initiator == true
+        initiator_threshold = initiator_threshold == 0 ? 1 : initiator_threshold
+        irule = Initiator(initiator_threshold)
+    elseif initiator isa InitiatorRule
+        irule = initiator
+    else
+        throw(ArgumentError("Invalid initiator $initiator"))
+    end
+
+    # This is a bit clunky. If you modify the communicator by hand, you have to make sure it
+    # knows to hold values of type W. When we introduce more communicators, they should
+    # probably be constructed by a function, similar to how it's done in RMPI.
+    IW = initiator_valtype(irule, W)
+    if isnothing(communicator)
+        if MPI.Comm_size(MPI.COMM_WORLD) > 1
+            comm = PointToPoint{K,IW}()
+        else
+            comm = NotDistributed()
+        end
+    elseif communicator isa Communicator
+        comm = communicator
+    else
+        throw(ArgumentError("Invalid communicator $communicator"))
+    end
+
+    return PDVec(segments, style, irule, comm)
+end
+function PDVec(pairs; kwargs...)
+    K = eltype(first.(pairs))
+    V = eltype(last.(pairs))
+    t = PDVec{K,V}(; kwargs...)
+    for (k, v) in pairs
+        t[k] = v
+    end
+    return t
+end
+function PDVec(pairs::Vararg{Pair}; kwargs...)
+    return PDVec(pairs; kwargs...)
+end
+function PDVec(dict::Dict{K,V}; kwargs...) where {K,V}
+    t = PDVec{K,V}(; kwargs...)
+    for (k, v) in dict
+        t[k] = v
+    end
+    return t
+end
+function PDVec(dv::AbstractDVec{K,V}; style=StochasticStyle(dv), kwargs...) where {K,V}
+    t = PDVec{K,V}(; style, kwargs...)
+    return copyto!(t, dv)
+end
+
+function Base.summary(io::IO, t::PDVec)
+    len = length(t)
+    print(io, "$len-element")
+    if num_segments(t) ≠ Threads.nthreads()
+        print(io, ", ", num_segments(t), "-segment")
+    end
+    if t.communicator isa LocalPart
+        print(io, " localpart(PDVec): ")
+        comm = t.communicator.communicator
+    else
+        print(io, " PDVec: ")
+        comm = t.communicator
+    end
+    print(io, "style = ", t.style)
+    if t.initiator ≠ NonInitiator()
+        print(io, ", initiator=", t.initiator)
+    end
+    if comm ≠ NotDistributed()
+        print(io, ", communicator=", t.communicator)
+    end
+end
+
+###
+### Properties and utilities
+###
+"""
+    is_distributed(t::PDVec)
+
+Return `true` if `t` is MPI-distributed.
+""" +is_distributed(t::PDVec) = is_distributed(t.communicator) + +""" + num_segments(t::PDVec) + +Return the number of segments in `t`. +""" +num_segments(t::PDVec{<:Any,<:Any,N}) where {N} = N + +StochasticStyle(t::PDVec) = t.style + +function Base.length(t::PDVec) + result = sum(length, t.segments) + return merge_remote_reductions(t.communicator, +, result) +end + +Base.isempty(t::PDVec) = iszero(length(t)) + +""" + check_compatibility(t, u) + +Return true if `t` and `u` have the same number of segments and throw otherwise. +""" +function check_compatibility(t, u) + if num_segments(t) == num_segments(u) + return true + else + throw(ArgumentError("vectors have different numbers of segments.")) + end +end + +function Base.isequal(l::PDVec, r::PDVec) + check_compatibility(l, r) + if length(localpart(l)) == length(localpart(r)) + result = Folds.all(zip(l.segments, r.segments)) do (l_seg, r_seg) + isequal(l_seg, r_seg) + end + else + result = false + end + return merge_remote_reductions(l.communicator, &, result) +end + +""" + target_segment(t::PDVec, key) -> target, is_local + +Determine the target segment from `key` hash. For MPI distributed vectors, this may return +numbers that are out of range and `is_local=false`. +""" +function target_segment(t::PDVec{K}, k::K) where {K} + return target_segment(t.communicator, k, num_segments(t)) +end +# Special case for single-threaded operation. +function target_segment(t::PDVec{K,<:Any,1,<:Any,<:Any,NotDistributed}, k::K) where {K} + return 1, true +end + +### +### getting and setting +### +function Base.getindex(t::PDVec{K,V}, k::K) where {K,V} + segment_id, is_local = target_segment(t, k) + if is_local + return get(t.segments[segment_id], k, zero(V)) + else + throw(CommunicatorError("Attempted to access non-local key `$k`")) + end +end +function Base.setindex!(t::PDVec{K,V}, val, k::K) where {K,V} + v = V(val) + segment_id, is_local = target_segment(t, k) + # Adding a key that is not local is supported. This is done to allow easy construction + # of vectors even when using MPI. + if is_local + if iszero(v) + delete!(t.segments[segment_id], k) + else + t.segments[segment_id][k] = v + end + end + return v +end +function deposit!(t::PDVec{K,V}, k::K, val, parent=nothing) where {K,V} + iszero(val) && return nothing + segment_id, is_local = target_segment(t, k) + if is_local + segment = t.segments[segment_id] + new_val = get(segment, k, zero(V)) + V(val) + if iszero(new_val) + delete!(segment, k) + else + segment[k] = new_val + end + end + return nothing +end + +function Base.delete!(t::PDVec{K,V}, k::K) where {K,V} + t[k] = zero(V) + return t +end + +### +### empty(!), similar, copy, etc. +### +function Base.empty( + t::PDVec{K,V,N}; style=t.style, initiator=t.initiator, communicator=t.communicator, +) where {K,V,N} + return PDVec{K,V,N}(; style, initiator, communicator) +end +function Base.empty(t::PDVec{K,V}, ::Type{V}; kwargs...) where {K,V} + return empty(t; kwargs...) +end +function Base.empty(t::PDVec{K,<:Any,N}, ::Type{V}; kwargs...) where {K,V,N} + return PDVec{K,V,N}(; kwargs...) +end +function Base.empty(t::PDVec{<:Any,<:Any,N}, ::Type{K}, ::Type{V}; kwargs...) where {K,V,N} + return PDVec{K,V,N}(; kwargs...) +end +function Base.empty!(t::PDVec) + Folds.foreach(empty!, t.segments, ) + return t +end +Base.similar(t::PDVec, args...; kwargs...) = empty(t, args...; kwargs...) 
+ +function Base.sizehint!(t::PDVec, n) + n_per_segment = cld(n, length(t.segments)) + Folds.foreach(d -> sizehint!(d, n_per_segment), t.segments) + return t +end + +function Base.copyto!(dst::PDVec, src::PDVec) + check_compatibility(dst, src) + Folds.foreach(dst.segments, src.segments) do d_seg, s_seg + copy!(d_seg, s_seg) + end + return dst +end +function Base.copy!(dst::PDVec, src::PDVec) + return copyto!(dst, src) +end +function Base.copy(src::PDVec) + return copy!(empty(src), src) +end + +function localpart(t::PDVec{K,V,N,S,I}) where {K,V,S,I,N} + return PDVec{K,V,N,S,I,LocalPart}( + t.segments, t.style, t.initiator, LocalPart(t.communicator), + ) +end +function localpart(t::PDVec{<:Any,<:Any,<:Any,<:Any,<:Any,<:LocalPart}) + return t +end + +### +### Iterators, map, mapreduce +### +""" + PDVecKeys + PDVecValues + PDVecPairs + +Iterators over keys/values/pairs of the [`PDVec`](@ref). Iteration is only supported over +the [`localpart`](@ref). Use reduction operations (`reduce`, `mapreduce`, `sum`, ...) if possible +when using them. +""" +struct PDVecIterator{F,T,V<:PDVec} + selector::F + vector::V + + function PDVecIterator(selector::F, ::Type{T}, t::V) where {F,T,V} + return new{F,T,V}(selector, t) + end +end + +num_segments(t::PDVecIterator) = num_segments(t.vector) +is_distributed(t::PDVecIterator) = is_distributed(t.vector) +Base.eltype(t::Type{<:PDVecIterator{<:Any,T}}) where {T} = T +Base.length(t::PDVecIterator) = length(t.vector) + +""" + PDVecKeys + +Iterator over the keys of a [`PDVec`](@ref). Alias for +[`PDVecIterator`](@ref)`{typeof(keys)}`. +""" +const PDVecKeys{T,V} = PDVecIterator{typeof(keys),T,V} +""" + PDVecVals + +Iterator over the values of a [`PDVec`](@ref). Alias for +[`PDVecIterator`](@ref)`{typeof(values)}`. +""" +const PDVecVals{T,V} = PDVecIterator{typeof(values),T,V} +""" + PDVecPairs + +Iterator over the pairs of a [`PDVec`](@ref). Alias for +[`PDVecIterator`](@ref)`{typeof(pairs)}`. +""" +const PDVecPairs{T,V} = PDVecIterator{typeof(pairs),T,V} + +Base.show(io::IO, t::PDVecKeys) = print(io, "PDVecKeys{", eltype(t), "}(...)") +Base.show(io::IO, t::PDVecVals) = print(io, "PDVecVals{", eltype(t), "}(...)") +Base.show(io::IO, t::PDVecPairs) = print(io, "PDVecPairs{", eltype(t), "}(...)") + +Base.keys(t::PDVec) = PDVecIterator(keys, keytype(t), t) +Base.values(t::PDVec) = PDVecIterator(values, valtype(t), t) +Base.pairs(t::PDVec) = PDVecIterator(pairs, eltype(t), t) + +function Base.iterate(t::PDVecIterator) + if !(t.vector.communicator isa LocalPart) && !(t.vector.communicator isa NotDistributed) + throw(CommunicatorError( + "Direct iteration over distributed vectors is not supported.", + "Use `localpart` to iterate over the local part only, ", + "or use a reduction function such as mapreduce." 
+        ))
+    end
+    return iterate(t, 1)
+end
+function Base.iterate(t::PDVecIterator, segment_id::Int)
+    segments = t.vector.segments
+    if segment_id > length(segments)
+        return nothing
+    end
+    it = iterate(t.selector(segments[segment_id]))
+    if isnothing(it)
+        return iterate(t, segment_id + 1)
+    else
+        return it[1], (segment_id, it[2])
+    end
+end
+function Base.iterate(t::PDVecIterator, (segment_id, state))
+    segments = t.vector.segments
+    it = iterate(t.selector(segments[segment_id]), state)
+    if isnothing(it)
+        return iterate(t, segment_id + 1)
+    else
+        return it[1], (segment_id, it[2])
+    end
+end
+
+"""
+    mapreduce(f, op, keys(::PDVec); init)
+    mapreduce(f, op, values(::PDVec); init)
+    mapreduce(f, op, pairs(::PDVec); init)
+
+Perform a parallel reduction operation on [`PDVec`](@ref)s. MPI-compatible. It is used in
+the definition of various Base functions such as `reduce`, `sum`, and `prod`.
+
+`init`, if provided, must be a neutral element for `op`.
+"""
+function Base.mapreduce(f::F, op::O, t::PDVecIterator; kwargs...) where {F,O}
+    result = Folds.mapreduce(
+        op, Iterators.filter(!isempty, t.vector.segments); kwargs...
+    ) do segment
+        mapreduce(f, op, t.selector(segment))
+    end
+    return merge_remote_reductions(t.vector.communicator, op, result)
+end
+
+"""
+    all(predicate, keys(::PDVec); kwargs...)
+    all(predicate, values(::PDVec); kwargs...)
+    all(predicate, pairs(::PDVec); kwargs...)
+
+Determine whether `predicate` returns `true` for all elements of an iterator over a
+[`PDVec`](@ref). Parallel and MPI-compatible.
+"""
+function Base.all(f, t::PDVecIterator)
+    result::Bool = Folds.all(t.vector.segments) do segment
+        all(f, t.selector(segment))
+    end
+    return merge_remote_reductions(t.vector.communicator, &, result)
+end
+
+"""
+    any(predicate, keys(::PDVec); kwargs...)
+    any(predicate, values(::PDVec); kwargs...)
+    any(predicate, pairs(::PDVec); kwargs...)
+
+Determine whether `predicate` returns `true` for any element of an iterator over a
+[`PDVec`](@ref). Parallel and MPI-compatible.
+"""
+function Base.any(f, t::PDVecIterator)
+    result::Bool = Folds.any(t.vector.segments) do segment
+        any(f, t.selector(segment))
+    end
+    return merge_remote_reductions(t.vector.communicator, |, result)
+end
+
+"""
+    map!(f, values(::PDVec))
+    map!(f, dst::PDVec, values(::PDVec))
+
+In-place parallel `map!` on values of a [`PDVec`](@ref). If `dst` is provided, results are
+written there. Only defined for `values` as efficiently changing keys in a thread-safe and
+distributed way is not possible.
+"""
+function Base.map!(f, t::PDVecVals)
+    Folds.foreach(t.vector.segments) do segment
+        for (k, v) in segment
+            segment[k] = f(v)
+        end
+        # Filtered separately to prevent messing up the dict while iterating it.
+        filter!(p -> !iszero(p[2]), segment)
+    end
+    return t.vector
+end
+function Base.map!(f, dst::PDVec, src::PDVecVals)
+    check_compatibility(dst, src)
+    if dst === src.vector
+        map!(f, src)
+    else
+        Folds.foreach(dst.segments, src.vector.segments) do d, s
+            empty!(d)
+            for (k, v) in s
+                new_val = f(v)
+                if !iszero(new_val)
+                    d[k] = new_val
+                end
+            end
+        end
+    end
+    return dst
+end
+
+"""
+    map(f, values(::PDVec))
+
+Out-of-place parallel `map` on values of a [`PDVec`](@ref). Returns a new
+[`PDVec`](@ref). Only defined for `values` as efficiently changing keys in a thread-safe and
+distributed way is not possible.
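+
+For example (a sketch, with `pv` a `PDVec`):
+
+```julia
+pv2 = map(abs2, values(pv))  # new PDVec of squared magnitudes; zero results are dropped
+```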
+""" +function Base.map(f, src::PDVecVals) + return map!(f, copy(src.vector), src) +end + +""" + filter!(pred, [dst::PDVec, ], keys(::PDVec)) + filter!(pred, [dst::PDVec, ], values(::PDVec)) + filter!(pred, [dst::PDVec, ], pairs(::PDVec)) + +In-place parallel `filter!` on an iterator over a [`PDVec`](@ref). If `dst` is provided, +results are written there. +""" +function Base.filter!(f, src::PDVecIterator) + Folds.foreach(src.vector.segments) do segment + if src.selector === pairs + filter!(f, segment) + elseif src.selector === keys + filter!(p -> f(p[1]), segment) + elseif src.selector === values + filter!(p -> f(p[2]), segment) + end + end + return src.vector +end + +function Base.filter!(f, dst::PDVec, src::PDVecIterator) + if dst === src.vector + return filter!(f, src::PDVecIterator) + end + Folds.foreach(dst.segments, src.vector.segments) do d, s + empty!(d) + for ((k, v), x) in zip(s, src.selector(s)) + if f(x) + d[k] = v + end + end + end + return dst +end + +""" + filter(f, keys(::PDVec)) + filter(f, values(::PDVec)) + filter(f, pairs(::PDVec)) + +Out-of-place parallel `filter` on an iterator over a [`PDVec`](@ref). Returns a new +[`PDVec`](@ref). +""" +function Base.filter(f, src::PDVecIterator) + new_src = copy(src.vector) + return filter!(f, src.selector(new_src)) +end + +### +### High-level linear algebra functions +### +function VectorInterface.scale!(t::PDVec, α::Number) + if iszero(α) + empty!(t) + else + map!(Base.Fix1(*, α), values(t)) + end + return t +end +function VectorInterface.scale!(dst::PDVec, src::PDVec, α::Number) + return map!(Base.Fix1(*, α), dst, values(src)) +end + +""" + dict_add!(d::Dict, s, α=true, β=true) + +Internal function similar to `add!`, but on `Dict`s. `s` can be any iterator of pairs. +""" +function dict_add!(d::Dict, s, α=true, β=true) + if iszero(β) + empty!(d) + elseif β ≠ one(β) + map!(Base.Fix1(*, β), values(d)) + end + for (key, s_value) in s + d_value = get(d, key, zero(valtype(d))) + new_value = d_value + α * s_value + if iszero(new_value) + delete!(d, key) + else + d[key] = new_value + end + end + return d +end + +function VectorInterface.add!(dst::PDVec, src::PDVec, α::Number=true, β::Number=true) + check_compatibility(dst, src) + Folds.foreach(dst.segments, src.segments) do d, s + dict_add!(d, s, α, β) + end + return dst +end + +function VectorInterface.inner(l::PDVec, r::PDVec) + check_compatibility(l, r) + if l === r + return sum(abs2, values(l); init=zero(scalartype(l))) + else + T = promote_type(valtype(l), valtype(r)) + l_segs = l.segments + r_segs = r.segments + res = Folds.sum(zip(l_segs, r_segs); init=zero(T)) do (l_seg, r_seg) + sum(r_seg; init=zero(T)) do (k, v) + conj(get(l_seg, k, zero(valtype(l_seg)))) * v + end + end::T + return merge_remote_reductions(r.communicator, +, res) + end +end +function VectorInterface.inner(fd::FrozenDVec, p::PDVec) + res = zero(promote_type(valtype(fd), valtype(p))) + for (k, v) in pairs(fd) + res += conj(p[k]) * v + end + return merge_remote_reductions(p.communicator, +, res) +end +function VectorInterface.inner(l::AbstractDVec, r::PDVec) + res = sum(pairs(r)) do (k, v) + conj(l[k]) * v + end + return res +end +VectorInterface.inner(p::PDVec, fd::FrozenDVec) = conj(dot(fd, p)) +VectorInterface.inner(l::PDVec, r::AbstractDVec) = conj(dot(r, l)) + +function Base.real(v::PDVec) + dst = similar(v, real(valtype(v))) + return map!(real, dst, values(v)) +end +function Base.imag(v::PDVec) + dst = similar(v, real(valtype(v))) + return map!(imag, dst, values(v)) +end + +### +### Operator linear algebra 
operations
+###
+"""
+    mul!(y::PDVec, A::AbstractHamiltonian, x::PDVec[, w::PDWorkingMemory])
+
+Perform `y = A * x` in-place. The working memory `w` is required to facilitate
+threaded/distributed operations. If not passed, a new instance will be allocated. `y` and `x`
+may be the same vector.
+"""
+function LinearAlgebra.mul!(
+    y::PDVec, op::AbstractHamiltonian, x::PDVec,
+    w=PDWorkingMemory(y; style=IsDeterministic()),
+)
+    if w.style ≢ IsDeterministic()
+        throw(ArgumentError(
+            "Attempted to use `mul!` with non-deterministic working memory. " *
+            "Use `apply_operator!` instead."
+        ))
+    end
+    _, _, wm, y = apply_operator!(w, y, x, op)
+    return y
+end
+
+"""
+    dot(y::PDVec, A::AbstractHamiltonian, x::PDVec[, w::PDWorkingMemory])
+
+Compute `y ⋅ A ⋅ x`. The working memory `w` is required to facilitate threaded/distributed
+operations with non-diagonal `A`. If needed and not passed, a new instance will be
+allocated. `A` can be replaced with a tuple of operators.
+"""
+function LinearAlgebra.dot(t::PDVec, op::AbstractHamiltonian, u::PDVec, w)
+    return dot(LOStructure(op), t, op, u, w)
+end
+function LinearAlgebra.dot(t::PDVec, op::AbstractHamiltonian, u::PDVec)
+    return dot(LOStructure(op), t, op, u)
+end
+function LinearAlgebra.dot(
+    ::IsDiagonal, t::PDVec, op::AbstractHamiltonian, u::PDVec, w=nothing
+)
+    T = promote_type(valtype(t), eltype(op), valtype(u))
+    return sum(pairs(u); init=zero(T)) do (k, v)
+        T(conj(t[k]) * diagonal_element(op, k) * v)
+    end
+end
+function LinearAlgebra.dot(
+    ::LOStructure, left::PDVec, op::AbstractHamiltonian, right::PDVec, w=nothing
+)
+    # First two cases: only one vector is distributed. Avoid shuffling things around
+    # by placing that one on the left to reduce the need for communication.
+    if !is_distributed(left) && is_distributed(right)
+        return dot(AdjointUnknown(), left, op, right)
+    elseif is_distributed(left) && !is_distributed(right)
+        return conj(dot(AdjointUnknown(), right, op', left))
+    end
+    # Other cases: both vectors distributed or not distributed. Put the shorter vector
+    # on the right as is done for regular DVecs.
+    if length(left) < length(right)
+        return conj(dot(AdjointUnknown(), right, op', left, w))
+    else
+        return dot(AdjointUnknown(), left, op, right, w)
+    end
+end
+# Default variant: also called from other LOStructures.
+function LinearAlgebra.dot(
+    ::AdjointUnknown, t::PDVec, op::AbstractHamiltonian, source::PDVec, w=nothing
+)
+    if is_distributed(t)
+        if isnothing(w)
+            target = copy_to_local!(PDWorkingMemory(t), t)
+        else
+            target = copy_to_local!(w, t)
+        end
+    else
+        target = t
+    end
+    return dot_from_right(target, op, source)
+end
+
+function dot_from_right(target, op, source::PDVec)
+    T = promote_type(valtype(target), valtype(source), eltype(op))
+    result = sum(pairs(source); init=zero(T)) do (k, v)
+        res = conj(target[k]) * diagonal_element(op, k) * v
+        for (k_off, v_off) in offdiagonals(op, k)
+            res += conj(target[k_off]) * v_off * v
+        end
+        res
+    end
+    return result::T
+end
+
+function LinearAlgebra.dot(t::PDVec, ops::Tuple, source::PDVec, w=nothing)
+    if is_distributed(t) && any(LOStructure(op) ≢ IsDiagonal() for op in ops)
+        if isnothing(w)
+            target = copy_to_local!(PDWorkingMemory(t), t)
+        else
+            target = copy_to_local!(w, t)
+        end
+    else
+        target = t
+    end
+    return map(ops) do op
+        dot_from_right(target, op, source)
+    end
+end
+
+"""
+    FrozenPDVec
+
+Parallel version of [`FrozenDVec`](@ref). See: [`freeze`](@ref), [`PDVec`](@ref).
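+
+A rough usage sketch (names are illustrative, not a doctest): freezing snapshots the
+stored pairs so they can be reused for cheap projected overlaps.
+
+```julia
+v = PDVec(addr => 1.0)  # `addr` stands for any valid address
+f = freeze(v)           # snapshot the stored pairs, segment by segment
+dot(f, v)               # overlap against the snapshot; `FrozenPDVec` exists to make such projections fast
+```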
+""" +struct FrozenPDVec{K,V,N} <: AbstractProjector + segments::NTuple{N,Vector{Pair{K,V}}} +end +Base.keytype(::FrozenPDVec{K}) where {K} = K +Base.valtype(::FrozenPDVec{<:Any,V}) where {V} = V +Base.eltype(::FrozenPDVec{K,V}) where {K,V} = Pair{K,V} +Base.pairs(fd::FrozenPDVec) = Iterators.flatten(fd.segments) + +function freeze(dv::PDVec{K,V,N}) where {K,V,N} + return FrozenPDVec{K,V,N}(map(collect, dv.segments)) +end + +function VectorInterface.inner(fd::FrozenPDVec, dv::AbstractDVec) + T = promote_type(valtype(fd), valtype(dv)) + return Folds.sum(fd.segments) do segment + sum(segment; init=zero(T)) do (k, v) + dv[k] ⋅ v + end + end +end + +function VectorInterface.inner(fd::FrozenPDVec, dv::PDVec) + T = promote_type(valtype(fd), valtype(dv)) + res = Folds.sum(zip(fd.segments, dv.segments); init=zero(T)) do (l_seg, r_seg) + sum(l_seg; init=zero(T)) do (k, v) + T(conj(v) * get(r_seg, k, zero(valtype(r_seg)))) + end + end::T + return merge_remote_reductions(dv.communicator, +, res) +end diff --git a/src/DictVectors/pdworkingmemory.jl b/src/DictVectors/pdworkingmemory.jl new file mode 100644 index 000000000..32e5014fc --- /dev/null +++ b/src/DictVectors/pdworkingmemory.jl @@ -0,0 +1,270 @@ +""" + PDWorkingMemoryColumn + +A column in [`PDWorkingMemory`](@ref). Supports [`deposit!`](@ref) and +[`StochasticStyle`](@ref) and acts as a target for spawning. +""" +struct PDWorkingMemoryColumn{K,V,W<:AbstractInitiatorValue{V},I<:InitiatorRule,S,N} + segments::NTuple{N,Dict{K,W}} + initiator::I + style::S +end +function PDWorkingMemoryColumn(t::PDVec{K,V}, style=t.style) where {K,V} + n = total_num_segments(t.communicator, num_segments(t)) + W = initiator_valtype(t.initiator, V) + + segments = ntuple(_ -> Dict{K,W}(), n) + return PDWorkingMemoryColumn(segments, t.initiator, style) +end + +function deposit!(c::PDWorkingMemoryColumn{K,V,W}, k::K, val, parent) where {K,V,W} + segment_id = fastrange_hash(k, num_segments(c)) + segment = c.segments[segment_id] + new_val = get(segment, k, zero(W)) + to_initiator_value(c.initiator, k, V(val), parent) + if iszero(new_val) + delete!(segment, k) + else + segment[k] = new_val + end + return nothing +end + +function Base.getindex(c::PDWorkingMemoryColumn{K,V,W}, k::K) where {K,V,W} + segment_id = fastrange_hash(k, num_segments(c)) + segment = c.segments[segment_id] + return convert(V, get(segment, k, zero(W))) +end + +Base.length(c::PDWorkingMemoryColumn) = sum(length, c.segments) +Base.empty!(c::PDWorkingMemoryColumn) = foreach(empty!, c.segments) +Base.keytype(::PDWorkingMemoryColumn{K}) where {K} = K +Base.valtype(::PDWorkingMemoryColumn{<:Any,V}) where {V} = V +Base.eltype(::PDWorkingMemoryColumn{K,V}) where {K,V} = Pair{K,V} +num_segments(c::PDWorkingMemoryColumn{<:Any,<:Any,<:Any,<:Any,<:Any,N}) where {N} = N +segment_type(::Type{<:PDWorkingMemoryColumn{K,<:Any,W}}) where {K,W} = Dict{K,W} +StochasticStyle(c::PDWorkingMemoryColumn) = c.style + +""" + PDWorkingMemory(t::PDVec) + +The working memory that handles threading and MPI distribution for operations that involve +operators, such as FCIQMC propagation, operator-vector multiplication and three-way +dot products with [`PDVec`](@ref)s. + +The working memory is structured in a series of columns, where each has a number of segments +(see [`PDVec`](@ref)) equal to the number of segments across all MPI ranks. The purpose of +this organisation is to allow spawning in parallel without using locks or atomic operations. 
+
+The steps performed on a `PDWorkingMemory` during a typical operation are
+[`perform_spawns!`](@ref), [`collect_local!`](@ref), [`synchronize_remote!`](@ref), and
+[`move_and_compress!`](@ref).
+
+When used with three-argument dot products, a full copy of the left-hand side vector is
+materialized in the first column of the working memory on all ranks.
+"""
+struct PDWorkingMemory{
+    K,V,W<:AbstractInitiatorValue{V},S<:StochasticStyle{V},I<:InitiatorRule,C<:Communicator,N
+}
+    columns::Vector{PDWorkingMemoryColumn{K,V,W,I,S,N}}
+    style::S
+    initiator::I
+    communicator::C
+end
+function PDWorkingMemory(t::PDVec{K,V,S,D,I}; style=t.style) where {K,V,S,D,I}
+    if !(style isa StochasticStyle{V})
+        throw(ArgumentError("Incompatible style $style given to `PDWorkingMemory`"))
+    end
+    nrows = total_num_segments(t.communicator, num_segments(t))
+    columns = [PDWorkingMemoryColumn(t, style) for _ in 1:num_segments(t)]
+
+    W = initiator_valtype(t.initiator, V)
+    return PDWorkingMemory(columns, style, t.initiator, t.communicator)
+end
+
+function Base.show(io::IO, w::PDWorkingMemory{K,V}) where {K,V}
+    print(io, "PDWorkingMemory{$K,$V} with $(length(w.columns)) columns")
+end
+
+StochasticStyle(w::PDWorkingMemory) = w.style
+Base.keytype(w::PDWorkingMemory{K}) where {K} = K
+Base.valtype(w::PDWorkingMemory{<:Any,V}) where {V} = V
+Base.eltype(w::PDWorkingMemory{K,V}) where {K,V} = Pair{K,V}
+
+"""
+    num_rows(w::PDWorkingMemory) -> Int
+
+Number of rows in the working memory. The number of rows is equal to the number of segments
+across all ranks.
+"""
+num_rows(w::PDWorkingMemory) = length(w.columns[1].segments)
+
+"""
+    num_columns(w::PDWorkingMemory) -> Int
+
+Number of columns in the working memory. The number of columns is equal to the number of
+segments on the local rank.
+"""
+num_columns(w::PDWorkingMemory) = length(w.columns)
+
+function Base.length(w::PDWorkingMemory)
+    result = sum(length, w.columns)
+    return merge_remote_reductions(w.communicator, +, result)
+end
+
+"""
+    MainSegmentIterator{W,D} <: AbstractVector{D}
+
+Iterates the main segments of a specified rank. See [`remote_segments`](@ref) and
+[`local_segments`](@ref).
+"""
+struct MainSegmentIterator{W,D} <: AbstractVector{D}
+    working_memory::W
+    rank::Int
+end
+
+"""
+    remote_segments(w::PDWorkingMemory, rank_id)
+
+Returns an iterator over the main segments that belong to rank `rank_id`. Iterates `Dict`s.
+
+See [`PDWorkingMemory`](@ref).
+"""
+function remote_segments(w::PDWorkingMemory, rank)
+    return MainSegmentIterator{typeof(w),segment_type(eltype(w.columns))}(w, rank)
+end
+
+"""
+    local_segments(w::PDWorkingMemory)
+
+Returns an iterator over the main segments on the current rank. Iterates `Dict`s.
+
+See [`PDWorkingMemory`](@ref).
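+
+As a concrete illustration of the layout (hypothetical sizes): with 2 ranks and 4 columns
+per rank there are 8 rows in total; rank 0 reaches rows 1 to 4 through `local_segments`
+and rows 5 to 8 through `remote_segments(w, 1)`.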
+""" +function local_segments(w::PDWorkingMemory) + rank = mpi_rank(w.communicator) + return MainSegmentIterator{typeof(w),segment_type(eltype(w.columns))}(w, rank) +end + +Base.size(it::MainSegmentIterator) = (num_columns(it.working_memory),) + +function Base.getindex(it::MainSegmentIterator, index) + row_index = index + it.rank * num_columns(it.working_memory) + return it.working_memory.columns[1].segments[row_index] +end + +# Internal function to ensure type stability in `perform_spawns!` +function _spawn_column!(ham, column, segment, boost) + empty!(column) + _, stats = step_stats(column.style) + for (k, v) in segment + stats += apply_column!(column, ham, k, v, boost) + end + return stats +end + +""" + perform_spawns!(w::PDWorkingMemory, t::PDVec, ham, boost) + +Perform spawns from `t` through `ham` to `w`. + +See [`PDVec`](@ref) and [`PDWorkingMemory`](@ref). +""" +function perform_spawns!(w::PDWorkingMemory, t::PDVec, ham, boost) + if num_columns(w) ≠ num_segments(t) + error("working memory incompatible with vector") + end + stat_names, init_stats = step_stats(w.style) + stats = Folds.sum(zip(w.columns, t.segments); init=init_stats) do (column, segment) + _spawn_column!(ham, column, segment, boost) + end ::typeof(init_stats) + return stat_names, stats +end + +""" + collect_local!(w::PDWorkingMemory) + +Collect each row in `w` into its main segment. This step must be performed before using +[`local_segments`](@ref) or [`remote_segments`](@ref) to move the values elsewhere. + +See [`PDWorkingMemory`](@ref). +""" +function collect_local!(w::PDWorkingMemory) + ncols = num_columns(w) + Folds.foreach(1:num_rows(w)) do i + for j in 2:ncols + dict_add!(w.columns[1].segments[i], w.columns[j].segments[i]) + end + end +end + +""" + synchronize_remote!(w::PDWorkingMemory) + +Synchronize non-local segments across MPI. Controlled by the [`Communicator`](@ref). This +can only be perfomed after [`collect_local!`](@ref). + +See [`PDWorkingMemory`](@ref). +""" +function synchronize_remote!(w::PDWorkingMemory) + synchronize_remote!(w.communicator, w) +end + +""" + move_and_compress!(dst::PDVec, src::PDWorkingMemory) + move_and_compress!(::CompressionStrategy, dst::PDVec, src::PDWorkingMemory) + +Move the values in `src` to `dst`, compressing the according to the +[`CompressionStrategy`](@ref) on the way. This step can only be performed after +[`collect_local!`](@ref) and [`synchronize_remote!`](@ref). + +See [`PDWorkingMemory`](@ref). +""" +function move_and_compress!(dst::PDVec, src::PDWorkingMemory) + compression = CompressionStrategy(StochasticStyle(src)) + stat_names, init = step_stats(compression) + stats = Folds.mapreduce(add, dst.segments, local_segments(src); init) do dst_seg, src_seg + empty!(dst_seg) + compress!( + compression, dst_seg, + (from_initiator_value(src.initiator, v) for (k, v) in pairs(src_seg)), + ) + end + return dst, stat_names, stats +end + +""" + main_column(::PDWorkingMemory) -> PDVec + +Return the "main" column of the working memory wrapped in a [`PDVec`](@ref). + +See [`PDWorkingMemory`](@ref). 
+""" +function main_column(w::PDWorkingMemory{K,V,W,S}) where {K,V,W,S} + return w.columns[1] +end + +function copy_to_local!(w, v::AbstractDVec) + return v +end + +function copy_to_local!(w, t::PDVec) + return copy_to_local!(w.communicator, w, t) +end + +working_memory(t::PDVec) = PDWorkingMemory(t) + +function Interfaces.apply_operator!( + working_memory::PDWorkingMemory, target::PDVec, source::PDVec, ham, + boost=1, compress::Val{C}=Val(true) +) where {C} + + stat_names, stats = perform_spawns!(working_memory, source, ham, boost) + collect_local!(working_memory) + synchronize_remote!(working_memory) + target, comp_stat_names, comp_stats = move_and_compress!(target, working_memory) + + stat_names = (stat_names..., comp_stat_names...) + stats = (stats..., comp_stats...) + + return stat_names, stats, working_memory, target +end diff --git a/src/DictVectors/projectors.jl b/src/DictVectors/projectors.jl index e6e245ccc..19edbd8c2 100644 --- a/src/DictVectors/projectors.jl +++ b/src/DictVectors/projectors.jl @@ -135,7 +135,8 @@ end """ FrozenDVec -See: [`freeze`](@ref). +A frozen [`DVec`](@ref)(s) can't be modified or used in the conventional manner, but support +faster dot products. See: [`freeze`](@ref). """ struct FrozenDVec{K,V} <: AbstractProjector pairs::Vector{Pair{K,V}} @@ -145,7 +146,7 @@ Base.valtype(::FrozenDVec{<:Any,V}) where {V} = V Base.eltype(::FrozenDVec{K,V}) where {K,V} = Pair{K,V} Base.pairs(fd::FrozenDVec) = fd.pairs -freeze(dv) = FrozenDVec(collect(pairs(localpart(dv)))) +freeze(dv::AbstractDVec) = FrozenDVec(collect(pairs(localpart(dv)))) freeze(p::AbstractProjector) = p diff --git a/src/Interfaces/Interfaces.jl b/src/Interfaces/Interfaces.jl index 69a152feb..644e06c7a 100644 --- a/src/Interfaces/Interfaces.jl +++ b/src/Interfaces/Interfaces.jl @@ -25,6 +25,7 @@ Follow the links for the definitions of the interfaces! * [`deposit!`](@ref) * [`default_style`](@ref) * [`CompressionStrategy`](@ref) +* The interface from [VectorInterface.jl](https://github.com/Jutho/VectorInterface.jl). ## Functions Rimu.jl uses to do FCIQMC: diff --git a/src/Interfaces/dictvectors.jl b/src/Interfaces/dictvectors.jl index a390dda83..56995127f 100644 --- a/src/Interfaces/dictvectors.jl +++ b/src/Interfaces/dictvectors.jl @@ -8,8 +8,9 @@ support the interface from [VectorInterface.jl](https://github.com/Jutho/VectorI and are designed to work well for quantum Monte Carlo with [`lomc!`](@ref Main.lomc!) and for matrix-free linear algebra with [KrylovKit](https://github.com/Jutho/KrylovKit.jl). -Concrete implementations are available as [`DVec`](@ref Main.DictVectors.DVec) and -[`InitiatorDVec`](@ref Main.DictVectors.InitiatorDVec). +Concrete implementations are available as [`PDVec`](@ref Main.DictVectors.PDVec), +[`DVec`](@ref Main.DictVectors.DVec), and [`InitiatorDVec`](@ref +Main.DictVectors.InitiatorDVec). `AbstractDVec`s have a [`StochasticStyle`](@ref) which selects the spawning algorithm in `FCIQMC`. Looking up an element that is not stored in the `AbstractDVec` should return a @@ -54,7 +55,7 @@ end localpart(dv) -> AbstractDVec Get the part of `dv` that is located on this MPI rank. Returns `dv` itself for -`AbstractDVec`s. +vectors that can't be MPI distributed ([`DVec`](@ref Main.DictVectors.DVec)s and [`InitiatorDVec`](@ref Main.DictVectors.InitiatorDVec)s). 
""" localpart(dv) = dv # default for local data @@ -127,7 +128,8 @@ function apply_operator!( target, working_memory, spawn_stats ) if C - comp_names, comp_stats = compress!(target) + comp_names, _ = step_stats(CompressionStrategy(target)) + comp_stats = compress!(target) names = (spawn_names..., comp_names...) stats = (spawn_stats..., comp_stats...) else diff --git a/src/Interfaces/stochasticstyles.jl b/src/Interfaces/stochasticstyles.jl index e2594ee60..c6058dcb6 100644 --- a/src/Interfaces/stochasticstyles.jl +++ b/src/Interfaces/stochasticstyles.jl @@ -71,8 +71,9 @@ returns a relevant subtype. The default is [`NoCompression`](@ref). When defining a new `CompressionStrategy`, subtype it as `MyCompressionStrategy <: CompressionStrategy` and define these methods: -* [`compress!(s::MyCompressionStrategy, v)`](@ref compress!) -* [`compress!(s::MyCompressionStrategy, w, v)`](@ref compress!) +* [`compress!(s::CompressionStrategy, v)`](@ref compress!) +* [`compress!(s::CompressionStrategy, w, v)`](@ref compress!) +* [`step_stats(s::CompressionStrategy)`](@ref step_stats) """ abstract type CompressionStrategy end @@ -84,6 +85,7 @@ Default [`CompressionStrategy`](@ref). Leaves the vector intact. struct NoCompression <: CompressionStrategy end CompressionStrategy(::StochasticStyle) = NoCompression() +CompressionStrategy(v) = CompressionStrategy(StochasticStyle(v)) """ compress!([::CompressionStrategy,] v) -> ::NTuple{N,::Symbol}, ::NTuple{N} @@ -98,16 +100,21 @@ Returns two tuples, containing the names and values of statistics that are to be compress!(v) = compress!(CompressionStrategy(StochasticStyle(v)), v) compress!(w, v) = compress!(CompressionStrategy(StochasticStyle(v)), w, v) -compress!(::NoCompression, v) = (), () +step_stats(::NoCompression) = (), () + +compress!(::NoCompression, v) = () function compress!(::NoCompression, w, v) - copy!(w, v) - return (), () + for (add, val) in pairs(v) + w[add] = val + end + return () end """ step_stats(::StochasticStyle) + step_stats(::CompressionStrategy) -Return a tuple of names (`Symbol` or `String`) and a tuple of zeros of values of the same +Return a tuple of stat names (`Symbol` or `String`) and a tuple of zeros of the same length. These will be reported as columns in the `DataFrame` returned by [`lomc!`](@ref Main.lomc!). """ step_stats(v) = step_stats(StochasticStyle(v)) diff --git a/src/RMPI/RMPI.jl b/src/RMPI/RMPI.jl index 2234b258b..37319b2f5 100644 --- a/src/RMPI/RMPI.jl +++ b/src/RMPI/RMPI.jl @@ -14,7 +14,8 @@ using Random using StaticArrays using VectorInterface -import ..Interfaces: sort_into_targets! +import Rimu: sort_into_targets! +import ..DictVectors: mpi_rank, mpi_comm, mpi_size export MPIData export mpi_rank, is_mpi_root, @mpi_root, mpi_barrier diff --git a/src/RMPI/mpidata.jl b/src/RMPI/mpidata.jl index eb2e7a876..5038a79d6 100644 --- a/src/RMPI/mpidata.jl +++ b/src/RMPI/mpidata.jl @@ -360,7 +360,7 @@ function get_overlaps_nondiagonal!(names, values, operators, vecs::NTuple{N}, :: end end -function Rimu.all_overlaps(operators::Tuple, vecs::NTuple{N,MPIData}, ::Val{B}) where {N,B} +function Rimu.all_overlaps(operators::Tuple, vecs::NTuple{N,MPIData}, _, ::Val{B}) where {N,B} T = promote_type((valtype(v) for v in vecs)..., eltype.(operators)...) 
names = String[] values = T[] diff --git a/src/StochasticStyles/compression.jl b/src/StochasticStyles/compression.jl index 5014ffbcf..762a6e12b 100644 --- a/src/StochasticStyles/compression.jl +++ b/src/StochasticStyles/compression.jl @@ -10,28 +10,33 @@ struct ThresholdCompression{T} <: CompressionStrategy end ThresholdCompression() = ThresholdCompression(1) -function _threshold_compress!(t::ThresholdCompression, w, v) - for (add, val) in pairs(v) +_set_value!(w, key, val) = w[key] = val +_set_value!(w::Dict, key, val) = val ≠ 0 ? w[key] = val : delete!(w, key) + +step_stats(::ThresholdCompression) = (:len_before,), (0,) + +function _threshold_compress!(t::ThresholdCompression, w, pairs) + for (add, val) in pairs prob = abs(val) / t.threshold if prob < 1 # projection is only necessary if abs(val) < s.threshold val = ifelse(prob > rand(), t.threshold * sign(val), zero(val)) - w[add] = val end + _set_value!(w, add, val) end end function compress!(t::ThresholdCompression, v) len_before = length(v) v_local = localpart(v) - _threshold_compress!(t, v_local, v_local) - return (:len_before,), (len_before,) + _threshold_compress!(t, v_local, pairs(v_local)) + return (len_before,) end function compress!(t::ThresholdCompression, w, v) - len_before = length(w) + len_before = length(v) w_local = localpart(w) v_local = localpart(v) empty!(w_local) - _threshold_compress!(t, w_local, v_local) - return (:len_before,), (len_before,) + _threshold_compress!(t, w_local, pairs(v_local)) + return (len_before,) end diff --git a/src/StochasticStyles/spawning.jl b/src/StochasticStyles/spawning.jl index 736e9beb6..ac2341185 100644 --- a/src/StochasticStyles/spawning.jl +++ b/src/StochasticStyles/spawning.jl @@ -173,7 +173,7 @@ end @inline function spawn!(s::Exact, w, offdiags::AbstractVector, add, val, boost=1) T = valtype(w) - spawns = sum(offdiags) do (new_add, mat_elem) + spawns = sum(offdiags; init=zero(T)) do (new_add, mat_elem) abs(projected_deposit!( w, new_add, val * mat_elem, add => val, s.threshold )) diff --git a/src/lomc.jl b/src/lomc.jl index 8e7af26a9..187a4b091 100644 --- a/src/lomc.jl +++ b/src/lomc.jl @@ -243,11 +243,18 @@ end Linear operator Monte Carlo: Perform a projector quantum Monte Carlo simulation for determining the lowest eigenvalue of `ham`. `v` can be a single starting vector. The default choice is + +```julia +v = PDVec(starting_address(ham) => 10; style=IsStochasticInteger()) +``` +if threading is available or + ```julia v = DVec(starting_address(ham) => 10; style=IsStochasticInteger()) ``` -and triggers the integer walker FCIQMC algorithm. See [`DVec`](@ref) and -[`StochasticStyle`](@ref). + +otherwise. It triggers the integer walker FCIQMC algorithm. See [`PDVec`](@ref), +[`DVec`](@ref) and [`StochasticStyle`](@ref). # Keyword arguments, defaults, and precedence: @@ -309,7 +316,11 @@ function lomc!(ham, v; df=DataFrame(), name="lomc!", kwargs...) return lomc!(state, df; name) end function lomc!(ham; style=IsStochasticInteger(), kwargs...) - v = DVec(starting_address(ham)=>10; style) + if Threads.nthreads() > 1 + v = PDVec(starting_address(ham) => 10; style) + else + v = DVec(starting_address(ham) => 10; style) + end return lomc!(ham, v; kwargs...) 
end

# For continuation, you can pass a QMCState and a DataFrame
diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index 94ebaf805..c9c5e1767 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -29,7 +29,7 @@ num_replicas(::ReplicaStrategy{N}) where {N} = N
 Return the names and values of statistics related to `N` replicas consistent with the
 [`ReplicaStrategy`](@ref) `RS`. `names` should be a tuple of `Symbol`s or `String`s and
 `values` should be a tuple of the same
-length. This fuction will be called every [`reporting_interval`](@ref) steps from [`lomc!`](@ref),
+length. This function will be called every [`reporting_interval`](@ref) steps from [`lomc!`](@ref),
 or once per time step if `reporting_interval` is not defined. Part of the
 [`ReplicaStrategy`](@ref) interface. See also [`ReplicaState`](@ref).
@@ -67,7 +67,7 @@ interface for implementing operators).
 # Transformed Hamiltonians
 
 If a transformed Hamiltonian `G` has been passed to [`lomc!`](@ref) then overlaps can be calculated by
-passing the same transformed Hamiltonian to `AllOverlaps` by setting `transform=G`. A warning is given 
+passing the same transformed Hamiltonian to `AllOverlaps` by setting `transform=G`. A warning is given
 if these two Hamiltonians do not match.
 
 Implemented transformations are:
@@ -75,26 +75,26 @@ Implemented transformations are:
 * [`GutzwillerSampling`](@ref)
 * [`GuidingVectorSampling`](@ref)
 
-In the case of a transformed Hamiltonian the overlaps are defined as follows. For a similarity transformation 
+In the case of a transformed Hamiltonian the overlaps are defined as follows. For a similarity transformation
 `G` of the Hamiltonian (see e.g. [`GutzwillerSampling`](@ref).)
 ```math
     \\hat{G} = f \\hat{H} f^{-1}.
 ```
 The expectation value of an operator ``\\hat{A}`` is
 ```math
-    \\langle \\hat{A} \\rangle = \\langle \\psi | \\hat{A} | \\psi \\rangle 
+    \\langle \\hat{A} \\rangle = \\langle \\psi | \\hat{A} | \\psi \\rangle
     = \\frac{\\langle \\phi | f^{-1} \\hat{A} f^{-1} | \\phi \\rangle}{\\langle \\phi | f^{-2} | \\phi \\rangle}
 ```
-where 
+where
 ```math
 | \\phi \\rangle = f | \\psi \\rangle
-``` 
+```
 is the (right) eigenvector of ``\\hat{G}`` and ``| \\psi \\rangle`` is an eigenvector of ``\\hat{H}``.
-For a K-tuple of input operators `(\\hat{A}_1, ..., \\hat{A}_K)`, overlaps of 
-``\\langle \\phi | f^{-1} \\hat{A} f^{-1} | \\phi \\rangle`` are reported as `c{i}_Op{k}_c{j}`. 
-The correct vector-vector overlap ``\\langle \\phi | f^{-2} | \\phi \\rangle`` is reported *last* 
-as `c{i}_Op{K+1}_c{j}`. This is in addition to the *bare* vector-vector overlap 
+For a K-tuple of input operators `(\\hat{A}_1, ..., \\hat{A}_K)`, overlaps of
+``\\langle \\phi | f^{-1} \\hat{A} f^{-1} | \\phi \\rangle`` are reported as `c{i}_Op{k}_c{j}`.
+The correct vector-vector overlap ``\\langle \\phi | f^{-2} | \\phi \\rangle`` is reported *last*
+as `c{i}_Op{K+1}_c{j}`. This is in addition to the *bare* vector-vector overlap
 ``\\langle \\phi | \\phi \\rangle`` that is reported as `c{i}_dot_c{j}`. In either case
 the `c{i}_dot_c{j}` overlap can be omitted with the flag `vecnorm=false`.
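+
+As a sketch of how the reported columns might be combined after a run (`A` and `dv` are
+placeholders; `ratio_of_means` is from `Rimu.StatsTools`): with a single operator and a
+transform, `c1_Op1_c2` holds the numerator and `c1_Op2_c2` the normalisation overlap, so
+
+```julia
+df, _ = lomc!(G, dv; replica=AllOverlaps(2; operator=A, transform=G))
+ratio_of_means(df.c1_Op1_c2, df.c1_Op2_c2)  # estimator for the expectation value of A
+```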
@@ -116,7 +116,7 @@ function AllOverlaps(num_replicas=2; operator=nothing, transform=nothing, vecnor else fsq = Rimu.Hamiltonians.TransformUndoer(transform) ops = (map(op -> Rimu.Hamiltonians.TransformUndoer(transform, op), operators)..., fsq) - end + end if !vecnorm && length(ops) == 0 return NoStats(num_replicas) end @@ -127,16 +127,19 @@ end function replica_stats(rs::AllOverlaps{N,<:Any,<:Any,B}, replicas::NTuple{N}) where {N,B} # Not using broadcasting because it wasn't inferred properly. vecs = ntuple(i -> replicas[i].v, Val(N)) - return all_overlaps(rs.operators, vecs, Val(B)) + wms = ntuple(i -> replicas[i].wm, Val(N)) + return all_overlaps(rs.operators, vecs, wms, Val(B)) end """ - all_overlaps(operators, vectors, vecnorm=true) + all_overlaps(operators, vectors, working_memories, vecnorm=true) Get all overlaps between vectors and operators. This function is overloaded for `MPIData`. The flag `vecnorm` can disable the vector-vector overlap `c{i}_dot_c{j}`. """ -function all_overlaps(operators::Tuple, vecs::NTuple{N,AbstractDVec}, ::Val{B}) where {N,B} +function all_overlaps( + operators::Tuple, vecs::NTuple{N,AbstractDVec}, wms, ::Val{B} +) where {N,B} T = promote_type((valtype(v) for v in vecs)..., eltype.(operators)...) names = String[] values = T[] @@ -145,13 +148,20 @@ function all_overlaps(operators::Tuple, vecs::NTuple{N,AbstractDVec}, ::Val{B}) push!(names, "c$(i)_dot_c$(j)") push!(values, dot(vecs[i], vecs[j])) end + if all(isdiag, operators) + v = vecs[i] + else + v = DictVectors.copy_to_local!(wms[i], vecs[i]) + end for (k, op) in enumerate(operators) push!(names, "c$(i)_Op$(k)_c$(j)") - push!(values, dot(vecs[i], op, vecs[j])) + # Using dot_from_right here because dot will try to copy_to_local! if + # called directly. + push!(values, dot_from_right(v, op, vecs[j])) end end - - num_reports = (N * (N - 1) ÷ 2) * (B + length(operators)) + + num_reports = (N * (N - 1) ÷ 2) * (B + length(operators)) return SVector{num_reports,String}(names).data, SVector{num_reports,T}(values).data end @@ -163,7 +173,7 @@ Used as a sanity check before starting main [`lomc!`](@ref) loop. """ function check_transform(r::AllOverlaps, ham::AbstractHamiltonian) ops = r.operators - if !isempty(ops) + if !isempty(ops) op_transform = all(op -> typeof(op)<:Rimu.Hamiltonians.TransformUndoer, ops) ham_transform = hasproperty(ham, :hamiltonian) # need a better test for this if op_transform && ham_transform && !all(op -> ham == op.transform, ops) @@ -175,4 +185,4 @@ function check_transform(r::AllOverlaps, ham::AbstractHamiltonian) end end return nothing -end \ No newline at end of file +end diff --git a/test/DictVectors.jl b/test/DictVectors.jl index e74c99386..71d1031c5 100644 --- a/test/DictVectors.jl +++ b/test/DictVectors.jl @@ -48,8 +48,6 @@ function test_dvec_interface(type; kwargs...) end @testset "properties" begin u = type(:a => 1, :b => 2; kwargs...) - v = type(0.5 => 0.1im; kwargs...) - @test valtype(u) ≡ scalartype(u) ≡ Int @test keytype(u) ≡ Symbol @test eltype(u) ≡ Pair{Symbol,Int} @@ -57,6 +55,7 @@ function test_dvec_interface(type; kwargs...) @test ndims(u) == 1 @test u isa AbstractDVec{Symbol,Int} + v = type(0.5 => 0.1im; kwargs...) @test valtype(v) ≡ scalartype(v) ≡ ComplexF64 @test keytype(v) ≡ Float64 @test eltype(v) ≡ Pair{Float64,ComplexF64} @@ -109,10 +108,18 @@ function test_dvec_interface(type; kwargs...) 
@test e == zerovector(u) == empty(u) == similar(u) @test e == zerovector!(copy(u)) == zerovector!(copy(u)) == empty!(copy(u)) @test e ≡ zerovector!(e) ≡ zerovector!!(e) ≡ empty!(e) + @test typeof(empty(u)) === typeof(u) + @test typeof(zerovector(u)) === typeof(u) @test scalartype(zerovector(u, Int)) ≡ Int @test scalartype(empty(u, Int)) ≡ Int @test eltype(similar(u, Float64, Int)) ≡ Pair{Float64,Int} + @test eltype(similar(u, String)) ≡ Pair{Int,String} + @test eltype(similar(u, Float64, String)) ≡ Pair{Float64,String} + + v = type(1 => 1; kwargs...) + @test zerovector!!(v, Int) ≡ v + @test zerovector!!(v, Float64) ≢ v end @testset "scale(!)" begin u = type(1 => 1.0 + im, 2 => -2.0im; kwargs...) @@ -134,6 +141,8 @@ function test_dvec_interface(type; kwargs...) w = type(1 => 1; kwargs...) @test scale(w, 1 + im) == (1 + im) * w == type(1 => 1 + im) + @test scale!!(w, 1 + im) ≢ w + @test scale!!(w, 1) ≡ w end @testset "add(!)" begin u = type(45 => 10.0, 12 => 3.5; kwargs...) @@ -149,6 +158,11 @@ function test_dvec_interface(type; kwargs...) @test axpby!(2, u, -7, copy(v)) == x @test u + type(12 => -3.5 + im; kwargs...) == type(45 => 10, 12 => im) + + @test add!!(v, u, 2, -7) ≡ v + @test add!!(v, u, 2 + im, -7) ≢ v + @test add!!(v, u, 2, -7 - im) ≢ v + @test add!!(v, type(1 => im; kwargs...), 2, -7 - im) ≢ v end @testset "inner" begin u = type(zip(1:4, [1, 1.5, im, -im]); kwargs...) @@ -196,8 +210,9 @@ function test_dvec_interface(type; kwargs...) u = type(Dict(ps); kwargs...) - @test reduce(+, values(u); init=1) == sum(vs) + 1 - @test reduce(+, keys(u); init=-1) == sum(ks) - 1 + @test reduce(+, values(u); init=0) == sum(vs) + @test reduce(*, keys(u)) == prod(ks) + @test mapreduce(x -> x[1], +, pairs(u)) == sum(ks) @test mapreduce(x -> x + 1.1, +, values(u)) ≈ sum(x -> x + 1.1, vs) @test mapreduce(abs2, *, keys(u)) == prod(abs2, ks) @@ -207,6 +222,9 @@ function test_dvec_interface(type; kwargs...) @test minimum(abs2, values(u)) == minimum(abs2, vs) @test maximum(x -> x + 1.1, keys(u)) ≈ maximum(x -> x + 1.1, ks) @test prod(p -> p[1] - p[2], pairs(u)) == prod(p -> p[1] - p[2], ps) + + v = type{Int,Int}(; kwargs...) + @test mapreduce(x -> x + 1, +, keys(v); init=0) == 0 end @testset "all, any" begin ks = ['a', 'b', 'c', 'd', 'e', 'f'] @@ -246,6 +264,20 @@ function test_dvec_interface(type; kwargs...) @test StochasticStyle(type(:a => 1 + 2im; kwargs...)) isa IsStochastic2Pop @test StochasticStyle(type(:a => SA[1 1; 1 1]; kwargs...)) isa StyleUnknown end + @testset "Stochastic styles convert eltype" begin + u = type(:a => 0f0; kwargs...) + @test valtype(u) === Float32 + @test StochasticStyle(u) == IsDeterministic{Float32}() + + v = type(:a => 1; style=IsDynamicSemistochastic(), kwargs...) + @test v[:a] isa Float64 + + w = type(:a => 1.0; style=IsStochasticInteger(), kwargs...) + @test w isa type{Symbol,Int} + + x = type(:a => 1.0; style=IsStochastic2Pop(), kwargs...) 
+ @test !isreal(x) + end end end @@ -254,39 +286,135 @@ end test_dvec_interface(DVec; capacity=200) end - @testset "Stochastic styles convert eltype" begin - dvec1 = DVec(:a => 0f0) - @test valtype(dvec1) === Float32 - @test StochasticStyle(dvec1) == IsDeterministic{Float32}() - - dvec2 = DVec(:a => 1, style=IsDynamicSemistochastic()) - @test dvec2[:a] isa Float64 - - dvec3 = DVec(:a => 1.0, style=IsStochasticInteger()) - @test dvec3 isa DVec{Symbol,Int} - - dvec4 = DVec(:a => 1.0, style=IsStochastic2Pop()) - @test !isreal(dvec4) - end end @testset "InitiatorDVec" begin @testset "interface tests" begin test_dvec_interface(InitiatorDVec; capacity=100) + test_dvec_interface(InitiatorDVec; initiator=DictVectors.CoherentInitiator(1)) + end +end + +using Rimu.DictVectors: num_segments, is_distributed + +@testset "PDVec" begin + @testset "constructor errors" begin + @test_throws ArgumentError PDVec(1 => 1; initiator="none") + @test_throws ArgumentError PDVec(1 => 1; communicator="none") end - @testset "Stochastic styles convert eltype" begin - dvec1 = InitiatorDVec(:a => 0f0) - @test valtype(dvec1) === Float32 - @test StochasticStyle(dvec1) == IsDeterministic{Float32}() + @testset "operations" begin + @testset "properties" begin + pd1 = PDVec(zip(1:10, 10:-1.0:1)) + pd2 = PDVec(zip(1:10, 10:-1.0:1)) + pd2[1] += 1e-13 + pd3 = PDVec(zip(1:10, 10:-1.0:1); style=IsDynamicSemistochastic()) + pd4 = PDVec(zip(1:9, 10:-1.0:2)) - dvec2 = InitiatorDVec(:a => 1, style=IsDynamicSemistochastic()) - @test dvec2[:a] isa Float64 + @test num_segments(pd1) == Threads.nthreads() + @test num_segments(pd2) == Threads.nthreads() - dvec3 = InitiatorDVec(:a => 1.0, style=IsStochasticInteger()) - @test dvec3 isa InitiatorDVec{Symbol,Int} + @test StochasticStyle(pd1) ≡ IsDeterministic() + @test StochasticStyle(pd3) ≡ IsDynamicSemistochastic() + + @test length(pd1) == length(pd2) == length(pd3) == 10 + @test pd1 == pd3 + @test pd1 != pd2 + @test pd2 ≈ pd3 + @test pd2 ≉ pd3 atol=1e-16 + @test pd3 != pd4 + + @test length(pd4 - pd3) == 1 + + @test dot(pd1, pd1) == sum(abs2, values(pd1)) + @test dot(pd1, pd3) == sum(abs2, values(pd1)) + + @test !is_distributed(pd1) + @test !is_distributed(values(pd1)) + + @test real(pd1) == pd1 + @test isempty(imag(pd1)) + end - dvec4 = InitiatorDVec(:a => 1.0, style=IsStochastic2Pop()) - @test !isreal(dvec4) + @testset "DVec with PDVec" begin + pv = PDVec(zip(1:10, 10:-1:1)) + dv = PDVec{Int,Int}() + copyto!(dv, pv) + @test dv == pv + @test dot(dv, pv) / (norm(dv) * norm(pv)) ≈ 1 + + add!(pv, dv) + @test pv == 2 * dv + @test 0.5 * pv == dv + end + + @testset "map(!)" begin + pd1 = PDVec(zip(2:2:12, [1, -1, 2, -2, 3, -3])) + pd1_m = map(x -> x + 1, values(pd1)) + @test length(pd1_m) == 5 + @test pd1_m[2] == 2 + @test pd1 ≠ pd1_m + + map!(x -> x + 1, values(pd1)) + @test length(pd1) == 5 + @test pd1[2] == 2 + + @test pd1_m == pd1 + + pd2 = similar(pd1) + map!(x -> x - 2, pd2, values(pd1)) + @test length(pd2) == 4 + @test pd2[6] == 1 + + pd3 = map!(x -> x + 4, pd2, values(pd2)) + @test pd3 === pd2 + @test length(pd2) == 3 + end + + @testset "filter(!)" begin + pd1 = PDVec(zip(1:6, [1, -1, 2, -2, 3, -3])) + pd2 = similar(pd1) + pd2 = filter!(>(0), pd2, values(pd1)) + @test all(>(0), values(pd2)) + @test length(pd2) == 3 + + pd3 = filter(x -> x % 2 == 0, keys(pd1)) + @test all(<(0), values(pd3)) + @test length(pd3) == 3 + + filter!(p -> p[1] - p[2] ≠ 0, pairs(pd1)) + @test length(pd1) == 5 + + filter!(iseven, keys(pd1)) + @test length(pd1) == 3 + + filter!(iseven, pd1, values(pd1)) + @test 
length(pd1) == 1 + end + + @testset "operator dot" begin + add = FermiFS2C((1,1,0,0), (0,0,1,1)) + H = HubbardMom1D(add) + T = Transcorrelated1D(add) + D = DensityMatrixDiagonal(1) + + dv1 = H * DVec(add => 1.0) + dv2 = T * DVec(add => 1.0) + pv1 = H * PDVec(add => 1.0) + pv2 = T * PDVec(add => 1.0) + wm = PDWorkingMemory(pv1) + + for op in (H, T, D) + @test dot(pv1, op, pv2) ≈ dot(dv1, op, dv2) + @test dot(dv1, op, pv2) ≈ dot(pv1, op, dv2) + + @test dot(pv1, op, pv2, wm) ≈ dot(pv1, op, dv2) + end + end + end + + @testset "interface tests" begin + test_dvec_interface(PDVec) + test_dvec_interface(PDVec; initiator=true) end end diff --git a/test/Interfaces.jl b/test/Interfaces.jl index 753f7b015..310b17937 100644 --- a/test/Interfaces.jl +++ b/test/Interfaces.jl @@ -12,6 +12,9 @@ using Test @test storage(vector) ≡ vector @test localpart(vector) ≡ vector + zerovector!(vector) + @test vector == [0, 0, 0] + ham = [1 0 0; 2 3 0; 5 6 0] @test offdiagonals(ham, 1) == [(2, 2), (3, 5)] @test offdiagonals(ham, 2) == [(3, 6)] diff --git a/test/KrylovKit.jl b/test/KrylovKit.jl index 447bca7aa..3fa1354ba 100644 --- a/test/KrylovKit.jl +++ b/test/KrylovKit.jl @@ -1,4 +1,5 @@ using KrylovKit +using LinearAlgebra using Rimu using Test @@ -13,3 +14,29 @@ using Test @test energy ≈ -4.0215 atol=0.0001 end + +if VERSION ≥ v"1.9" + @testset "KrylovKit Extension" begin + add = FermiFS2C((0,1,0,1,0), (0,0,1,0,0)) + ham_bm = HubbardMom1D(add) + ham_tc = Transcorrelated1D(add) + + true_bm = eigen(Matrix(ham_bm)).values[1] + res1 = eigsolve(ham_bm, DVec(add => 1.0), 1, :SR) + res2 = eigsolve(ham_bm, PDVec(add => 1.0), 1, :SR) + + @test res1[1][1] ≈ true_bm + @test res1[1][1] ≈ res2[1][1] + @test res1[1][1] isa Real + @test res2[1][1] isa Real + + true_tc = eigen(Matrix(ham_tc)).values[1] + res3 = eigsolve(ham_tc, DVec(add => 1), 1, :SR) + res4 = eigsolve(ham_tc, PDVec(add => 1), 1, :SR) + + @test res3[1][1] ≈ true_tc + @test res3[1][1] ≈ res4[1][1] + @test res3[1][1] isa ComplexF64 + @test res4[1][1] isa ComplexF64 + end +end diff --git a/test/StochasticStyles.jl b/test/StochasticStyles.jl index bd75e0470..8b5b706af 100644 --- a/test/StochasticStyles.jl +++ b/test/StochasticStyles.jl @@ -238,15 +238,28 @@ end StochasticStyles.ThresholdCompression(), StochasticStyles.ThresholdCompression(2), ) - target = similar(dv) + target_1 = similar(dv) + target_2 = similar(dv) + dv_before = copy(dv) for _ in 1:1000 - compressed = copy(dv) - StochasticStyles.compress!(compression, compressed) - add!(target, compressed) + compressed_1 = copy(dv) + compressed_2 = similar(dv) + StochasticStyles.compress!(compression, compressed_1) + StochasticStyles.compress!(compression, compressed_2, dv) + @test length(compressed_1) < length(dv) + @test length(compressed_2) < length(dv) + add!(target_1, compressed_1) + add!(target_2, compressed_2) end - scale!(target, 1/1000) - @test walkernumber(target) ≈ walkernumber(dv) rtol=0.1 - @test dot(target, dv) ≈ 1 rtol=0.1 + scale!(target_1, 1/1000) + scale!(target_2, 1/1000) + @test walkernumber(target_1) ≈ walkernumber(dv) rtol=0.1 + @test walkernumber(target_2) ≈ walkernumber(dv) rtol=0.1 + @test dot(target_1, dv) ≈ 1 rtol=0.1 + @test dot(target_2, dv) ≈ 1 rtol=0.1 + + # double check that the dv didn't change during compression. 
+ @test dv_before == dv end end diff --git a/test/lomc.jl b/test/lomc.jl index fa2f59411..313b73085 100644 --- a/test/lomc.jl +++ b/test/lomc.jl @@ -1,6 +1,6 @@ using Rimu using Test -using Rimu.DictVectors: Initiator, SimpleInitiator, CoherentInitiator +using Rimu.DictVectors: Initiator, SimpleInitiator, CoherentInitiator, NonInitiator using Rimu.StochasticStyles: IsStochastic2Pop, Bernoulli, WithoutReplacement using Rimu.StochasticStyles: ThresholdCompression using Rimu.StatsTools @@ -69,9 +69,8 @@ using Logging add = near_uniform(BoseFS{5,15}) H = HubbardReal1D(add) G = GutzwillerSampling(H, g=1) - dv = DVec(add => 1, style=IsDynamicSemistochastic()) - @testset "NoStats" begin + dv = DVec(add => 1, style=IsDynamicSemistochastic()) df, state = lomc!(H, dv; replica=NoStats(1)) @test state.replica == NoStats(1) @test length(state.replicas) == 1 @@ -87,58 +86,66 @@ using Logging @test isnothing(Rimu.check_transform(NoStats(), H)) end + # column names are of the form c{i}_dot_c{j} and c{i}_Op{k}_c{j}. + function num_stats(df) + return length(filter(x -> match(r"^c[0-9]", x) ≠ nothing, names(df))) + end @testset "AllOverlaps" begin - # column names are of the form c{i}_dot_c{j} and c{i}_Op{k}_c{j}. - num_stats(df) = length(filter(x -> match(r"^c[0-9]", x) ≠ nothing, names(df))) - - # No operator: N choose 2 reports. - df, _ = lomc!(H, dv; replica=AllOverlaps(4)) - @test num_stats(df) == binomial(4, 2) - df, _ = lomc!(H, dv; replica=AllOverlaps(5)) - @test num_stats(df) == binomial(5, 2) - - # No vector norm: N choose 2 reports. - df, _ = lomc!(H, dv; replica=AllOverlaps(4; operator=H, vecnorm=false)) - @test num_stats(df) == binomial(4, 2) - df, _ = lomc!(H, dv; replica=AllOverlaps(5; operator=H, vecnorm=false)) - @test num_stats(df) == binomial(5, 2) - - # No operator, no vector norm: 0 reports. - df, _ = lomc!(H, dv; replica=AllOverlaps(4; vecnorm=false)) - @test num_stats(df) == 0 - df, _ = lomc!(H, dv; replica=AllOverlaps(5; vecnorm=false)) - @test num_stats(df) == 0 - - # One operator: 2 * N choose 2 reports. - df, _ = lomc!(H, dv; replica=AllOverlaps(4; operator=H)) - @test num_stats(df) == 2 * binomial(4, 2) - df, _ = lomc!(H, dv; replica=AllOverlaps(5; operator=H)) - @test num_stats(df) == 2 * binomial(5, 2) - - # Two operators: 3 * N choose 2 reports. - df, _ = lomc!(H, dv; replica=AllOverlaps(2; operator=(G, H))) - @test num_stats(df) == 3 * binomial(2, 2) - df, _ = lomc!(H, dv; replica=AllOverlaps(7; operator=(G, H))) - @test num_stats(df) == 3 * binomial(7, 2) - - # Transformed operators: (3 + 1) * N choose 2 reports. 
- df, _ = lomc!(G, dv; replica=AllOverlaps(2; operator=(H, G), transform=G)) - @test num_stats(df) == 4 * binomial(2, 2) - df, _ = lomc!(G, dv; replica=AllOverlaps(7; operator=(H, G), transform=G)) - @test num_stats(df) == 4 * binomial(7, 2) - - # Check transformation - # good transform - no warning - @test_logs min_level=Logging.Warn Rimu.check_transform(AllOverlaps(; operator=H, transform=G), G) - # no operators - no warning - @test_logs min_level=Logging.Warn Rimu.check_transform(AllOverlaps(;), H) - # Hamiltonian transformed and operators not transformed - @test_logs (:warn, Regex("(Expected overlaps)")) Rimu.check_transform(AllOverlaps(; operator=H), G) - # Hamiltonian not transformed and operators transformed - @test_logs (:warn, Regex("(Expected overlaps)")) Rimu.check_transform(AllOverlaps(; operator=H, transform=G), H) - # Different transformations - @test_logs (:warn, Regex("(not consistent)")) Rimu.check_transform(AllOverlaps(; operator=H, transform=GutzwillerSampling(H, 0.5)), G) + for dv in ( + DVec(add => 1, style=IsDynamicSemistochastic()), + PDVec(add => 1, style=IsDynamicSemistochastic()), + ) + # No operator: N choose 2 reports. + df, _ = lomc!(H, dv; replica=AllOverlaps(4)) + @test num_stats(df) == binomial(4, 2) + df, _ = lomc!(H, dv; replica=AllOverlaps(5)) + @test num_stats(df) == binomial(5, 2) + + # No vector norm: N choose 2 reports. + df, _ = lomc!(H, dv; replica=AllOverlaps(4; operator=H, vecnorm=false)) + @test num_stats(df) == binomial(4, 2) + df, _ = lomc!(H, dv; replica=AllOverlaps(5; operator=H, vecnorm=false)) + @test num_stats(df) == binomial(5, 2) + + # No operator, no vector norm: 0 reports. + df, _ = lomc!(H, dv; replica=AllOverlaps(4; vecnorm=false)) + @test num_stats(df) == 0 + df, _ = lomc!(H, dv; replica=AllOverlaps(5; vecnorm=false)) + @test num_stats(df) == 0 + + # One operator: 2 * N choose 2 reports. + df, _ = lomc!(H, dv; replica=AllOverlaps(4; operator=H)) + @test num_stats(df) == 2 * binomial(4, 2) + df, _ = lomc!(H, dv; replica=AllOverlaps(5; operator=H)) + @test num_stats(df) == 2 * binomial(5, 2) + + # Two operators: 3 * N choose 2 reports. + df, _ = lomc!(H, dv; replica=AllOverlaps(2; operator=(G, H))) + @test num_stats(df) == 3 * binomial(2, 2) + df, _ = lomc!(H, dv; replica=AllOverlaps(7; operator=(G, H))) + @test num_stats(df) == 3 * binomial(7, 2) + + # Transformed operators: (3 + 1) * N choose 2 reports. 
+ df, _ = lomc!(G, dv; replica=AllOverlaps(2; operator=(H, G), transform=G)) + @test num_stats(df) == 4 * binomial(2, 2) + df, _ = lomc!(G, dv; replica=AllOverlaps(7; operator=(H, G), transform=G)) + @test num_stats(df) == 4 * binomial(7, 2) + + # Check transformation + # good transform - no warning + @test_logs min_level=Logging.Warn Rimu.check_transform(AllOverlaps(; operator=H, transform=G), G) + # no operators - no warning + @test_logs min_level=Logging.Warn Rimu.check_transform(AllOverlaps(;), H) + # Hamiltonian transformed and operators not transformed + @test_logs (:warn, Regex("(Expected overlaps)")) Rimu.check_transform(AllOverlaps(; operator=H), G) + # Hamiltonian not transformed and operators transformed + @test_logs (:warn, Regex("(Expected overlaps)")) Rimu.check_transform(AllOverlaps(; operator=H, transform=G), H) + # Different transformations + @test_logs (:warn, Regex("(not consistent)")) Rimu.check_transform(AllOverlaps(; operator=H, transform=GutzwillerSampling(H, 0.5)), G) + end + end + @testset "AllOverlaps special cases" begin # Complex operator v = DVec(1 => 1) G = MatrixHamiltonian(rand(5, 5)) @@ -148,6 +155,7 @@ using Logging @test df.c1_Op1_c2 isa Vector{ComplexF64} # MPIData + dv = DVec(add => 1, style=IsDynamicSemistochastic()) df, _ = lomc!(H, MPIData(dv); replica=AllOverlaps(4; operator=H)) @test num_stats(df) == 2 * binomial(4, 2) df, _ = lomc!(H, MPIData(dv); replica=AllOverlaps(5; operator=DensityMatrixDiagonal(1))) @@ -249,7 +257,7 @@ using Logging @testset "Setting `maxlength`" begin add = BoseFS{15,10}((0,0,0,0,0,15,0,0,0,0)) H = HubbardMom1D(add; u=6.0) - dv = DVec(add => 1; style=IsDynamicSemistochastic()) + dv = PDVec(add => 1; style=IsDynamicSemistochastic()) Random.seed!(1336) @@ -274,7 +282,7 @@ using Logging add = BoseFS{5,5}((1,1,1,1,1)) H = HubbardReal1D(add; u=0.5) # Using Deterministic to get exact same result - dv = DVec(add => 1.0, style=IsDeterministic()) + dv = PDVec(add => 1.0, style=IsDeterministic()) # Run lomc!, then change laststep and continue. df, state = lomc!(H, copy(dv)) @@ -292,7 +300,7 @@ using Logging @testset "Reporting" begin add = BoseFS((1,2,1,1)) H = HubbardReal1D(add; u=2) - dv = DVec(add => 1, style=IsDeterministic()) + dv = PDVec(add => 1, style=IsDeterministic()) @testset "ReportDFAndInfo" begin r_strat = ReportDFAndInfo(reporting_interval=5, info_interval=10, io=devnull, writeinfo=true) @@ -541,6 +549,8 @@ end @test "len_before" ∉ names(df_th) @test "len_before" ∉ names(df_cx) @test "len_before" ∉ names(df_de) + @test all(>(0), df_dp.len_before) + @test all(df_dp.len_before .≥ df_dp.len) E_st, σ_st = mean_and_se(df_st.shift[500:end]) E_th, σ_th = mean_and_se(df_th.shift[500:end]) @@ -590,6 +600,11 @@ end initiator=CoherentInitiator(1), style=IsDynamicSemistochastic(), ) + dv_ni = InitiatorDVec( + add => 1; + initiator=NonInitiator(), + style=IsDynamicSemistochastic(), + ) @testset "Energies below the plateau & initiator bias" begin Random.seed!(8008) @@ -604,14 +619,18 @@ end df_i1 = lomc!(H, copy(dv_i1); s_strat, laststep, dτ).df df_i2 = lomc!(H, copy(dv_i2); s_strat, laststep, dτ).df df_i3 = lomc!(H, copy(dv_i3); s_strat, laststep, dτ).df + df_ni = lomc!(H, copy(dv_ni); s_strat, laststep, dτ).df E_no, σ_no = mean_and_se(df_no.shift[2000:end]) E_i1, σ_i1 = mean_and_se(df_i1.shift[2000:end]) E_i2, σ_i2 = mean_and_se(df_i2.shift[2000:end]) E_i3, σ_i3 = mean_and_se(df_i3.shift[2000:end]) + E_ni, σ_ni = mean_and_se(df_ni.shift[2000:end]) # Garbage energy from no initiator. 
@test E_no < E0 + @test E_ni < E0 + @test E_no ≈ E_ni atol=3 * σ_no # Initiator has a bias. @test E_i1 > E0 @test E_i2 > E0 @@ -636,17 +655,20 @@ end df_i1 = lomc!(H, copy(dv_i1); s_strat, laststep, dτ).df df_i2 = lomc!(H, copy(dv_i2); s_strat, laststep, dτ).df df_i3 = lomc!(H, copy(dv_i3); s_strat, laststep, dτ).df + df_ni = lomc!(H, copy(dv_ni); s_strat, laststep, dτ).df E_no, σ_no = mean_and_se(df_no.shift[500:end]) E_i1, σ_i1 = mean_and_se(df_i1.shift[500:end]) E_i2, σ_i2 = mean_and_se(df_i2.shift[500:end]) E_i3, σ_i3 = mean_and_se(df_i3.shift[500:end]) + E_ni, σ_ni = mean_and_se(df_ni.shift[500:end]) # All estimates should be fairly good. @test E_no ≈ E0 atol=3σ_no @test E_i1 ≈ E0 atol=3σ_i1 @test E_i2 ≈ E0 atol=3σ_i2 @test E_i3 ≈ E0 atol=3σ_i3 + @test E_ni ≈ E0 atol=3σ_ni end end end diff --git a/test/mpi_runtests.jl b/test/mpi_runtests.jl index 3b32a961a..d445c33f7 100644 --- a/test/mpi_runtests.jl +++ b/test/mpi_runtests.jl @@ -1,10 +1,10 @@ using LinearAlgebra -using MPI using Random using Rimu -using StaticArrays -using Statistics using Test +using KrylovKit +using StaticArrays +using MPI using Rimu.RMPI using Rimu.StatsTools @@ -36,13 +36,18 @@ function correct_ranks(md) end end -mpi_allprintln("hello") +@mpi_root @info "Running MPI tests..." +RMPI.mpi_allprintln("hello") -# Ignore all printing on ranks other than root. -if mpi_rank() != mpi_root +# Ignore all printing on ranks other than root. Passing an argument to this script disables +# this. +if isnothing(get(ARGS, 1, nothing)) && mpi_rank() != mpi_root redirect_stderr(devnull) redirect_stdout(devnull) end +if !isnothing(get(ARGS, 1, nothing)) + @mpi_root @info "Debug printing enabled" +end """ setup_dv(type, args...; kwargs...) @@ -192,7 +197,6 @@ end ) target = MPIData(similar(source); kw...) RMPI.mpi_combine_walkers!(target, source) - #@test norm(source) ≈ norm(target) return target end @@ -213,6 +217,95 @@ end end end + @testset "PDVec" begin + add = FermiFS2C((1,1,1,0,0,0),(1,1,1,0,0,0)) + ham = HubbardRealSpace(add) + + dv = DVec(add => 1.0) + pv = PDVec(add => 1.0) + + res_dv = eigsolve(ham, dv, 1, :SR) + res_pv = eigsolve(ham, pv, 1, :SR) + + @test res_dv[1][1] ≈ res_pv[1][1] + + dv = copy(res_dv[2][1]) + pv = copy(res_pv[2][1]) + @test norm(pv) ≈ 1 + @test length(pv) == length(dv) + @test sum(values(pv)) ≈ sum(values(dv)) + normalize!(pv, 1) + @test norm(pv, 1) ≈ 1 + rmul!(pv, 2) + @test norm(pv, 1) ≈ 2 + + pv1 = copy(res_pv[2][1]) + pv2 = mul!(similar(pv), ham, pv) + + @test dot(pv2, pv1) ≈ dot(pv2, dv) + @test dot(pv1, pv2) ≈ dot(dv, pv2) + @test dot(freeze(pv2), pv1) ≈ dot(pv2, pv1) + @test dot(pv1, freeze(pv2)) ≈ dot(pv1, pv2) + + if mpi_size() > 1 + @test pv.communicator isa DictVectors.PointToPoint + @test mpi_size() == mpi_size(pv.communicator) + @test_throws DictVectors.CommunicatorError iterate(pairs(pv)) + + local_pairs = collect(pairs(localpart(pv))) + local_vals = sum(abs2, values(localpart(pv))) + + total_len = MPI.Allreduce(length(local_pairs), +, MPI.COMM_WORLD) + total_vals = MPI.Allreduce(local_vals, +, MPI.COMM_WORLD) + + @test total_len == length(dv) + @test total_vals == sum(abs2, values(pv)) + end + + @testset "dot" begin + add = BoseFS((0,0,10,0,0)) + H = HubbardMom1D(add) + D = DensityMatrixDiagonal(1) + + # Need to seed here to get the same random vectors on all ranks. 
+ Random.seed!(1) + pairs_v = [BoseFS(rand_onr(10, 5)) => 2 - 4rand() for _ in 1:100] + pairs_w = [BoseFS(rand_onr(10, 5)) => 2 - 4rand() for _ in 1:20] + + v = DVec(pairs_v) + w = DVec(pairs_w) + pv = PDVec(pairs_v) + pw = PDVec(pairs_w) + + @test norm(v) ≈ norm(pv) + @test length(w) == length(pw) + + @test dot(v, w) ≈ dot(pv, pw) + + @test dot(freeze(pw), pv) ≈ dot(w, v) ≈ dot(pw, freeze(pv)) + @test dot(freeze(pv), pw) ≈ dot(v, w) ≈ dot(pv, freeze(pw)) + wm = PDWorkingMemory(pv) + + for op in (H, D) + @test dot(v, op, w) ≈ dot(pv, op, pw) + @test dot(w, op, v) ≈ dot(pw, op, pv) + + @test dot(v, op, w) ≈ dot(pv, op, pw, wm) + @test dot(w, op, v) ≈ dot(pw, op, pv, wm) + + pu = op * pv + u = op * v + @test length(u) == length(pu) + @test norm(u, 1) ≈ norm(pu, 1) + @test norm(u, 2) ≈ norm(pu, 2) + @test norm(u, Inf) ≈ norm(pu, Inf) + end + + @test dot(pv, (H, D), pw, wm) == (dot(pv, H, pw), dot(pv, D, pw)) + @test dot(pv, (H, D), pw) == (dot(pv, H, pw), dot(pv, D, pw)) + end + end + @testset "Ground state energy estimates" begin # H1 = HubbardReal1D(BoseFS((1,1,1,1,1,1,1)); u=6.0) # E0 = eigvals(Matrix(H1))[1] @@ -223,14 +316,19 @@ end (RMPI.mpi_point_to_point, (;)), (RMPI.mpi_all_to_all, (;)), (RMPI.mpi_one_sided, (; capacity=1000)), + (:PDVec, (;)), ) @testset "Regular with $setup and post-steps" begin - H = HubbardReal1D(BoseFS((1, 1, 1, 1, 1, 1, 1)); u=6.0) - dv = MPIData( - DVec(starting_address(H) => 3; style=IsDynamicSemistochastic()); - setup, - kwargs... - ) + H = HubbardReal1D(BoseFS((1,1,1,1,1,1,1)); u=6.0) + if setup == :PDVec + dv = PDVec(starting_address(H) => 3; style=IsDynamicSemistochastic()) + else + dv = MPIData( + DVec(starting_address(H) => 3; style=IsDynamicSemistochastic()); + setup, + kwargs... + ) + end post_step = ( ProjectedEnergy(H, dv), @@ -242,7 +340,7 @@ end # Shift estimate. Es, σs = mean_and_se(df.shift[2000:end]) - s_low, s_high = Es - 2σs, Es + 2σs + s_low, s_high = Es - 3σs, Es + 3σs # Projected estimate. r = ratio_of_means(df.hproj[2000:end], df.vproj[2000:end]) p_low, p_high = pquantile(r, [0.0015, 0.9985]) @@ -253,12 +351,14 @@ end @test all(0 .≤ df.loneliness .≤ 1) end @testset "Initiator with $setup" begin - H = HubbardMom1D(BoseFS((0, 0, 0, 7, 0, 0, 0)); u=6.0) - dv = MPIData( - InitiatorDVec(starting_address(H) => 3); - setup, - kwargs... - ) + H = HubbardMom1D(BoseFS((0,0,0,7,0,0,0)); u=6.0) + add = starting_address(H) + + if setup == :PDVec + dv = PDVec(add => 3; initiator_threshold=1) + else + dv = MPIData(InitiatorDVec(add => 3); setup, kwargs...) + end s_strat = DoubleLogUpdate(targetwalkers=100) df = lomc!(H, dv; laststep=5000, s_strat).df @@ -266,6 +366,59 @@ end Es, _ = mean_and_se(df.shift[2000:end]) @test E0 ≤ Es end + for initiator in (true, false) + if setup === RMPI.mpi_one_sided + # Skip one sided here, because for some reason blocking fails + @warn "Skipping one-sided" + continue + end + @testset "AllOverlaps with $setup and initiator=$initiator" begin + H = HubbardMom1D(BoseFS((0,0,5,0,0))) + add = starting_address(H) + N = num_particles(add) + M = num_modes(add) + + if setup == :PDVec + dv = PDVec(add => 3; style=IsDynamicSemistochastic(), initiator) + elseif initiator + dv = MPIData(InitiatorDVec( + add => 3; style=IsDynamicSemistochastic() + ); setup, kwargs...) + else + dv = MPIData(DVec( + add => 3; style=IsDynamicSemistochastic() + ); setup, kwargs...) 
+ end + + # Diagonal + replica = AllOverlaps(2; operator=ntuple(DensityMatrixDiagonal, M)) + df,_ = lomc!(H, dv; replica, laststep=10_000) + + density_sum = sum(1:M) do i + top = df[!, Symbol("c1_Op", i, "_c2")] + bot = df.c1_dot_c2 + pmean(ratio_of_means(top, bot; skip=5000)) + end + @test density_sum ≈ N rtol=1e-3 + + # Not Diagonal + ops = ntuple(x -> G2MomCorrelator(x - cld(M, 2)), M) + replica = AllOverlaps(2; operator=ops) + df,_ = lomc!(H, dv; replica, laststep=10_000) + + g2s = map(1:M) do i + top = df[!, Symbol("c1_Op", i, "_c2")] + bot = df.c1_dot_c2 + pmean(ratio_of_means(top, bot; skip=5000)) + end + for i in 1:cld(M, 2) + @test real(g2s[i]) ≈ real(g2s[end - i + 1]) rtol=1e-3 + @test imag(g2s[i]) ≈ -imag(g2s[end - i + 1]) rtol=1e-3 + end + @test real(sum(g2s)) ≈ N^2 rtol=1e-2 + @test imag(sum(g2s)) ≈ 0 atol=1e-3 + end + end end end
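+
+# How these tests might be launched by hand (rank count is illustrative; the CI setup may
+# differ). Any extra script argument re-enables printing from non-root ranks, as handled
+# at the top of this file:
+#
+#     mpiexec -n 2 julia --project test/mpi_runtests.jl verbose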