Merge pull request #8 from mtsch/gradient-descent

Gradient descent, Jastrow ansatz, README
RimuQMC · Aug 14, 2024 · 6bdaeb6 · 6bdaeb6
2 parents f29c81c + 889718c
commit 6bdaeb6
Show file tree

Hide file tree

Showing 13 changed files with 1,002 additions and 38 deletions.
diff --git a/Project.toml b/Project.toml
@@ -22,9 +22,9 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
 
 [extras]
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]

diff --git a/README.md b/README.md
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -0,0 +1,4 @@
+[deps]
+CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
+LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
+Rimu = "c53c40cc-bd84-11e9-2cf4-a9fde2b9386e"
diff --git a/docs/README-21.png b/docs/README-21.png
diff --git a/docs/README-41.png b/docs/README-41.png
diff --git a/docs/README.jl b/docs/README.jl
@@ -0,0 +1,191 @@
+# # Gutzwiller
+# _importance sampling and variational Monte Carlo for
+# [Rimu.jl](https://github.com/joachimbrand/Rimu.jl)_
+#
+# ## Installation
+#
+# Gutzwiller.jl is not yet registered. To install it, run
+#
+# ```julia
+# import Pkg; Pkg.add("https://github.com/mtsch/Gutzwiller.jl")
+# ```
+#
+
+# ## Usage guide
+#
+#
+using Rimu
+using Gutzwiller
+using CairoMakie
+using LaTeXStrings
+
+# First, we set up a starting address and a Hamiltonian
+
+addr = near_uniform(BoseFS{10,10})
+H = HubbardReal1D(addr; u=2.0)
+
+# In this example, we'll set up a Gutzwiller ansatz to importance-sample the Hamiltonian.
+
+ansatz = GutzwillerAnsatz(H)
+
+# An ansatz is a struct that given a set of parameters and an address, produces the value
+# it would have if it was a vector.
+
+ansatz(addr, [1.0])
+
+# In addition, the function `val_and_grad` can be used to compute both the value and its
+# gradient with respect to the parameters.
+
+val_and_grad(ansatz, addr, [1.0])
+
+# ### Deterministic optimization
+
+# For effective importance sampling, we want the ansatz to be as good of an approximation to
+# the ground state of the Hamiltonian as possible. As the value of the Rayleigh quotient of
+# a given ansatz is always larger than the Hamiltonian's ground state energy, we can use
+# an optimization algorithm to find the paramters that minimize its energy.
+
+# When the basis of the Hamiltonian is small enough to fit into memory, it's best to use the
+# `LocalEnergyEvaluator`
+
+le = LocalEnergyEvaluator(H, ansatz)
+
+# which can be used to evaulate the value of the Rayleigh quotient (or its gradient) for
+# given parameters. In the case of the Gutzwiller ansatz, there is only one parameter.
+
+le([1.0])
+
+# Like before, we can use `val_and_grad` to also evaluate its gradient.
+
+val_and_grad(le, [1.0])
+
+# Now, let's plot the energy landscape for this particular case
+
+begin
+    fig = Figure()
+    ax = Axis(fig[1, 1]; xlabel=L"p", ylabel=L"E")
+    ps = range(0, 2; length=100)
+    Es = [le([p]) for p in ps]
+    lines!(ax, ps, Es)
+    fig
+end
+
+# To find the minimum, pass `le` to `optimize` from Optim.jl
+
+using Optim
+
+opt_nelder = optimize(le, [1.0])
+
+# To take advantage of the gradients, wrap the evaluator in `Optim.only_fg!`. This will
+# usually reduce the number of steps needed to reach the minimum.
+
+opt_lbgfs = optimize(Optim.only_fg!(le), [1.0])
+
+# We can inspect the parameters and the value at the minimum as
+
+opt_lbgfs.minimizer, opt_lbgfs.minimum
+
+# ### Variational quantum Monte Carlo
+
+# When the Hamiltonian is too large to store its full basis in memory, we can use
+# variational QMC to sample addresses from the Hilbert space and evaluate their energy
+# at the same time. An important paramter we have tune is the number `steps`. More steps
+# will give us a better approximation of the energy, but take longer to evaluate.
+# Not taking enough samples can also result in producing a biased result.
+# Consider the following.
+
+p0 = [1.0]
+kinetic_vqmc(H, ansatz, p0; steps=1e2) #hide
+@time kinetic_vqmc(H, ansatz, p0; steps=1e2)
+
+#
+
+@time kinetic_vqmc(H, ansatz, p0; steps=1e5)
+
+#
+
+@time kinetic_vqmc(H, ansatz, p0; steps=1e7)
+
+# For this simple example, `1e2` steps gives an energy that is significantly higher, while
+# `1e7` takes too long. `1e5` seems to work well enough. For more convenient evaluation, we
+# wrap VQMC into a struct that behaves much like the `LocalEnergyEvaluator`.
+
+qmc = KineticVQMC(H, ansatz; samples=1e4)
+
+#
+
+qmc([1.0]), le([1.0])
+
+# Because the output of this procedure is noisy, optimizing it with Optim.jl will not work.
+# However, we can use a stochastic gradient descent (in this case
+# [AMSGrad](https://paperswithcode.com/method/amsgrad)).
+
+grad_result = amsgrad(qmc, [1.0])
+
+# While `amsgrad` attempts to determine if the optimization converged, it will generally not
+# detect convergence due to the noise in the QMC evaluation. The best way to determine
+# convergence is to plot the results. `grad_result` can be converted to a `DataFrame`.
+
+grad_df = DataFrame(grad_result)
+
+# To plot it, we can either work with the `DataFrame` or access the fields directly
+
+begin
+    fig = Figure()
+    ax1 = Axis(fig[1, 1]; xlabel=L"i", ylabel=L"E_v")
+    ax2 = Axis(fig[2, 1]; xlabel=L"i", ylabel=L"p")
+
+    lines!(ax1, grad_df.iter, grad_df.value)
+    lines!(ax2, first.(grad_result.param))
+    fig
+end
+
+# We see from the plot that the value of the energy is fluctiating around what appears to be
+# the minimum. While the parameter estimate here is probably good enough for importance
+# sampling, we can refine the result by creating a new `KineticVQMC` structure with
+# increased samples and use it to refine the result.
+
+qmc2 = KineticVQMC(H, ansatz; samples=1e6)
+grad_result2 = amsgrad(qmc2, grad_result.param[end])
+
+# Now, let's plot the refined result next to the minimum found by Optim.jl
+
+begin
+    fig = Figure()
+    ax1 = Axis(fig[1, 1]; xlabel=L"i", ylabel=L"E_v")
+    ax2 = Axis(fig[2, 1]; xlabel=L"i", ylabel=L"p")
+
+    lines!(ax1, grad_result2.value)
+    hlines!(ax1, [opt_lbgfs.minimum]; linestyle=:dot)
+    lines!(ax2, first.(grad_result2.param))
+    hlines!(ax2, opt_lbgfs.minimizer; linestyle=:dot)
+    fig
+end
+
+# ### Importance sampling
+
+# Finally, we have a good estimate for the parameter to use with importance
+# sampling. Gutzwiller.jl provides `AnsatzSampling`, which is similar to
+# `GutzwillerSampling` from Rimu, but can be used with different ansatze.
+
+p = grad_result.param[end]
+G = AnsatzSampling(H, ansatz, p)
+
+# This can now be used with FCIQMC. Let's compare the importance sampled time series to a
+# non-importance sampled one.
+
+using Random; Random.seed!(1337) #hide
+
+prob_standard = ProjectorMonteCarloProblem(H; target_walkers=15, last_step=2000)
+sim_standard = solve(prob_standard)
+shift_estimator(sim_standard; skip=1000)
+
+#
+
+prob_sampled = ProjectorMonteCarloProblem(G; target_walkers=15, last_step=2000)
+sim_sampled = solve(prob_sampled)
+shift_estimator(sim_sampled; skip=1000)
+
+# Note that the lower energy estimate in the sampled case is probably due to a reduced
+# population control bias. The effect importance sampling has on the statistic can be more
+# dramatic for larger systems and beter choices of anstaze.
diff --git a/docs/make.jl b/docs/make.jl
@@ -0,0 +1,10 @@
+for fn in EXAMPLES_FILES
+    fnmd_full = Literate.markdown(
+        joinpath(EXAMPLES_INPUT, fn), EXAMPLES_OUTPUT;
+        documenter = true, execute = true
+        )
+    ex_num, margintitle = parse_header(fnmd_full)
+    push!(EXAMPLES_NUMS, ex_num)
+    fnmd = fn[1:end-2]*"md"     # full path does not work
+    push!(EXAMPLES_PAIRS, margintitle => joinpath("generated", fnmd))
+end
diff --git a/src/Gutzwiller.jl b/src/Gutzwiller.jl
@@ -13,7 +13,7 @@ using SpecialFunctions
 using Tables
 using VectorInterface
 
-import Rimu.Hamiltonians.extended_bose_hubbard_interaction as ebh
+import Rimu.Hamiltonians.extended_hubbard_interaction as ebh
 
 export num_parameters, val_and_grad
 include("ansatz/abstract.jl")
@@ -25,9 +25,10 @@ export ExtendedGutzwillerAnsatz
 include("ansatz/extgutzwiller.jl")
 export MultinomialAnsatz
 include("ansatz/multinomial.jl")
+export JastrowAnsatz, RelativeJastrowAnsatz
+include("ansatz/jastrow.jl")
 export ExtendedAnsatz
 include("ansatz/combination.jl")
-export kinetic_vqmc, kinetic_vqmc!, local_energy_estimator, KineticVQMC
 
 export LocalEnergyEvaluator
 include("localenergy.jl")
@@ -39,7 +40,7 @@ include("kinetic-vqmc/state.jl")
 include("kinetic-vqmc/qmc.jl")
 include("kinetic-vqmc/wrapper.jl")
 
-# Not done yet.
+export gradient_descent, amsgrad
 include("amsgrad.jl")