Merge branch 'cuda3' into threadgpu

marius311 · Apr 15, 2021 · cc42d60 · cc42d60
2 parents d2fa591 + 015850b
commit cc42d60
Show file tree

Hide file tree

Showing 25 changed files with 783 additions and 483 deletions.
diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml
@@ -5,10 +5,9 @@ on: [push, pull_request]
 jobs:
   main:
     runs-on: ${{ matrix.os }}
-    continue-on-error: ${{ matrix.experimental == 'nightly' }}
     strategy:
       matrix:
-        julia-version: ['1.5', '1.6-nightly']
+        julia-version: ['1.6', '1.7-nightly']
         os: [ubuntu-latest]
         JULIA_FFTW_PROVIDER: ['MKL','FFTW']
       fail-fast: false
@@ -25,5 +24,6 @@ jobs:
           && pip3 install camb
       - uses: julia-actions/julia-runtest@master
         timeout-minutes: 20
+        continue-on-error: ${{ matrix.julia-version == '1.7-nightly' }}
         env:
           JULIA_FFTW_PROVIDER: ${{ matrix.JULIA_FFTW_PROVIDER }}
diff --git a/Dockerfile b/Dockerfile
@@ -28,7 +28,7 @@ RUN apt-get update \
 
 ## install julia
 RUN mkdir /opt/julia \
-    && curl -L https://julialang-s3.julialang.org/bin/linux/x64/1.5/julia-1.5.3-linux-x86_64.tar.gz | tar zxf - -C /opt/julia --strip=1 \
+    && curl -L https://julialang-s3.julialang.org/bin/linux/x64/1.6/julia-1.6.0-linux-x86_64.tar.gz | tar zxf - -C /opt/julia --strip=1 \
     && chown -R 1000 /opt/julia \
     && ln -s /opt/julia/bin/julia /usr/local/bin
 
@@ -50,10 +50,12 @@ RUN curl https://pyenv.run | bash \
     && pyenv global 3.7.3
 
 ## install Python packages
+# see https://github.com/jupyter/jupyter_client/issues/637 re: jupyter-client==6.1.12
 RUN pip install --no-cache-dir \
         cython \
         julia \
-        jupyterlab==3 \
+        "jupyterlab>=3" \
+        jupyter-client==6.1.12 \
         jupytext \
         matplotlib \
         "nbconvert<6" \
@@ -88,7 +90,7 @@ COPY --chown=1000 Project.toml $HOME/CMBLensing/
 COPY --chown=1000 docs/Project.toml $HOME/CMBLensing/docs/
 RUN mkdir $HOME/CMBLensing/src && touch $HOME/CMBLensing/src/CMBLensing.jl
 ENV JULIA_PROJECT=$HOME/CMBLensing/docs
-RUN julia -e 'using Pkg; pkg"dev ~/CMBLensing; instantiate"' \
+RUN JULIA_PKG_PRECOMPILE_AUTO=0 julia -e 'using Pkg; pkg"dev ~/CMBLensing; instantiate"' \
     && rm -rf $HOME/.julia/conda/3/pkgs
 COPY --chown=1000 src $HOME/CMBLensing/src
 RUN (test $PRECOMPILE = 0 || julia -e 'using Pkg; pkg"precompile"')

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "CMBLensing"
 uuid = "b60c06c0-7e54-11e8-3788-4bd722d65317"
 authors = ["marius <[email protected]>"]
-version = "0.5.0"
+version = "0.5.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -53,7 +53,7 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 [compat]
 AbstractFFTs = "0.5, 1"
 Adapt = "1.0.1, 2, 3"
-CUDA = "2.1"
+CUDA = "3"
 Combinatorics = "1"
 DataStructures = "0.17.9, 0.18"
 FFTW = "1.2"
@@ -83,5 +83,5 @@ StaticArrays = "0.12.1, 1.0"
 StatsBase = "0.32, 0.33"
 TimerOutputs = "0.5"
 UnPack = "1"
-Zygote = "0.4.14, 0.5, 0.6"
-julia = "1.5"
+Zygote = "0.6.9"
+julia = "1.6"
diff --git a/README.md b/README.md
@@ -9,11 +9,11 @@
 CMBLensing.jl is a next-generation tool for analysis of the lensed Cosmic Microwave Background. It is written in [Julia](https://julialang.org/) and transparently callable from Python.
 
 
-At its heart, CMBLensing.jl maximizes or samples the Bayesian posterior for the CMB lensing problem. It also contains tools to quickly manipulate and process CMB maps, set up modified posteriors, and take gradients using automatic differentation.
+At its heart, CMBLensing.jl maximizes or samples the Bayesian posterior for the CMB lensing problem. It also contains tools to quickly manipulate and process CMB maps, set up modified posteriors, and take gradients using automatic differentiation.
 
 ### Highlights
-* Fully Nvidia GPU compatible (speedups over CPU are currently 3x-10x, depending on the problem size and hardware).
-* Automatic differentation (via [Zygote.jl](https://fluxml.ai/Zygote.jl/)) provides for-free gradients of your custom posteriors.
+* Fully Nvidia GPU compatible (1-2 order of magnitude speedups over CPU, depending on the problem size and hardware).
+* Automatic differentiation (via [Zygote.jl](https://fluxml.ai/Zygote.jl/)) provides for-free gradients of your custom posteriors.
 * Includes the following algorithms to lense a map:
     * `LenseFlow` ([Millea, Anderes, & Wandelt 2017](https://arxiv.org/abs/1708.06753))
     * `Taylens` ([Næss & Louis 2013](https://arxiv.org/abs/1307.0719))
@@ -29,14 +29,14 @@ The best place to get started is to read the [documentation](https://cosmicmar.c
 
 Most of the pages in the documentation are Jupyter notebooks, and you can click the "launch binder" link at the top of each page to launch a Jupyterlab server running the notebook in your browser (courtesy of [binder](https://mybinder.org/)). 
 
-You can also clone the repostiory and open the notebooks in [docs/src](https://github.com/marius311/CMBLensing.jl/tree/master/docs/src) if you want to run them locally (which will usually lead to higher performance). The notebooks are stored as `.md` files rather than `.ipynb` format. Its recommented to install [Jupytext](jupytext) (`pip install jupytext`) and then you can run these `.md` directly from Jupyterlab by right-clicking on them and selecting `Open With -> Notebook`. Otherwise, run the script `docs/make_notebooks.sh` to convert the `.md` files to `.ipynb` which you can then open as desired. 
+You can also clone the repostiory and open the notebooks in [docs/src](https://github.com/marius311/CMBLensing.jl/tree/master/docs/src) if you want to run them locally (which will usually lead to higher performance). The notebooks are stored as `.md` files rather than `.ipynb` format. Its recommended to install [Jupytext](jupytext) (`pip install jupytext`) and then you can run these `.md` directly from Jupyterlab by right-clicking on them and selecting `Open With -> Notebook`. Otherwise, run the script `docs/make_notebooks.sh` to convert the `.md` files to `.ipynb` which you can then open as desired. 
 
 
 ## Installation
 
 ### Requirements
 
-* Julia 1.5
+* Julia 1.6
 * _(recommended)_ An Nvidia GPU and [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) for GPU support
 * _(recommended)_ FFTW.jl built with [`JULIA_FFTW_PROVIDER=MKL`](https://juliamath.github.io/FFTW.jl/stable/#Installation-1) for faster CPU FFTs 
 * _(recommended)_ Python 3 + matplotlib (used for plotting)

diff --git a/docs/src/01_lense_a_map.md b/docs/src/01_lense_a_map.md
@@ -8,9 +8,9 @@ jupyter:
       format_version: '1.2'
       jupytext_version: 1.4.1
   kernelspec:
-    display_name: Julia 1.5.3
+    display_name: Julia 1.6.0
     language: julia
-    name: julia-1.5
+    name: julia-1.6
 ---
 
 # Lensing a flat-sky map

diff --git a/docs/src/02_posterior.md b/docs/src/02_posterior.md
@@ -8,9 +8,9 @@ jupyter:
       format_version: '1.2'
       jupytext_version: 1.4.1
   kernelspec:
-    display_name: Julia 1.5.3
+    display_name: Julia 1.6.0
     language: julia
-    name: julia-1.5
+    name: julia-1.6
 ---
 
 # The Lensing Posterior

diff --git a/docs/src/03_joint_MAP_example.md b/docs/src/03_joint_MAP_example.md
@@ -7,9 +7,9 @@ jupyter:
       format_version: '1.2'
       jupytext_version: 1.4.1
   kernelspec:
-    display_name: Julia 1.5.3
+    display_name: Julia 1.6.0
     language: julia
-    name: julia-1.5
+    name: julia-1.6
 ---
 
 # MAP estimation

diff --git a/docs/src/05_field_basics.md b/docs/src/05_field_basics.md
@@ -7,9 +7,9 @@ jupyter:
       format_version: '1.2'
       jupytext_version: 1.4.1
   kernelspec:
-    display_name: Julia 1.5.3
+    display_name: Julia 1.6.0
     language: julia
-    name: julia-1.5
+    name: julia-1.6
 ---
 
 # Field Basics

diff --git a/src/CMBLensing.jl b/src/CMBLensing.jl
@@ -4,19 +4,21 @@ module CMBLensing
 using Adapt
 using Base.Broadcast: AbstractArrayStyle, ArrayStyle, Broadcasted,
     DefaultArrayStyle, preprocess_args, Style, result_style, Unknown
-using Base.Iterators: flatten, product, repeated, cycle, countfrom, peel
+using Base.Iterators: flatten, product, repeated, cycle, countfrom, peel, partition
 using Base.Threads
 using Base: @kwdef, @propagate_inbounds, Bottom, OneTo, showarg, show_datatype,
     show_default, show_vector, typed_vcat, typename
 using Combinatorics
 using DataStructures
 using DelimitedFiles
-using Distributed: pmap, nworkers, myid, workers, addprocs, @everywhere, remotecall_wait, @spawnat, pgenerate, procs, @fetchfrom
+using Distributed: pmap, nworkers, myid, workers, addprocs, @everywhere, remotecall_wait, 
+    @spawnat, pgenerate, procs, @fetchfrom, default_worker_pool
 using FileIO
 using FFTW
 using InteractiveUtils
 using IterTools: flagfirst
 using JLD2
+using JLD2: jldopen, JLDWriteSession
 using KahanSummation
 using Loess
 using LinearAlgebra
@@ -66,7 +68,7 @@ import Statistics: std
 
 
 export
-    @⌛, @show⌛, @ismain, @namedtuple, @repeated, @unpack, animate,
+    @⌛, @show⌛, @ismain, @namedtuple, @repeated, @unpack, @cpu!, animate,
     argmaxf_lnP, BandPassOp, BaseDataSet, batch, batch_index, batch_length, beamCℓs, cache,
     CachedLenseFlow, camb, cov_to_Cℓ, cpu, Cℓ_2D, Cℓ_to_Cov, DataSet, DerivBasis,
     diag, Diagonal, DiagOp, dot, EBFourier, EBMap, expnorm, Field, FieldArray, fieldinfo,
@@ -80,16 +82,24 @@ export
     IEBFourier, IEBMap, InterpolatedCℓs, IQUFourier, IQUMap, kde,
     lasthalf, LazyBinaryOp, LenseBasis, LenseFlow, FieldOp, lnP, load_camb_Cℓs,
     load_chains, load_sim, LowPass, make_mask, Map, MAP_joint, MAP_marg,
-    mean_std_and_errors, MidPass, mix, nan2zero, new_dataset, noiseCℓs,
+    mean_std_and_errors, MidPass, mix, nan2zero, noiseCℓs,
     ParamDependentOp, pixwin, PowerLens, ProjLambert, QUFourier, QUMap, resimulate!,
     resimulate, RK4Solver, sample_joint, shiftℓ, 
     simulate, SymmetricFuncOp, symplectic_integrate, Taylens, toCℓ, toDℓ,
     ud_grade, unbatch, unmix, white_noise, Ð, Ł,  
     ℓ², ℓ⁴, ∇, ∇², ∇ᵢ, ∇ⁱ
-
+
+# bunch of sampling-related exports
+export gibbs_initialize_f!, gibbs_initialize_ϕ!, gibbs_initialize_θ!, 
+    gibbs_sample_f!, gibbs_sample_ϕ!, gibbs_sample_slice_θ!, 
+    gibbs_mix!, gibbs_unmix!, gibbs_postprocess!, 
+    once_every, start_after_burnin, mass_matrix_ϕ, hmc_step
+
+
 # generic stuff
 include("util.jl")
 include("util_fft.jl")
+include("util_parallel.jl")
 include("numerical_algorithms.jl")
 include("generic.jl")
 include("cls.jl")

diff --git a/src/autodiff.jl b/src/autodiff.jl
@@ -65,7 +65,7 @@ Zygote.accum(f::BaseField, nt::NamedTuple{(:arr,:metadata)}) = (@assert(isnothin
 @adjoint dot(f::Field{B1}, g::Field{B2}) where {B1,B2} = dot(f,g), Δ -> (Δ*B1(g), Δ*B2(f))
 @adjoint (*)(f::Adjoint{<:Any,<:Field}, g::Field) = Zygote.pullback((f,g)->dot(f',g),f,g)
 # ℝᴺˣᴺ -> ℝ¹ 
-@adjoint logdet(L::ParamDependentOp, θ) = Zygote._pullback(θ->logdet(L(;θ...)), θ)
+@adjoint logdet(L::ParamDependentOp, θ) = Zygote.pullback((L,θ)->logdet(L(θ)), L, θ)
 @adjoint logdet(L::DiagOp) = logdet(L), Δ -> (Δ * Zfac(L) * pinv(L)',)
 
 # basis conversion
@@ -109,9 +109,8 @@ end
 @adjoint function *(x::FieldOrOpRowVector, y::FieldVector)
     z = x * y
     # when x is a vector of Fields
-    back(Δ) = (Δ * y', x' * Δ)
-    # when x is a vector of Diagonals. in this case, Δ * basis(Δ)(y)' create an
-    # OuterProdOp in the same basis as the Diagonals in x
+    back(Δ::Real) = ((Δ * y)', x' * Δ)
+    # when x is a vector of Diagonals. in this case, Δ * basis(Δ)(y)'
     back(Δ::Field{B}) where {B} = (Δ * basis(Δ)(y)'), (x' * Δ)
     z, back
 end

diff --git a/src/batched_reals.jl b/src/batched_reals.jl
@@ -11,7 +11,7 @@ struct BatchedReal{T<:Real,V<:Vector{T}} <: Real
 end
 
 batch(r::Real) = r
-batch(rs::Real...) = BatchedReal(rs)
+batch(rs::Real...) = BatchedReal(collect(rs))
 batch(v::AbstractVector{<:Real}) = BatchedReal(collect(v))
 batch_length(br::BatchedReal) = length(br.vals)
 batch_length(::Real) = 1
@@ -35,7 +35,7 @@ unbatch(br::BatchedReal; dims=1) = reshape(br.vals, ntuple(_->1, dims-1)..., :)
 unbatch(r::Real; dims=nothing) = r
 Base.show(io::IO, br::BatchedReal) = print(io, "Batched", br.vals)
 (::Type{T})(br::BatchedReal) where {T<:Real} = batch(T.(br.vals))
-Base.hash(bv::BatchedReal, h::UInt) = foldr(hash, (typeof(bv), bv.vals), init=h)
+hash(bv::BatchedReal, h::UInt64) = foldr(hash, (typeof(bv), bv.vals), init=h)
 
 
 # used to denote a batch of things, no other functionality

diff --git a/src/chains.jl b/src/chains.jl
@@ -200,17 +200,24 @@ end
 
 
 
-struct GetDistKDE
+# an N-dimensional getdist Kernel Density Estimate
+struct GetDistKDE{N}
     kde
 end
-(k::GetDistKDE)(x) = k.kde(x)
+(k::GetDistKDE)(x...) = k.kde(x...)
 
 """
-    kde(samples; [boundary=(min,max), normalize="integral" or "max"])
+    kde(samples::AbstractVector; [boundary=(min,max), normalize="integral" or "max"])
+    kde(samples::AbstractMatrix; [boundary=[(min1,max1),(min2,max2)], normalize="integral" or "max", smooth_scale_2D])
 
-Return a Kernel Density Estimate for a set of samples. The return
-object is a function which can be evaluated anywhere to compute the
-PDF. Based on Python [GetDist](https://getdist.readthedocs.io/en/latest/intro.html), 
+Return a Kernel Density Estimate for a set of 1D or 2D samples. The
+return object is a function which can be evaluated anywhere to compute
+the PDF. If provided, `boundary` specifies a hard upper/lower bound
+for the 1 or 2 or parameters, `normalize` specifies whether to
+normalize the PDF to unit integral or unit maximum, and
+`smooth_scale_2D` specifies how much smoothing to do for the 2D case.
+
+Based on Python [GetDist](https://getdist.readthedocs.io/en/latest/intro.html), 
 which must be installed.
 """
 function kde(samples::AbstractVector; boundary=(nothing,nothing), normalize="integral")
@@ -219,5 +226,20 @@ function kde(samples::AbstractVector; boundary=(nothing,nothing), normalize="int
     kde = getdist.MCSamples(;
         samples, weights=nothing, names=["x"], ranges=Dict("x"=>boundary)
     )
-    GetDistKDE(kde.get1DDensity(0).normalize(normalize))
+    GetDistKDE{1}(kde.get1DDensity(0).normalize(normalize))
 end
+
+function kde(samples::AbstractMatrix; boundary=((nothing,nothing),(nothing,nothing)), normalize="integral", smooth_scale_2D=nothing)
+    if size(samples,1) == 2
+        samples = samples'
+    elseif size(samples,2) != 2
+        error("KDE only supports 1 or 2 dimensional samples.")
+    end
+    getdist = @ondemand(PyCall.pyimport)("getdist")
+    getdist.chains.print_load_details = false
+    kde = getdist.MCSamples(;
+        samples, weights=nothing, names=["x","y"], ranges=Dict("x"=>boundary[1], "y"=>boundary[2])
+    )
+    density_kwargs = isnothing(smooth_scale_2D) ? () : (;smooth_scale_2D)
+    GetDistKDE{2}(kde.get2DDensity(0, 1; density_kwargs...).normalize(normalize))
+end
diff --git a/src/dataset.jl b/src/dataset.jl
@@ -1,27 +1,18 @@
 
 
-abstract type DataSet{DS} end
-
-getproperty(ds::DS, k::Symbol) where {DS<:DataSet{<:DataSet}} = 
-    hasfield(DS, k) ? getfield(ds, k) : getproperty(getfield(ds, :_super), k)
-setproperty!(ds::DS, k::Symbol, v) where {DS<:DataSet{<:DataSet}} = 
-    hasfield(DS, k) ? setfield!(ds, k, v) : setproperty!(getfield(ds, :_super), k, v)
-propertynames(ds::DS) where {DS′<:DataSet, DS<:DataSet{DS′}} = 
-    union(fieldnames(DS), fieldnames(DS′))
-
-function new_dataset(::Type{DS}; kwargs...) where {DS′<:DataSet, DS<:DataSet{DS′}}
-    kw  = filter(((k,_),)->  k in fieldnames(DS),  kwargs)
-    kw′ = filter(((k,_),)->!(k in fieldnames(DS)), kwargs)
-    DS(_super=DS′(;kw′...); kw...)
-end
+abstract type DataSet end
 
-copy(ds::DS) where {DS<:DataSet} = 
-    DS(((k==:_super ? copy(v) : v) for (k,v) in pairs(fields(ds)))...)
+copy(ds::DS) where {DS<:DataSet} = DS(fields(ds)...)
 
-hash(ds::DataSet, h::UInt64) = hash(typeof(ds), foldr(hash, fieldvalues(ds), init=h))
+hash(ds::DataSet, h::UInt64) = foldr(hash, (typeof(ds), fieldvalues(ds)...), init=h)
 
-# needed until fix to https://github.com/FluxML/Zygote.jl/issues/685
-Zygote.grad_mut(ds::DataSet) = Ref{Any}((;(propertynames(ds) .=> nothing)...))
+function show(io::IO, ds::DataSet)
+    println(io, typeof(ds), ": ")
+    ds_dict = OrderedDict(k => getproperty(ds,k) for k in propertynames(ds) if k!=Symbol("_super"))
+    for line in split(sprint(show, MIME"text/plain"(), ds_dict, context = (:limit => true)), "\n")[2:end]
+        println(io, line)
+    end
+end
 
 
 # util for distributing a singleton global dataset to workers
@@ -53,7 +44,7 @@ _distributed_dataset_hash = nothing
 
 
 # Stores variables needed to construct the posterior
-@kwdef mutable struct BaseDataSet <: DataSet{Nothing}
+@kwdef mutable struct BaseDataSet <: DataSet
     d                # data
     Cϕ               # ϕ covariance
     Cf               # unlensed field covariance

diff --git a/src/field_tuples.jl b/src/field_tuples.jl
@@ -109,14 +109,15 @@ white_noise(ξ::FieldTuple, rng::AbstractRNG) = FieldTuple(map(f -> white_noise(
 
 # # promote before recursing for these 
 dot(a::FieldTuple, b::FieldTuple) = reduce(+, map(dot, getfield.(promote(a,b),:fs)...), init=0)
-hash(ft::FieldTuple, h::UInt) = foldr(hash, (typeof(ft), ft.fs))
+hash(ft::FieldTuple, h::UInt64) = foldr(hash, (typeof(ft), ft.fs), init=h)
 
 # logdet & trace
 logdet(L::Diagonal{<:Union{Real,Complex}, <:FieldTuple}) = reduce(+, map(logdet∘Diagonal, L.diag.fs), init=0)
 tr(L::Diagonal{<:Union{Real,Complex}, <:FieldTuple}) = reduce(+, map(tr∘Diagonal, L.diag.fs), init=0)
 
 # misc
 batch_length(ft::FieldTuple) = only(unique(map(batch_length, ft.fs)))
+batch_index(ft::FieldTuple, I) = FieldTuple(map(f -> batch_index(f, I), ft.fs))
 function global_rng_for(::Type{<:FieldTuple{<:Union{FS,NamedTuple{Names,FS}}}}) where {Names,FS<:Tuple} 
     only(unique(map_tupleargs(global_rng_for, FS)))
 end