diff --git a/README.md b/README.md
index 68a2e65..b4a0fea 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 | **Documentation** | **Build Status** | **Code style** |
 |:--------:|:----------------:|:----------------:|
-| [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://cosmologicalemulators.github.io/Effort.jl/dev) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://cosmologicalemulators.github.io/Effort.jl/stable) | [![Build status (Github Actions)](https://github.com/CosmologicalEmulators/Effort.jl/workflows/CI/badge.svg)](https://github.com/CosmologicalEmulators/Effort.jl/actions) [![codecov](https://codecov.io/gh/CosmologicalEmulators/Effort.jl/branch/abstractcosmoemu/graph/badge.svg?token=0PYHCWVL67)](https://codecov.io/gh/CosmologicalEmulators/Effort.jl) | [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle) [![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac) |
+| [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://cosmologicalemulators.github.io/Effort.jl/dev) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://cosmologicalemulators.github.io/Effort.jl/stable) | [![Build status (Github Actions)](https://github.com/CosmologicalEmulators/Effort.jl/workflows/CI/badge.svg)](https://github.com/CosmologicalEmulators/Effort.jl/actions) [![codecov](https://codecov.io/gh/CosmologicalEmulators/Effort.jl/branch/develop/graph/badge.svg?token=3OZSSHWTQG)](https://codecov.io/gh/CosmologicalEmulators/Effort.jl) | [![Code Style: Blue](https://img.shields.io/badge/code%20style-blue-4495d1.svg)](https://github.com/invenia/BlueStyle) [![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac) |
 
 Repository containing the EFfective Field theORy surrogaTe.
 
diff --git a/src/Effort.jl b/src/Effort.jl
index 9308add..79cdccc 100644
--- a/src/Effort.jl
+++ b/src/Effort.jl
@@ -11,6 +11,7 @@ using LegendrePolynomials
 using LoopVectorization
 using Memoization
 using OrdinaryDiffEq
+using QuadGK
 using Integrals
 using LinearAlgebra
 using SparseArrays
diff --git a/src/chainrules.jl b/src/chainrules.jl
index 03c9d09..3419239 100644
--- a/src/chainrules.jl
+++ b/src/chainrules.jl
@@ -1,5 +1,6 @@
 @non_differentiable LinRange(a,b,n)
 @non_differentiable _transformed_weights(quadrature_rule, order, a,b)
+@non_differentiable gausslobatto(n)
 
 Zygote.@adjoint function _create_d(u, t, s, typed_zero)
     y = _create_d(u, t, s, typed_zero)
diff --git a/src/projection.jl b/src/projection.jl
index 16e0b45..26f0d9a 100644
--- a/src/projection.jl
+++ b/src/projection.jl
@@ -1,5 +1,6 @@
 function _Pkμ(k, μ, Int_Mono, Int_Quad, Int_Hexa)
-    return Int_Mono(k)*Pl(μ, 0) + Int_Quad(k)*Pl(μ, 2) + Int_Hexa(k)*Pl(μ, 4)
+    #@info Int_Hexa(k)
+    return Int_Mono(k)*_legendre_0(μ) + Int_Quad(k)*_legendre_2(μ) + Int_Hexa(k)*_legendre_4(μ)
 end
 
 function _k_true(k_o, μ_o, q_perp, F)
@@ -10,10 +11,23 @@ function _μ_true(μ_o, F)
     return μ_o/F/sqrt(1+μ_o^2*(1/F^2-1))
 end
 
+function k_true(k_o::Array, μ_o::Array, q_perp, F)
+    @tullio result[i,j] := k_o[i]/q_perp*sqrt(1+μ_o[j]^2*(1/F^2-1))
+    return vec(result)
+end
+
+function μ_true(μ_o::Array, F)
+    @tullio result[i] := μ_o[i]/F/sqrt(1. +μ_o[i]^2*(1/F^2-1))
+    return result
+end
+
 function _P_obs(k_o, μ_o, q_par, q_perp, Int_Mono, Int_Quad, Int_Hexa)
     F = q_par/q_perp
     k_t = _k_true(k_o, μ_o, q_perp, F)
     μ_t = _μ_true(μ_o, F)
+
+    _Pkμ(k_t, μ_t, Int_Mono, Int_Quad, Int_Hexa)/(q_par*q_perp^2)
+
     return _Pkμ(k_t, μ_t, Int_Mono, Int_Quad, Int_Hexa)/(q_par*q_perp^2)
 end
 
@@ -58,53 +72,6 @@ function apply_AP_check(k_grid::Array, Mono_array::Array, Quad_array::Array,
     return apply_AP_check(k_grid, int_Mono, int_Quad, int_Hexa, q_par, q_perp)
 end
 
-function _mygemmavx(A, B, C)
-    Dm = zero(eltype(C))
-    @tturbo for n ∈ axes(A,1)
-        Dm += A[n] * B[n] * C[n]
-    end
-    return Dm
-end
-
-function _mygemm(A, B, C)
-    Dm = zero(eltype(C))
-    for n ∈ axes(A,1)
-        Dm += A[n] * B[n] * C[n]
-    end
-    return Dm
-end
-
-function apply_AP(k_grid, int_Mono::QuadraticSpline, int_Quad::QuadraticSpline, int_Hexa::QuadraticSpline,
-    q_par, q_perp)
-    nk = length(k_grid)
-    n_GL_points = 5
-    #TODO: check that the extrapolation does not create problems. Maybe logextrap?
-    nodes, weights = @memoize gausslobatto(n_GL_points*2)
-    #since the integrand is symmetric, we are gonna use only half of the points
-    μ_nodes = nodes[1:n_GL_points]
-    μ_weights = weights[1:n_GL_points]
-    result = zeros(3, nk)
-
-    Pl_0 = Pl.(μ_nodes, 0)
-    Pl_2 = Pl.(μ_nodes, 2)
-    Pl_4 = Pl.(μ_nodes, 4)
-
-    temp = zeros(n_GL_points)
-
-    for (k_idx, myk) in enumerate(k_grid)
-        for j in 1:n_GL_points
-            temp[j] = _P_obs(myk, μ_nodes[j], q_par, q_perp, int_Mono, int_Quad,
-            int_Hexa)
-        end
-        #we do not divided by 2 since we are using only half of the points and the result
-        #should be multiplied by 2
-        result[1, k_idx] = (2*0+1)*_mygemmavx(μ_weights, temp, Pl_0)
-        result[2, k_idx] = (2*2+1)*_mygemmavx(μ_weights, temp, Pl_2)
-        result[3, k_idx] = (2*4+1)*_mygemmavx(μ_weights, temp, Pl_4)
-    end
-    return result
-end
-
 function _stoch_obs(k_o, μ_o, q_par, q_perp, n_bar, cϵ0, cϵ1, cϵ2, k_nl)
     F = q_par/q_perp
     k_t = _k_true(k_o, μ_o, q_perp, F)
@@ -117,11 +84,11 @@ function _k_grid_over_nl(k_grid, k_nl)
  end
 
  function _stoch_kμ(k_grid, μ, n_bar, cϵ0, cϵ1, cϵ2, k_nl)
-    return (cϵ0 * Pl(μ, 0) .+ Effort._k_grid_over_nl(k_grid, k_nl) .* (cϵ1 * Pl(μ, 0) +
-            cϵ2 * Pl(μ, 2)) ) ./ n_bar
+    return (cϵ0 * _legendre_0(μ) .+ Effort._k_grid_over_nl(k_grid, k_nl) .* (cϵ1 * _legendre_0(μ) +
+            cϵ2 * _legendre_2(μ)) ) ./ n_bar
 end
 
-function get_stochs_AP(k_grid, q_par, q_perp, n_bar, cϵ0, cϵ1, cϵ2; k_nl = 0.7, n_GL_points = 4)
+function get_stochs_AP(k_grid, q_par, q_perp, n_bar, cϵ0, cϵ1, cϵ2; k_nl = 0.7, n_GL_points = 18)
     nk = length(k_grid)
     #TODO: check that the extrapolation does not create problems. Maybe logextrap?
     nodes, weights = @memoize gausslobatto(n_GL_points*2)
@@ -131,8 +98,8 @@ function get_stochs_AP(k_grid, q_par, q_perp, n_bar, cϵ0, cϵ1, cϵ2; k_nl = 0.
     result = zeros(2, nk)
 
     Pl_array = zeros(2, n_GL_points)
-    Pl_array[1,:] .= Pl.(μ_nodes, 0)
-    Pl_array[2,:] .= Pl.(μ_nodes, 2)
+    Pl_array[1,:] .= _legendre_0.(μ_nodes)
+    Pl_array[2,:] .= _legendre_2.(μ_nodes)
 
     temp = zeros(n_GL_points, nk)
 
@@ -194,6 +161,11 @@ function apply_AP(k_grid_AP, k_grid, Mono_array::Array, Quad_array::Array, Hexa_
     return apply_AP(k_grid_AP, int_Mono, int_Quad, int_Hexa, q_par, q_perp)
 end
 
+function Pk_recon(mono, quad, hexa, l0, l2, l4)
+    @tullio Pkμ[i,j] := mono[i,j]*l0[j] + quad[i,j]*l2[j] + hexa[i,j]*l4[j]
+     return Pkμ
+ end
+
 """
     apply_AP(k_grid::Array, Mono_array::Array, Quad_array::Array, Hexa_array::Array, q_par,
     q_perp)
@@ -201,37 +173,40 @@ Given the Monopole, the Quadrupole, the Hexadecapole, and the k-grid, this funct
 the AP effect using the Gauss-Lobatto quadrature. Fast but accurate,  well tested against
 adaptive Gauss-Kronrod integration.
 """
-function apply_AP(k_grid, Mono_array::Array, Quad_array::Array, Hexa_array::Array, q_par,
-    q_perp)
-    int_Mono, int_Quad, int_Hexa = interp_Pℓs(Mono_array, Quad_array, Hexa_array, k_grid)
-    return apply_AP(k_grid, int_Mono, int_Quad, int_Hexa, q_par, q_perp)
-end
+function apply_AP(k::Array, mono::Array, quad::Array, hexa::Array, q_par, q_perp;
+    n_GL_points=8)
+    nk = length(k)
+    nodes, weights = gausslobatto(n_GL_points*2)
+    #since the integrand is symmetric, we are gonna use only half of the points
+    μ_nodes = nodes[1:n_GL_points]
+    μ_weights = weights[1:n_GL_points]
+    F = q_par/q_perp
 
-function apply_AP(k_grid_AP, k_interp, Mono_array::Array, Quad_array::Array, Hexa_array::Array, q_par, q_perp)
-    int_Mono, int_Quad, int_Hexa = interp_Pℓs(Mono_array, Quad_array, Hexa_array, k_interp)
-    return apply_AP(k_grid_AP, int_Mono, int_Quad, int_Hexa, q_par, q_perp)
-end
+    k_t = k_true(k, μ_nodes, q_perp, F)
 
-"""
-function window_convolution(W,v)
-    a,b,c,d = size(W)
-    result = zeros(a,c)
-    window_convolution!(result,W,v)
-    return result
-end
+    μ_t = μ_true(μ_nodes, F)
 
-function window_convolution!(result, W, v)
-    @turbo for i ∈ axes(W,1), k ∈ axes(W,3)
-        c = zero(eltype(v))
-        for l ∈ axes(W,4)
-            for j ∈ axes(W,2)
-                c += W[i,j,k,l] * v[j,l]
-            end
-        end
-        result[i,k] = c
-    end
+    Pl0_t = _legendre_0.(μ_t)
+    Pl2_t = _legendre_2.(μ_t)
+    Pl4_t = _legendre_4.(μ_t)
+
+    Pl0 = _legendre_0.(μ_nodes).*μ_weights.*(2*0+1)
+    Pl2 = _legendre_2.(μ_nodes).*μ_weights.*(2*2+1)
+    Pl4 = _legendre_4.(μ_nodes).*μ_weights.*(2*4+1)
+
+    new_mono = reshape(_quadratic_spline(mono, k, k_t), nk, n_GL_points)
+    new_quad = reshape(_quadratic_spline(quad, k, k_t), nk, n_GL_points)
+    new_hexa = reshape(_quadratic_spline(hexa, k, k_t), nk, n_GL_points)
+
+    Pkμ = Pk_recon(new_mono, new_quad, new_hexa, Pl0_t, Pl2_t, Pl4_t)./(q_par*q_perp^2)
+
+    pippo_0 = Pkμ * Pl0
+    pippo_2 = Pkμ * Pl2
+    pippo_4 = Pkμ * Pl4
+    result = hcat(pippo_0, pippo_2, pippo_4)'
+
+    return result
 end
-"""
 
 function window_convolution(W,v)
     return @tullio C[i,k] := W[i,j,k,l] * v[j,l]
diff --git a/src/utils.jl b/src/utils.jl
index 4c8637e..e519f52 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -63,3 +63,15 @@ end
 function _create_d(u, t, s, typed_zero)
     return map(i -> i == 1 ? typed_zero : 2 * (u[i] - u[i - 1]) / (t[i] - t[i - 1]), 1:s)
 end
+
+function _legendre_0(x)
+    return 1.
+end
+
+function _legendre_2(x)
+    return 0.5*(3*x^2-1)
+end
+
+function _legendre_4(x)
+    return 0.125*(35*x^4-30x^2+3)
+end
diff --git a/test/Project.toml b/test/Project.toml
index 14ede01..db5dbae 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -4,6 +4,7 @@ DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
 FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
+LegendrePolynomials = "3db4a2ba-fc88-11e8-3e01-49c72059a882"
 NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
 SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
 SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
diff --git a/test/runtests.jl b/test/runtests.jl
index 43b3266..29e7dfd 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,6 +5,7 @@ using Static
 using Effort
 using ForwardDiff
 using Zygote
+using LegendrePolynomials
 using FiniteDifferences
 using SciMLSensitivity
 using DataInterpolations
@@ -66,6 +67,15 @@ function r_z_check_x(z, x)
     sum(Effort._r_z_check(z, Ωm0, h; mν =mν, w0=w0, wa=wa))
 end
 
+myx = Array(LinRange(0., 1., 100))
+monotest = sin.(myx)
+quadtest = 0.5.*cos.(myx)
+hexatest = 0.1.*cos.(2 .* myx)
+q_par = 1.4
+q_perp = 0.6
+
+x3 = Array(LinRange(-1., 1., 100))
+
 @testset "Effort tests" begin
     @test isapprox(Effort._H_a(a, Ωγ0, Ωm0, mν, h, w0, wa), h*100)
     @test isapprox(Effort._E_a(a, Ωm0, h), 1.)
@@ -85,4 +95,12 @@ end
     @test isapprox(ForwardDiff.gradient(x2->sum(Effort._quadratic_spline(y,x1,x2)), x2), Zygote.gradient(x2->sum(Effort._quadratic_spline(y,x1,x2)), x2)[1], rtol=1e-6)
     @test isapprox(grad(central_fdm(5,1), v->sum(Effort.window_convolution(W, v)), v)[1], Zygote.gradient(v->sum(Effort.window_convolution(W, v)), v)[1], rtol=1e-6)
     @test isapprox(grad(central_fdm(5,1), W->sum(Effort.window_convolution(W, v)), W)[1], Zygote.gradient(W->sum(Effort.window_convolution(W, v)), W)[1], rtol=1e-6)
+    @test isapprox(Effort.apply_AP(myx, monotest, quadtest, hexatest, q_par, q_perp; n_GL_points = 8), Effort.apply_AP_check(myx, monotest, quadtest, hexatest, q_par, q_perp), rtol=1e-3)
+    @test isapprox(Effort.apply_AP(myx, monotest, quadtest, hexatest, q_par, q_perp; n_GL_points = 18), Effort.apply_AP_check(myx, monotest, quadtest, hexatest, q_par, q_perp), rtol=1e-4)
+    @test isapprox(Effort.apply_AP(myx, monotest, quadtest, hexatest, q_par, q_perp; n_GL_points = 72), Effort.apply_AP_check(myx, monotest, quadtest, hexatest, q_par, q_perp), rtol=1e-5)
+    @test isapprox(Effort.apply_AP(myx, monotest, quadtest, hexatest, q_par, q_perp; n_GL_points = 126), Effort.apply_AP_check(myx, monotest, quadtest, hexatest, q_par, q_perp), rtol=1e-6)
+    @test isapprox(Zygote.gradient(x3->sum(x3.*Effort._legendre_0.(x3)), x3)[1], ForwardDiff.gradient(x3->sum(x3.*Pl.(x3, 0)), x3), rtol=1e-9)
+    @test isapprox(Zygote.gradient(x3->sum(Effort._legendre_2.(x3)), x3)[1], ForwardDiff.gradient(x3->sum(Pl.(x3, 2)), x3), rtol=1e-9)
+    @test isapprox(Zygote.gradient(x3->sum(Effort._legendre_4.(x3)), x3)[1], ForwardDiff.gradient(x3->sum(Pl.(x3, 4)), x3), rtol=1e-9)
+    @test isapprox(Zygote.gradient(myx->sum(Effort.apply_AP(myx, monotest, quadtest, hexatest, q_par, q_perp; n_GL_points = 8)), myx)[1], ForwardDiff.gradient(myx->sum(Effort.apply_AP(myx, monotest, quadtest, hexatest, q_par, q_perp; n_GL_points = 8)), myx))
 end