From 7fe3bcbbb14ab1f475d615869acf7636a2682134 Mon Sep 17 00:00:00 2001 From: Troels Henriksen Date: Mon, 5 Oct 2020 10:11:20 +0200 Subject: [PATCH] Fix #1143. --- CHANGELOG.md | 4 ++- src/Futhark/Optimise/TileLoops.hs | 7 +++-- tests/issue1143.fut | 47 +++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 tests/issue1143.fut diff --git a/CHANGELOG.md b/CHANGELOG.md index 9085fd1b16..8eb39b7f73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Fix in monomorphisation of types with constant sizes. - * Fix in in-place loweing (#1142). + * Fix in in-place lowering (#1142). + + * Fix tiling inside multiple nested loops (#1143). ## [0.17.2] diff --git a/src/Futhark/Optimise/TileLoops.hs b/src/Futhark/Optimise/TileLoops.hs index 64cd52370b..4e246420be 100644 --- a/src/Futhark/Optimise/TileLoops.hs +++ b/src/Futhark/Optimise/TileLoops.hs @@ -348,7 +348,9 @@ tileDoLoop initial_space variance prestms used_in_body (host_stms, tiling, tiled -- Expand the loop merge parameters to be arrays. tileDim t = arrayOf t (tilingTileShape tiling) $ uniqueness t - tiledBody' privstms = inScopeOf host_stms $ do + merge_scope = M.insert i (IndexName it) $ scopeOfFParams mergeparams + + tiledBody' privstms = localScope (scopeOf host_stms <> merge_scope) $ do addStms invariant_prestms let live_set = @@ -357,6 +359,7 @@ tileDoLoop initial_space variance prestms used_in_body (host_stms, tiling, tiled freeIn recomputed_variant_prestms <> used_in_body <> freeIn poststms + <> freeIn poststms_res prelude_arrs <- inScopeOf precomputed_variant_prestms $ @@ -400,7 +403,7 @@ tileDoLoop initial_space variance prestms used_in_body (host_stms, tiling, tiled letTupExp "tiled_inside_loop" $ DoLoop [] merge' (ForLoop i it bound []) loopbody' - postludeGeneric tiling inloop_privstms pat accs' poststms poststms_res res_ts + postludeGeneric tiling (privstms <> inloop_privstms) pat accs' poststms poststms_res res_ts return (host_stms, tiling, tiledBody') where diff --git a/tests/issue1143.fut b/tests/issue1143.fut new file mode 100644 index 0000000000..36eca0a49b --- /dev/null +++ b/tests/issue1143.fut @@ -0,0 +1,47 @@ +let dotprod [n] (xs: [n]f32) (ys: [n]f32): f32 = + #[sequential] reduce (+) 0.0 (map2 (*) xs ys) + +let house [d] (x: [d]f32): ([d]f32, f32) = + let dot = dotprod x x + let dot' = dot - x[0]**2 + x[0]**2 + let beta = if dot' != 0 then 2.0/dot' else 0 + in (x, beta) + +let matmul [n][p][m] (xss: [n][p]f32) (yss: [p][m]f32): [n][m]f32 = + map (\xs -> map (dotprod xs) (transpose yss)) xss + +let outer [n][m] (xs: [n]f32) (ys: [m]f32): [n][m]f32 = + matmul (map (\x -> [x]) xs) [ys] + +let matsub [m][n] (xss: [m][n]f32) (yss: [m][n]f32): *[m][n]f32 = + map2 (\xs ys -> map2 (-) xs ys) xss yss + +let matadd [m][n] (xss: [m][n]f32) (yss: [m][n]f32): [m][n]f32 = + map2 (\xs ys -> map2 (+) xs ys) xss yss + +let matmul_scalar [m][n] (xss: [m][n]f32) (k: f32): *[m][n]f32 = + map (map (*k)) xss + +let block_householder [m][n] (A: [m][n]f32) (r: i32): ([][]f32, [][]f32) = + #[unsafe] + let Q = replicate m (replicate m 0) + let (Q,A) = + loop (Q,A) = (Q, copy A) for k in 0..<(n/r) do + let s = k * r + let V = replicate m (replicate r 0f32) + let Bs = replicate r 0f32 + + let (A) = loop (A) for j in 0.. block_householder arr r) arrs