From e72f89cfc6bdb9e872502d229f6a5d54021ecf3d Mon Sep 17 00:00:00 2001
From: Basil Hess <bhe@zurich.ibm.com>
Date: Fri, 10 Jan 2025 17:57:58 +0100
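Subject: [PATCH] Update mlkem-native to upstream commit 086b45d1 [full tests]

Re-import ML-KEM from bhess/mlkem-native (branch updates-4, previously
updates-3 at 8f54f09f). Each build target now sets MLKEM_NAMESPACE_PREFIX,
namespace.h is replaced by mlkem_native.h, and the x86_64 targets
additionally build x86_64/src/rej_uniform_table.c.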

Signed-off-by: Basil Hess <bhe@zurich.ibm.com>
---
 docs/algorithms/kem/ml_kem.md                 |   2 +-
 docs/algorithms/kem/ml_kem.yml                |   2 +-
 .../copy_from_upstream/copy_from_upstream.yml |   4 +-
 src/kem/ml_kem/CMakeLists.txt                 |  30 +--
 .../arith_backend.h                           |   4 +-
 .../mlkem-native_ml-kem-1024_aarch64/cbd.c    |  38 ++-
 .../mlkem-native_ml-kem-1024_aarch64/cbd.h    |   8 +-
 .../mlkem-native_ml-kem-1024_aarch64/cbmc.h   |  31 +--
 .../mlkem-native_ml-kem-1024_aarch64/common.h |  20 +-
 .../mlkem-native_ml-kem-1024_aarch64/config.h |  54 +++-
 .../debug/debug.h                             |   6 +-
 .../mlkem-native_ml-kem-1024_aarch64/indcpa.c |  75 ++++--
 .../mlkem-native_ml-kem-1024_aarch64/indcpa.h |   8 +-
 .../mlkem-native_ml-kem-1024_aarch64/kem.c    |  75 ++++--
 .../mlkem-native_ml-kem-1024_aarch64/kem.h    | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../namespace.h                               |  57 -----
 .../mlkem-native_ml-kem-1024_aarch64/ntt.c    |  63 +++--
 .../mlkem-native_ml-kem-1024_aarch64/ntt.h    |  13 +-
 .../mlkem-native_ml-kem-1024_aarch64/params.h |  13 +-
 .../mlkem-native_ml-kem-1024_aarch64/poly.c   | 152 ++++++-----
 .../mlkem-native_ml-kem-1024_aarch64/poly.h   | 109 +++++---
 .../polyvec.c                                 |  42 +--
 .../polyvec.h                                 |  66 +++--
 .../mlkem-native_ml-kem-1024_aarch64/reduce.h |  20 +-
 .../rej_uniform.c                             |  21 +-
 .../rej_uniform.h                             |   5 +-
 .../mlkem-native_ml-kem-1024_aarch64/sys.h    |   2 +
 .../mlkem-native_ml-kem-1024_aarch64/verify.h |  71 +++---
 .../arith_backend.h                           |   4 +-
 .../ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c |  38 ++-
 .../ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h |   8 +-
 .../mlkem-native_ml-kem-1024_ref/cbmc.h       |  31 +--
 .../mlkem-native_ml-kem-1024_ref/common.h     |  20 +-
 .../mlkem-native_ml-kem-1024_ref/config.h     |  54 +++-
 .../debug/debug.h                             |   6 +-
 .../mlkem-native_ml-kem-1024_ref/indcpa.c     |  75 ++++--
 .../mlkem-native_ml-kem-1024_ref/indcpa.h     |   8 +-
 .../ml_kem/mlkem-native_ml-kem-1024_ref/kem.c |  75 ++++--
 .../ml_kem/mlkem-native_ml-kem-1024_ref/kem.h | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../mlkem-native_ml-kem-1024_ref/namespace.h  |  57 -----
 .../ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c |  63 +++--
 .../ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h |  13 +-
 .../mlkem-native_ml-kem-1024_ref/params.h     |  13 +-
 .../mlkem-native_ml-kem-1024_ref/poly.c       | 152 ++++++-----
 .../mlkem-native_ml-kem-1024_ref/poly.h       | 109 +++++---
 .../mlkem-native_ml-kem-1024_ref/polyvec.c    |  42 +--
 .../mlkem-native_ml-kem-1024_ref/polyvec.h    |  66 +++--
 .../mlkem-native_ml-kem-1024_ref/reduce.h     |  20 +-
 .../rej_uniform.c                             |  21 +-
 .../rej_uniform.h                             |   5 +-
 .../ml_kem/mlkem-native_ml-kem-1024_ref/sys.h |   2 +
 .../mlkem-native_ml-kem-1024_ref/verify.h     |  71 +++---
 .../arith_backend.h                           |   4 +-
 .../mlkem-native_ml-kem-1024_x86_64/cbd.c     |  38 ++-
 .../mlkem-native_ml-kem-1024_x86_64/cbd.h     |   8 +-
 .../mlkem-native_ml-kem-1024_x86_64/cbmc.h    |  31 +--
 .../mlkem-native_ml-kem-1024_x86_64/common.h  |  20 +-
 .../mlkem-native_ml-kem-1024_x86_64/config.h  |  54 +++-
 .../debug/debug.h                             |   6 +-
 .../mlkem-native_ml-kem-1024_x86_64/indcpa.c  |  75 ++++--
 .../mlkem-native_ml-kem-1024_x86_64/indcpa.h  |   8 +-
 .../mlkem-native_ml-kem-1024_x86_64/kem.c     |  75 ++++--
 .../mlkem-native_ml-kem-1024_x86_64/kem.h     | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../namespace.h                               |  57 -----
 .../mlkem-native_ml-kem-1024_x86_64/ntt.c     |  63 +++--
 .../mlkem-native_ml-kem-1024_x86_64/ntt.h     |  13 +-
 .../mlkem-native_ml-kem-1024_x86_64/params.h  |  13 +-
 .../mlkem-native_ml-kem-1024_x86_64/poly.c    | 152 ++++++-----
 .../mlkem-native_ml-kem-1024_x86_64/poly.h    | 109 +++++---
 .../mlkem-native_ml-kem-1024_x86_64/polyvec.c |  42 +--
 .../mlkem-native_ml-kem-1024_x86_64/polyvec.h |  66 +++--
 .../mlkem-native_ml-kem-1024_x86_64/reduce.h  |  20 +-
 .../rej_uniform.c                             |  21 +-
 .../rej_uniform.h                             |   5 +-
 .../mlkem-native_ml-kem-1024_x86_64/sys.h     |   2 +
 .../mlkem-native_ml-kem-1024_x86_64/verify.h  |  71 +++---
 .../x86_64/src/arith_native_x86_64.h          |   3 +
 .../x86_64/src/basemul.c                      |   4 +-
 .../x86_64/src/rej_uniform_avx2.c             | 176 +------------
 .../x86_64/src/rej_uniform_table.c            | 159 ++++++++++++
 .../arith_backend.h                           |   4 +-
 .../mlkem-native_ml-kem-512_aarch64/cbd.c     |  38 ++-
 .../mlkem-native_ml-kem-512_aarch64/cbd.h     |   8 +-
 .../mlkem-native_ml-kem-512_aarch64/cbmc.h    |  31 +--
 .../mlkem-native_ml-kem-512_aarch64/common.h  |  20 +-
 .../mlkem-native_ml-kem-512_aarch64/config.h  |  54 +++-
 .../debug/debug.h                             |   6 +-
 .../mlkem-native_ml-kem-512_aarch64/indcpa.c  |  75 ++++--
 .../mlkem-native_ml-kem-512_aarch64/indcpa.h  |   8 +-
 .../mlkem-native_ml-kem-512_aarch64/kem.c     |  75 ++++--
 .../mlkem-native_ml-kem-512_aarch64/kem.h     | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../namespace.h                               |  57 -----
 .../mlkem-native_ml-kem-512_aarch64/ntt.c     |  63 +++--
 .../mlkem-native_ml-kem-512_aarch64/ntt.h     |  13 +-
 .../mlkem-native_ml-kem-512_aarch64/params.h  |  13 +-
 .../mlkem-native_ml-kem-512_aarch64/poly.c    | 152 ++++++-----
 .../mlkem-native_ml-kem-512_aarch64/poly.h    | 109 +++++---
 .../mlkem-native_ml-kem-512_aarch64/polyvec.c |  42 +--
 .../mlkem-native_ml-kem-512_aarch64/polyvec.h |  66 +++--
 .../mlkem-native_ml-kem-512_aarch64/reduce.h  |  20 +-
 .../rej_uniform.c                             |  21 +-
 .../rej_uniform.h                             |   5 +-
 .../mlkem-native_ml-kem-512_aarch64/sys.h     |   2 +
 .../mlkem-native_ml-kem-512_aarch64/verify.h  |  71 +++---
 .../arith_backend.h                           |   4 +-
 .../ml_kem/mlkem-native_ml-kem-512_ref/cbd.c  |  38 ++-
 .../ml_kem/mlkem-native_ml-kem-512_ref/cbd.h  |   8 +-
 .../ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h |  31 +--
 .../mlkem-native_ml-kem-512_ref/common.h      |  20 +-
 .../mlkem-native_ml-kem-512_ref/config.h      |  54 +++-
 .../mlkem-native_ml-kem-512_ref/debug/debug.h |   6 +-
 .../mlkem-native_ml-kem-512_ref/indcpa.c      |  75 ++++--
 .../mlkem-native_ml-kem-512_ref/indcpa.h      |   8 +-
 .../ml_kem/mlkem-native_ml-kem-512_ref/kem.c  |  75 ++++--
 .../ml_kem/mlkem-native_ml-kem-512_ref/kem.h  | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../mlkem-native_ml-kem-512_ref/namespace.h   |  57 -----
 .../ml_kem/mlkem-native_ml-kem-512_ref/ntt.c  |  63 +++--
 .../ml_kem/mlkem-native_ml-kem-512_ref/ntt.h  |  13 +-
 .../mlkem-native_ml-kem-512_ref/params.h      |  13 +-
 .../ml_kem/mlkem-native_ml-kem-512_ref/poly.c | 152 ++++++-----
 .../ml_kem/mlkem-native_ml-kem-512_ref/poly.h | 109 +++++---
 .../mlkem-native_ml-kem-512_ref/polyvec.c     |  42 +--
 .../mlkem-native_ml-kem-512_ref/polyvec.h     |  66 +++--
 .../mlkem-native_ml-kem-512_ref/reduce.h      |  20 +-
 .../mlkem-native_ml-kem-512_ref/rej_uniform.c |  21 +-
 .../mlkem-native_ml-kem-512_ref/rej_uniform.h |   5 +-
 .../ml_kem/mlkem-native_ml-kem-512_ref/sys.h  |   2 +
 .../mlkem-native_ml-kem-512_ref/verify.h      |  71 +++---
 .../arith_backend.h                           |   4 +-
 .../mlkem-native_ml-kem-512_x86_64/cbd.c      |  38 ++-
 .../mlkem-native_ml-kem-512_x86_64/cbd.h      |   8 +-
 .../mlkem-native_ml-kem-512_x86_64/cbmc.h     |  31 +--
 .../mlkem-native_ml-kem-512_x86_64/common.h   |  20 +-
 .../mlkem-native_ml-kem-512_x86_64/config.h   |  54 +++-
 .../debug/debug.h                             |   6 +-
 .../mlkem-native_ml-kem-512_x86_64/indcpa.c   |  75 ++++--
 .../mlkem-native_ml-kem-512_x86_64/indcpa.h   |   8 +-
 .../mlkem-native_ml-kem-512_x86_64/kem.c      |  75 ++++--
 .../mlkem-native_ml-kem-512_x86_64/kem.h      | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../namespace.h                               |  57 -----
 .../mlkem-native_ml-kem-512_x86_64/ntt.c      |  63 +++--
 .../mlkem-native_ml-kem-512_x86_64/ntt.h      |  13 +-
 .../mlkem-native_ml-kem-512_x86_64/params.h   |  13 +-
 .../mlkem-native_ml-kem-512_x86_64/poly.c     | 152 ++++++-----
 .../mlkem-native_ml-kem-512_x86_64/poly.h     | 109 +++++---
 .../mlkem-native_ml-kem-512_x86_64/polyvec.c  |  42 +--
 .../mlkem-native_ml-kem-512_x86_64/polyvec.h  |  66 +++--
 .../mlkem-native_ml-kem-512_x86_64/reduce.h   |  20 +-
 .../rej_uniform.c                             |  21 +-
 .../rej_uniform.h                             |   5 +-
 .../mlkem-native_ml-kem-512_x86_64/sys.h      |   2 +
 .../mlkem-native_ml-kem-512_x86_64/verify.h   |  71 +++---
 .../x86_64/src/arith_native_x86_64.h          |   3 +
 .../x86_64/src/basemul.c                      |   4 +-
 .../x86_64/src/rej_uniform_avx2.c             | 176 +------------
 .../x86_64/src/rej_uniform_table.c            | 159 ++++++++++++
 .../arith_backend.h                           |   4 +-
 .../mlkem-native_ml-kem-768_aarch64/cbd.c     |  38 ++-
 .../mlkem-native_ml-kem-768_aarch64/cbd.h     |   8 +-
 .../mlkem-native_ml-kem-768_aarch64/cbmc.h    |  31 +--
 .../mlkem-native_ml-kem-768_aarch64/common.h  |  20 +-
 .../mlkem-native_ml-kem-768_aarch64/config.h  |  54 +++-
 .../debug/debug.h                             |   6 +-
 .../mlkem-native_ml-kem-768_aarch64/indcpa.c  |  75 ++++--
 .../mlkem-native_ml-kem-768_aarch64/indcpa.h  |   8 +-
 .../mlkem-native_ml-kem-768_aarch64/kem.c     |  75 ++++--
 .../mlkem-native_ml-kem-768_aarch64/kem.h     | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../namespace.h                               |  57 -----
 .../mlkem-native_ml-kem-768_aarch64/ntt.c     |  63 +++--
 .../mlkem-native_ml-kem-768_aarch64/ntt.h     |  13 +-
 .../mlkem-native_ml-kem-768_aarch64/params.h  |  13 +-
 .../mlkem-native_ml-kem-768_aarch64/poly.c    | 152 ++++++-----
 .../mlkem-native_ml-kem-768_aarch64/poly.h    | 109 +++++---
 .../mlkem-native_ml-kem-768_aarch64/polyvec.c |  42 +--
 .../mlkem-native_ml-kem-768_aarch64/polyvec.h |  66 +++--
 .../mlkem-native_ml-kem-768_aarch64/reduce.h  |  20 +-
 .../rej_uniform.c                             |  21 +-
 .../rej_uniform.h                             |   5 +-
 .../mlkem-native_ml-kem-768_aarch64/sys.h     |   2 +
 .../mlkem-native_ml-kem-768_aarch64/verify.h  |  71 +++---
 .../arith_backend.h                           |   4 +-
 .../ml_kem/mlkem-native_ml-kem-768_ref/cbd.c  |  38 ++-
 .../ml_kem/mlkem-native_ml-kem-768_ref/cbd.h  |   8 +-
 .../ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h |  31 +--
 .../mlkem-native_ml-kem-768_ref/common.h      |  20 +-
 .../mlkem-native_ml-kem-768_ref/config.h      |  54 +++-
 .../mlkem-native_ml-kem-768_ref/debug/debug.h |   6 +-
 .../mlkem-native_ml-kem-768_ref/indcpa.c      |  75 ++++--
 .../mlkem-native_ml-kem-768_ref/indcpa.h      |   8 +-
 .../ml_kem/mlkem-native_ml-kem-768_ref/kem.c  |  75 ++++--
 .../ml_kem/mlkem-native_ml-kem-768_ref/kem.h  | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../mlkem-native_ml-kem-768_ref/namespace.h   |  57 -----
 .../ml_kem/mlkem-native_ml-kem-768_ref/ntt.c  |  63 +++--
 .../ml_kem/mlkem-native_ml-kem-768_ref/ntt.h  |  13 +-
 .../mlkem-native_ml-kem-768_ref/params.h      |  13 +-
 .../ml_kem/mlkem-native_ml-kem-768_ref/poly.c | 152 ++++++-----
 .../ml_kem/mlkem-native_ml-kem-768_ref/poly.h | 109 +++++---
 .../mlkem-native_ml-kem-768_ref/polyvec.c     |  42 +--
 .../mlkem-native_ml-kem-768_ref/polyvec.h     |  66 +++--
 .../mlkem-native_ml-kem-768_ref/reduce.h      |  20 +-
 .../mlkem-native_ml-kem-768_ref/rej_uniform.c |  21 +-
 .../mlkem-native_ml-kem-768_ref/rej_uniform.h |   5 +-
 .../ml_kem/mlkem-native_ml-kem-768_ref/sys.h  |   2 +
 .../mlkem-native_ml-kem-768_ref/verify.h      |  71 +++---
 .../arith_backend.h                           |   4 +-
 .../mlkem-native_ml-kem-768_x86_64/cbd.c      |  38 ++-
 .../mlkem-native_ml-kem-768_x86_64/cbd.h      |   8 +-
 .../mlkem-native_ml-kem-768_x86_64/cbmc.h     |  31 +--
 .../mlkem-native_ml-kem-768_x86_64/common.h   |  20 +-
 .../mlkem-native_ml-kem-768_x86_64/config.h   |  54 +++-
 .../debug/debug.h                             |   6 +-
 .../mlkem-native_ml-kem-768_x86_64/indcpa.c   |  75 ++++--
 .../mlkem-native_ml-kem-768_x86_64/indcpa.h   |   8 +-
 .../mlkem-native_ml-kem-768_x86_64/kem.c      |  75 ++++--
 .../mlkem-native_ml-kem-768_x86_64/kem.h      | 105 ++++----
 .../mlkem_native.h                            | 239 ++++++++++++++++++
 .../namespace.h                               |  57 -----
 .../mlkem-native_ml-kem-768_x86_64/ntt.c      |  63 +++--
 .../mlkem-native_ml-kem-768_x86_64/ntt.h      |  13 +-
 .../mlkem-native_ml-kem-768_x86_64/params.h   |  13 +-
 .../mlkem-native_ml-kem-768_x86_64/poly.c     | 152 ++++++-----
 .../mlkem-native_ml-kem-768_x86_64/poly.h     | 109 +++++---
 .../mlkem-native_ml-kem-768_x86_64/polyvec.c  |  42 +--
 .../mlkem-native_ml-kem-768_x86_64/polyvec.h  |  66 +++--
 .../mlkem-native_ml-kem-768_x86_64/reduce.h   |  20 +-
 .../rej_uniform.c                             |  21 +-
 .../rej_uniform.h                             |   5 +-
 .../mlkem-native_ml-kem-768_x86_64/sys.h      |   2 +
 .../mlkem-native_ml-kem-768_x86_64/verify.h   |  71 +++---
 .../x86_64/src/arith_native_x86_64.h          |   3 +
 .../x86_64/src/basemul.c                      |   4 +-
 .../x86_64/src/rej_uniform_avx2.c             | 176 +------------
 .../x86_64/src/rej_uniform_table.c            | 159 ++++++++++++
 241 files changed, 8398 insertions(+), 4339 deletions(-)
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_table.c
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_ref/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_table.c
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_ref/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h
 delete mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/namespace.h
 create mode 100644 src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_table.c

diff --git a/docs/algorithms/kem/ml_kem.md b/docs/algorithms/kem/ml_kem.md
index 83a89a930..f4e8059d8 100644
--- a/docs/algorithms/kem/ml_kem.md
+++ b/docs/algorithms/kem/ml_kem.md
@@ -7,7 +7,7 @@
 - **Authors' website**: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203
 - **Specification version**: ML-KEM.
 - **Primary Source**<a name="primary-source"></a>:
-  - **Source**: https://github.com/bhess/mlkem-native/commit/8f54f09f21583fc0e29103f200fd5a42ec57665d
+  - **Source**: https://github.com/bhess/mlkem-native/commit/086b45d1ba2b5d820d4c4edb3875cbb9e023e080
   - **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
 
 
diff --git a/docs/algorithms/kem/ml_kem.yml b/docs/algorithms/kem/ml_kem.yml
index 3d7cc3d89..6e7bfad24 100644
--- a/docs/algorithms/kem/ml_kem.yml
+++ b/docs/algorithms/kem/ml_kem.yml
@@ -17,7 +17,7 @@ website: https://pq-crystals.org/kyber/ and https://csrc.nist.gov/pubs/fips/203
 nist-round: FIPS203
 spec-version: ML-KEM
 primary-upstream:
-  source: https://github.com/bhess/mlkem-native/commit/8f54f09f21583fc0e29103f200fd5a42ec57665d
+  source: https://github.com/bhess/mlkem-native/commit/086b45d1ba2b5d820d4c4edb3875cbb9e023e080
   spdx-license-identifier: CC0-1.0 or Apache-2.0
 parameter-sets:
 - name: ML-KEM-512
diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml
index 357bedf77..50de1d824 100644
--- a/scripts/copy_from_upstream/copy_from_upstream.yml
+++ b/scripts/copy_from_upstream/copy_from_upstream.yml
@@ -33,8 +33,8 @@ upstreams:
   -
     name: mlkem-native
     git_url: https://github.com/bhess/mlkem-native.git
-    git_branch: updates-3
-    git_commit: 8f54f09f21583fc0e29103f200fd5a42ec57665d
+    git_branch: updates-4
+    git_commit: 086b45d1ba2b5d820d4c4edb3875cbb9e023e080
     kem_meta_path: '{pretty_name_full}_META.yml'
     kem_scheme_path: '.'
   -
diff --git a/src/kem/ml_kem/CMakeLists.txt b/src/kem/ml_kem/CMakeLists.txt
index 4ccb40555..edd305ce8 100644
--- a/src/kem/ml_kem/CMakeLists.txt
+++ b/src/kem/ml_kem/CMakeLists.txt
@@ -7,19 +7,19 @@ set(_ML_KEM_OBJS "")
 
 if(OQS_ENABLE_KEM_ml_kem_512)
     add_library(ml_kem_512_ref OBJECT kem_ml_kem_512.c mlkem-native_ml-kem-512_ref/cbd.c mlkem-native_ml-kem-512_ref/debug/debug.c mlkem-native_ml-kem-512_ref/indcpa.c mlkem-native_ml-kem-512_ref/kem.c mlkem-native_ml-kem-512_ref/ntt.c mlkem-native_ml-kem-512_ref/poly.c mlkem-native_ml-kem-512_ref/polyvec.c mlkem-native_ml-kem-512_ref/rej_uniform.c mlkem-native_ml-kem-512_ref/verify.c mlkem-native_ml-kem-512_ref/zetas.c)
-    target_compile_options(ml_kem_512_ref PUBLIC -DMLKEM_K=2)
+    target_compile_options(ml_kem_512_ref PUBLIC -DMLKEM_K=2 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM512_C)
     target_include_directories(ml_kem_512_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-512_ref)
     target_include_directories(ml_kem_512_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
-    target_compile_options(ml_kem_512_ref PUBLIC -DMLKEM_K=2)
+    target_compile_options(ml_kem_512_ref PUBLIC -DMLKEM_K=2 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM512_C)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_ref>)
 endif()
 
 if(OQS_ENABLE_KEM_ml_kem_512_x86_64)
-    add_library(ml_kem_512_x86_64 OBJECT mlkem-native_ml-kem-512_x86_64/cbd.c mlkem-native_ml-kem-512_x86_64/debug/debug.c mlkem-native_ml-kem-512_x86_64/indcpa.c mlkem-native_ml-kem-512_x86_64/kem.c mlkem-native_ml-kem-512_x86_64/ntt.c mlkem-native_ml-kem-512_x86_64/poly.c mlkem-native_ml-kem-512_x86_64/polyvec.c mlkem-native_ml-kem-512_x86_64/rej_uniform.c mlkem-native_ml-kem-512_x86_64/verify.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-512_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-512_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-512_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-512_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-512_x86_64/zetas.c)
+    add_library(ml_kem_512_x86_64 OBJECT mlkem-native_ml-kem-512_x86_64/cbd.c mlkem-native_ml-kem-512_x86_64/debug/debug.c mlkem-native_ml-kem-512_x86_64/indcpa.c mlkem-native_ml-kem-512_x86_64/kem.c mlkem-native_ml-kem-512_x86_64/ntt.c mlkem-native_ml-kem-512_x86_64/poly.c mlkem-native_ml-kem-512_x86_64/polyvec.c mlkem-native_ml-kem-512_x86_64/rej_uniform.c mlkem-native_ml-kem-512_x86_64/verify.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-512_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-512_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-512_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-512_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-512_x86_64/zetas.c)
     target_include_directories(ml_kem_512_x86_64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-512_x86_64)
     target_include_directories(ml_kem_512_x86_64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
     target_compile_options(ml_kem_512_x86_64 PRIVATE  -mavx2  -mbmi2  -mpopcnt )
-    target_compile_options(ml_kem_512_x86_64 PUBLIC -DMLKEM_K=2 -DFORCE_X86_64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=X86_64_DEFAULT -DMLKEM_USE_NATIVE)
+    target_compile_options(ml_kem_512_x86_64 PUBLIC -DMLKEM_K=2 -DFORCE_X86_64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=X86_64_DEFAULT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM512_X86_64_DEFAULT)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_x86_64>)
 endif()
 
@@ -27,25 +27,25 @@ if(OQS_ENABLE_KEM_ml_kem_512_aarch64)
     add_library(ml_kem_512_aarch64 OBJECT mlkem-native_ml-kem-512_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-512_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-512_aarch64/cbd.c mlkem-native_ml-kem-512_aarch64/debug/debug.c mlkem-native_ml-kem-512_aarch64/indcpa.c mlkem-native_ml-kem-512_aarch64/kem.c mlkem-native_ml-kem-512_aarch64/ntt.c mlkem-native_ml-kem-512_aarch64/poly.c mlkem-native_ml-kem-512_aarch64/polyvec.c mlkem-native_ml-kem-512_aarch64/rej_uniform.c mlkem-native_ml-kem-512_aarch64/verify.c mlkem-native_ml-kem-512_aarch64/zetas.c)
     target_include_directories(ml_kem_512_aarch64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-512_aarch64)
     target_include_directories(ml_kem_512_aarch64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
-    target_compile_options(ml_kem_512_aarch64 PUBLIC -DMLKEM_K=2 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE)
+    target_compile_options(ml_kem_512_aarch64 PUBLIC -DMLKEM_K=2 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_aarch64>)
 endif()
 
 if(OQS_ENABLE_KEM_ml_kem_768)
     add_library(ml_kem_768_ref OBJECT kem_ml_kem_768.c mlkem-native_ml-kem-768_ref/cbd.c mlkem-native_ml-kem-768_ref/debug/debug.c mlkem-native_ml-kem-768_ref/indcpa.c mlkem-native_ml-kem-768_ref/kem.c mlkem-native_ml-kem-768_ref/ntt.c mlkem-native_ml-kem-768_ref/poly.c mlkem-native_ml-kem-768_ref/polyvec.c mlkem-native_ml-kem-768_ref/rej_uniform.c mlkem-native_ml-kem-768_ref/verify.c mlkem-native_ml-kem-768_ref/zetas.c)
-    target_compile_options(ml_kem_768_ref PUBLIC -DMLKEM_K=3)
+    target_compile_options(ml_kem_768_ref PUBLIC -DMLKEM_K=3 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM768_C)
     target_include_directories(ml_kem_768_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-768_ref)
     target_include_directories(ml_kem_768_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
-    target_compile_options(ml_kem_768_ref PUBLIC -DMLKEM_K=3)
+    target_compile_options(ml_kem_768_ref PUBLIC -DMLKEM_K=3 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM768_C)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_ref>)
 endif()
 
 if(OQS_ENABLE_KEM_ml_kem_768_x86_64)
-    add_library(ml_kem_768_x86_64 OBJECT mlkem-native_ml-kem-768_x86_64/cbd.c mlkem-native_ml-kem-768_x86_64/debug/debug.c mlkem-native_ml-kem-768_x86_64/indcpa.c mlkem-native_ml-kem-768_x86_64/kem.c mlkem-native_ml-kem-768_x86_64/ntt.c mlkem-native_ml-kem-768_x86_64/poly.c mlkem-native_ml-kem-768_x86_64/polyvec.c mlkem-native_ml-kem-768_x86_64/rej_uniform.c mlkem-native_ml-kem-768_x86_64/verify.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-768_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-768_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-768_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-768_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-768_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-768_x86_64/zetas.c)
+    add_library(ml_kem_768_x86_64 OBJECT mlkem-native_ml-kem-768_x86_64/cbd.c mlkem-native_ml-kem-768_x86_64/debug/debug.c mlkem-native_ml-kem-768_x86_64/indcpa.c mlkem-native_ml-kem-768_x86_64/kem.c mlkem-native_ml-kem-768_x86_64/ntt.c mlkem-native_ml-kem-768_x86_64/poly.c mlkem-native_ml-kem-768_x86_64/polyvec.c mlkem-native_ml-kem-768_x86_64/rej_uniform.c mlkem-native_ml-kem-768_x86_64/verify.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-768_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-768_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-768_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-768_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-768_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-768_x86_64/zetas.c)
     target_include_directories(ml_kem_768_x86_64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-768_x86_64)
     target_include_directories(ml_kem_768_x86_64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
     target_compile_options(ml_kem_768_x86_64 PRIVATE  -mavx2  -mbmi2  -mpopcnt )
-    target_compile_options(ml_kem_768_x86_64 PUBLIC -DMLKEM_K=3 -DFORCE_X86_64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=X86_64_DEFAULT -DMLKEM_USE_NATIVE)
+    target_compile_options(ml_kem_768_x86_64 PUBLIC -DMLKEM_K=3 -DFORCE_X86_64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=X86_64_DEFAULT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM768_X86_64_DEFAULT)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_x86_64>)
 endif()
 
@@ -53,25 +53,25 @@ if(OQS_ENABLE_KEM_ml_kem_768_aarch64)
     add_library(ml_kem_768_aarch64 OBJECT mlkem-native_ml-kem-768_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-768_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-768_aarch64/cbd.c mlkem-native_ml-kem-768_aarch64/debug/debug.c mlkem-native_ml-kem-768_aarch64/indcpa.c mlkem-native_ml-kem-768_aarch64/kem.c mlkem-native_ml-kem-768_aarch64/ntt.c mlkem-native_ml-kem-768_aarch64/poly.c mlkem-native_ml-kem-768_aarch64/polyvec.c mlkem-native_ml-kem-768_aarch64/rej_uniform.c mlkem-native_ml-kem-768_aarch64/verify.c mlkem-native_ml-kem-768_aarch64/zetas.c)
     target_include_directories(ml_kem_768_aarch64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-768_aarch64)
     target_include_directories(ml_kem_768_aarch64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
-    target_compile_options(ml_kem_768_aarch64 PUBLIC -DMLKEM_K=3 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE)
+    target_compile_options(ml_kem_768_aarch64 PUBLIC -DMLKEM_K=3 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM768_AARCH64_OPT)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_aarch64>)
 endif()
 
 if(OQS_ENABLE_KEM_ml_kem_1024)
     add_library(ml_kem_1024_ref OBJECT kem_ml_kem_1024.c mlkem-native_ml-kem-1024_ref/cbd.c mlkem-native_ml-kem-1024_ref/debug/debug.c mlkem-native_ml-kem-1024_ref/indcpa.c mlkem-native_ml-kem-1024_ref/kem.c mlkem-native_ml-kem-1024_ref/ntt.c mlkem-native_ml-kem-1024_ref/poly.c mlkem-native_ml-kem-1024_ref/polyvec.c mlkem-native_ml-kem-1024_ref/rej_uniform.c mlkem-native_ml-kem-1024_ref/verify.c mlkem-native_ml-kem-1024_ref/zetas.c)
-    target_compile_options(ml_kem_1024_ref PUBLIC -DMLKEM_K=4)
+    target_compile_options(ml_kem_1024_ref PUBLIC -DMLKEM_K=4 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM1024_C)
     target_include_directories(ml_kem_1024_ref PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-1024_ref)
     target_include_directories(ml_kem_1024_ref PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
-    target_compile_options(ml_kem_1024_ref PUBLIC -DMLKEM_K=4)
+    target_compile_options(ml_kem_1024_ref PUBLIC -DMLKEM_K=4 -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM1024_C)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_ref>)
 endif()
 
 if(OQS_ENABLE_KEM_ml_kem_1024_x86_64)
-    add_library(ml_kem_1024_x86_64 OBJECT mlkem-native_ml-kem-1024_x86_64/cbd.c mlkem-native_ml-kem-1024_x86_64/debug/debug.c mlkem-native_ml-kem-1024_x86_64/indcpa.c mlkem-native_ml-kem-1024_x86_64/kem.c mlkem-native_ml-kem-1024_x86_64/ntt.c mlkem-native_ml-kem-1024_x86_64/poly.c mlkem-native_ml-kem-1024_x86_64/polyvec.c mlkem-native_ml-kem-1024_x86_64/rej_uniform.c mlkem-native_ml-kem-1024_x86_64/verify.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-1024_x86_64/zetas.c)
+    add_library(ml_kem_1024_x86_64 OBJECT mlkem-native_ml-kem-1024_x86_64/cbd.c mlkem-native_ml-kem-1024_x86_64/debug/debug.c mlkem-native_ml-kem-1024_x86_64/indcpa.c mlkem-native_ml-kem-1024_x86_64/kem.c mlkem-native_ml-kem-1024_x86_64/ntt.c mlkem-native_ml-kem-1024_x86_64/poly.c mlkem-native_ml-kem-1024_x86_64/polyvec.c mlkem-native_ml-kem-1024_x86_64/rej_uniform.c mlkem-native_ml-kem-1024_x86_64/verify.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/consts.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/fq.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/intt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/ntt.S mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_table.c mlkem-native_ml-kem-1024_x86_64/x86_64/src/shuffle.S mlkem-native_ml-kem-1024_x86_64/zetas.c)
     target_include_directories(ml_kem_1024_x86_64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-1024_x86_64)
     target_include_directories(ml_kem_1024_x86_64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
     target_compile_options(ml_kem_1024_x86_64 PRIVATE  -mavx2  -mbmi2  -mpopcnt )
-    target_compile_options(ml_kem_1024_x86_64 PUBLIC -DMLKEM_K=4 -DFORCE_X86_64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=X86_64_DEFAULT -DMLKEM_USE_NATIVE)
+    target_compile_options(ml_kem_1024_x86_64 PUBLIC -DMLKEM_K=4 -DFORCE_X86_64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=X86_64_DEFAULT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM1024_X86_64_DEFAULT)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_x86_64>)
 endif()
 
@@ -79,7 +79,7 @@ if(OQS_ENABLE_KEM_ml_kem_1024_aarch64)
     add_library(ml_kem_1024_aarch64 OBJECT mlkem-native_ml-kem-1024_aarch64/aarch64/src/aarch64_zetas.c mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/intt_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/ntt_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/poly_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/polyvec_opt.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_asm_clean.S mlkem-native_ml-kem-1024_aarch64/aarch64/src/rej_uniform_table.c mlkem-native_ml-kem-1024_aarch64/cbd.c mlkem-native_ml-kem-1024_aarch64/debug/debug.c mlkem-native_ml-kem-1024_aarch64/indcpa.c mlkem-native_ml-kem-1024_aarch64/kem.c mlkem-native_ml-kem-1024_aarch64/ntt.c mlkem-native_ml-kem-1024_aarch64/poly.c mlkem-native_ml-kem-1024_aarch64/polyvec.c mlkem-native_ml-kem-1024_aarch64/rej_uniform.c mlkem-native_ml-kem-1024_aarch64/verify.c mlkem-native_ml-kem-1024_aarch64/zetas.c)
     target_include_directories(ml_kem_1024_aarch64 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/mlkem-native_ml-kem-1024_aarch64)
     target_include_directories(ml_kem_1024_aarch64 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
-    target_compile_options(ml_kem_1024_aarch64 PUBLIC -DMLKEM_K=4 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE)
+    target_compile_options(ml_kem_1024_aarch64 PUBLIC -DMLKEM_K=4 -DFORCE_AARCH64 -DMLKEM_NATIVE_ARITH_BACKEND_NAME=AARCH64_OPT -DMLKEM_USE_NATIVE -DMLKEM_NAMESPACE_PREFIX=PQCP_MLKEM_NATIVE_MLKEM1024_AARCH64_OPT)
     set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_aarch64>)
 endif()
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * The MLKEM namespace is MLKEM_NAMESPACE_PREFIX plus '_', typically
+ *
+ *   PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
+ */
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif /* !defined(INLINE) */
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_aarch64/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE int32_t get_optblocker_i32(void) /* signed: XORed into int32_t */
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * There is no default MLKEM namespace in this configuration:
+ *
+ *   MLKEM_NAMESPACE(sym) expands to <MLKEM_NAMESPACE_PREFIX>_<sym>
+ *
+ * and MLKEM_NAMESPACE_PREFIX must be defined by the build (see above).
+ */
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif /* !defined(INLINE) */
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_ref/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * The default MLKEM namespace would be
+ *
+ *   PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_, but it is not defined
+ * here: MLKEM_NAMESPACE above is built from MLKEM_NAMESPACE_PREFIX. */
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1))
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1))
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1))
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif /* !defined(INLINE) */
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h
index 4b78c004a..4fbf92beb 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/arith_native_x86_64.h
@@ -20,6 +20,9 @@
 #define rej_uniform_avx2 MLKEM_NAMESPACE(rej_uniform_avx2)
 unsigned int rej_uniform_avx2(int16_t *r, const uint8_t *buf);
 
+#define rej_uniform_table MLKEM_NAMESPACE(rej_uniform_table)
+extern const uint8_t rej_uniform_table[256][8];
+
 #define ntt_avx2 MLKEM_NAMESPACE(ntt_avx2)
 void ntt_avx2(__m256i *r, const __m256i *qdata);
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c
index 3f1653ed3..098f90ef3 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/basemul.c
@@ -25,7 +25,7 @@ static void poly_basemul_montgomery_avx2(poly *r, const poly *a, const poly *b)
  */
 static void poly_add_avx2(poly *r, const poly *a, const poly *b)
 {
-  unsigned int i;
+  unsigned i;
   __m256i f0, f1;
 
   for (i = 0; i < MLKEM_N; i += 16)
@@ -41,7 +41,7 @@ void polyvec_basemul_acc_montgomery_cached_avx2(poly *r, const polyvec *a,
                                                 const polyvec *b,
                                                 const polyvec_mulcache *b_cache)
 {
-  unsigned int i;
+  unsigned i;
   poly t;
 
   /* TODO: Use mulcache for AVX2. So far, it is unused. */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c
index c3c8b8104..c65b3d3d8 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_avx2.c
@@ -17,139 +17,6 @@
 #include <string.h>
 #include "arith_native_x86_64.h"
 #include "consts.h"
-/* #define BMI */
-
-#ifndef BMI
-static const uint8_t idx[256][8] = {
-    {-1, -1, -1, -1, -1, -1, -1, -1}, {0, -1, -1, -1, -1, -1, -1, -1},
-    {2, -1, -1, -1, -1, -1, -1, -1},  {0, 2, -1, -1, -1, -1, -1, -1},
-    {4, -1, -1, -1, -1, -1, -1, -1},  {0, 4, -1, -1, -1, -1, -1, -1},
-    {2, 4, -1, -1, -1, -1, -1, -1},   {0, 2, 4, -1, -1, -1, -1, -1},
-    {6, -1, -1, -1, -1, -1, -1, -1},  {0, 6, -1, -1, -1, -1, -1, -1},
-    {2, 6, -1, -1, -1, -1, -1, -1},   {0, 2, 6, -1, -1, -1, -1, -1},
-    {4, 6, -1, -1, -1, -1, -1, -1},   {0, 4, 6, -1, -1, -1, -1, -1},
-    {2, 4, 6, -1, -1, -1, -1, -1},    {0, 2, 4, 6, -1, -1, -1, -1},
-    {8, -1, -1, -1, -1, -1, -1, -1},  {0, 8, -1, -1, -1, -1, -1, -1},
-    {2, 8, -1, -1, -1, -1, -1, -1},   {0, 2, 8, -1, -1, -1, -1, -1},
-    {4, 8, -1, -1, -1, -1, -1, -1},   {0, 4, 8, -1, -1, -1, -1, -1},
-    {2, 4, 8, -1, -1, -1, -1, -1},    {0, 2, 4, 8, -1, -1, -1, -1},
-    {6, 8, -1, -1, -1, -1, -1, -1},   {0, 6, 8, -1, -1, -1, -1, -1},
-    {2, 6, 8, -1, -1, -1, -1, -1},    {0, 2, 6, 8, -1, -1, -1, -1},
-    {4, 6, 8, -1, -1, -1, -1, -1},    {0, 4, 6, 8, -1, -1, -1, -1},
-    {2, 4, 6, 8, -1, -1, -1, -1},     {0, 2, 4, 6, 8, -1, -1, -1},
-    {10, -1, -1, -1, -1, -1, -1, -1}, {0, 10, -1, -1, -1, -1, -1, -1},
-    {2, 10, -1, -1, -1, -1, -1, -1},  {0, 2, 10, -1, -1, -1, -1, -1},
-    {4, 10, -1, -1, -1, -1, -1, -1},  {0, 4, 10, -1, -1, -1, -1, -1},
-    {2, 4, 10, -1, -1, -1, -1, -1},   {0, 2, 4, 10, -1, -1, -1, -1},
-    {6, 10, -1, -1, -1, -1, -1, -1},  {0, 6, 10, -1, -1, -1, -1, -1},
-    {2, 6, 10, -1, -1, -1, -1, -1},   {0, 2, 6, 10, -1, -1, -1, -1},
-    {4, 6, 10, -1, -1, -1, -1, -1},   {0, 4, 6, 10, -1, -1, -1, -1},
-    {2, 4, 6, 10, -1, -1, -1, -1},    {0, 2, 4, 6, 10, -1, -1, -1},
-    {8, 10, -1, -1, -1, -1, -1, -1},  {0, 8, 10, -1, -1, -1, -1, -1},
-    {2, 8, 10, -1, -1, -1, -1, -1},   {0, 2, 8, 10, -1, -1, -1, -1},
-    {4, 8, 10, -1, -1, -1, -1, -1},   {0, 4, 8, 10, -1, -1, -1, -1},
-    {2, 4, 8, 10, -1, -1, -1, -1},    {0, 2, 4, 8, 10, -1, -1, -1},
-    {6, 8, 10, -1, -1, -1, -1, -1},   {0, 6, 8, 10, -1, -1, -1, -1},
-    {2, 6, 8, 10, -1, -1, -1, -1},    {0, 2, 6, 8, 10, -1, -1, -1},
-    {4, 6, 8, 10, -1, -1, -1, -1},    {0, 4, 6, 8, 10, -1, -1, -1},
-    {2, 4, 6, 8, 10, -1, -1, -1},     {0, 2, 4, 6, 8, 10, -1, -1},
-    {12, -1, -1, -1, -1, -1, -1, -1}, {0, 12, -1, -1, -1, -1, -1, -1},
-    {2, 12, -1, -1, -1, -1, -1, -1},  {0, 2, 12, -1, -1, -1, -1, -1},
-    {4, 12, -1, -1, -1, -1, -1, -1},  {0, 4, 12, -1, -1, -1, -1, -1},
-    {2, 4, 12, -1, -1, -1, -1, -1},   {0, 2, 4, 12, -1, -1, -1, -1},
-    {6, 12, -1, -1, -1, -1, -1, -1},  {0, 6, 12, -1, -1, -1, -1, -1},
-    {2, 6, 12, -1, -1, -1, -1, -1},   {0, 2, 6, 12, -1, -1, -1, -1},
-    {4, 6, 12, -1, -1, -1, -1, -1},   {0, 4, 6, 12, -1, -1, -1, -1},
-    {2, 4, 6, 12, -1, -1, -1, -1},    {0, 2, 4, 6, 12, -1, -1, -1},
-    {8, 12, -1, -1, -1, -1, -1, -1},  {0, 8, 12, -1, -1, -1, -1, -1},
-    {2, 8, 12, -1, -1, -1, -1, -1},   {0, 2, 8, 12, -1, -1, -1, -1},
-    {4, 8, 12, -1, -1, -1, -1, -1},   {0, 4, 8, 12, -1, -1, -1, -1},
-    {2, 4, 8, 12, -1, -1, -1, -1},    {0, 2, 4, 8, 12, -1, -1, -1},
-    {6, 8, 12, -1, -1, -1, -1, -1},   {0, 6, 8, 12, -1, -1, -1, -1},
-    {2, 6, 8, 12, -1, -1, -1, -1},    {0, 2, 6, 8, 12, -1, -1, -1},
-    {4, 6, 8, 12, -1, -1, -1, -1},    {0, 4, 6, 8, 12, -1, -1, -1},
-    {2, 4, 6, 8, 12, -1, -1, -1},     {0, 2, 4, 6, 8, 12, -1, -1},
-    {10, 12, -1, -1, -1, -1, -1, -1}, {0, 10, 12, -1, -1, -1, -1, -1},
-    {2, 10, 12, -1, -1, -1, -1, -1},  {0, 2, 10, 12, -1, -1, -1, -1},
-    {4, 10, 12, -1, -1, -1, -1, -1},  {0, 4, 10, 12, -1, -1, -1, -1},
-    {2, 4, 10, 12, -1, -1, -1, -1},   {0, 2, 4, 10, 12, -1, -1, -1},
-    {6, 10, 12, -1, -1, -1, -1, -1},  {0, 6, 10, 12, -1, -1, -1, -1},
-    {2, 6, 10, 12, -1, -1, -1, -1},   {0, 2, 6, 10, 12, -1, -1, -1},
-    {4, 6, 10, 12, -1, -1, -1, -1},   {0, 4, 6, 10, 12, -1, -1, -1},
-    {2, 4, 6, 10, 12, -1, -1, -1},    {0, 2, 4, 6, 10, 12, -1, -1},
-    {8, 10, 12, -1, -1, -1, -1, -1},  {0, 8, 10, 12, -1, -1, -1, -1},
-    {2, 8, 10, 12, -1, -1, -1, -1},   {0, 2, 8, 10, 12, -1, -1, -1},
-    {4, 8, 10, 12, -1, -1, -1, -1},   {0, 4, 8, 10, 12, -1, -1, -1},
-    {2, 4, 8, 10, 12, -1, -1, -1},    {0, 2, 4, 8, 10, 12, -1, -1},
-    {6, 8, 10, 12, -1, -1, -1, -1},   {0, 6, 8, 10, 12, -1, -1, -1},
-    {2, 6, 8, 10, 12, -1, -1, -1},    {0, 2, 6, 8, 10, 12, -1, -1},
-    {4, 6, 8, 10, 12, -1, -1, -1},    {0, 4, 6, 8, 10, 12, -1, -1},
-    {2, 4, 6, 8, 10, 12, -1, -1},     {0, 2, 4, 6, 8, 10, 12, -1},
-    {14, -1, -1, -1, -1, -1, -1, -1}, {0, 14, -1, -1, -1, -1, -1, -1},
-    {2, 14, -1, -1, -1, -1, -1, -1},  {0, 2, 14, -1, -1, -1, -1, -1},
-    {4, 14, -1, -1, -1, -1, -1, -1},  {0, 4, 14, -1, -1, -1, -1, -1},
-    {2, 4, 14, -1, -1, -1, -1, -1},   {0, 2, 4, 14, -1, -1, -1, -1},
-    {6, 14, -1, -1, -1, -1, -1, -1},  {0, 6, 14, -1, -1, -1, -1, -1},
-    {2, 6, 14, -1, -1, -1, -1, -1},   {0, 2, 6, 14, -1, -1, -1, -1},
-    {4, 6, 14, -1, -1, -1, -1, -1},   {0, 4, 6, 14, -1, -1, -1, -1},
-    {2, 4, 6, 14, -1, -1, -1, -1},    {0, 2, 4, 6, 14, -1, -1, -1},
-    {8, 14, -1, -1, -1, -1, -1, -1},  {0, 8, 14, -1, -1, -1, -1, -1},
-    {2, 8, 14, -1, -1, -1, -1, -1},   {0, 2, 8, 14, -1, -1, -1, -1},
-    {4, 8, 14, -1, -1, -1, -1, -1},   {0, 4, 8, 14, -1, -1, -1, -1},
-    {2, 4, 8, 14, -1, -1, -1, -1},    {0, 2, 4, 8, 14, -1, -1, -1},
-    {6, 8, 14, -1, -1, -1, -1, -1},   {0, 6, 8, 14, -1, -1, -1, -1},
-    {2, 6, 8, 14, -1, -1, -1, -1},    {0, 2, 6, 8, 14, -1, -1, -1},
-    {4, 6, 8, 14, -1, -1, -1, -1},    {0, 4, 6, 8, 14, -1, -1, -1},
-    {2, 4, 6, 8, 14, -1, -1, -1},     {0, 2, 4, 6, 8, 14, -1, -1},
-    {10, 14, -1, -1, -1, -1, -1, -1}, {0, 10, 14, -1, -1, -1, -1, -1},
-    {2, 10, 14, -1, -1, -1, -1, -1},  {0, 2, 10, 14, -1, -1, -1, -1},
-    {4, 10, 14, -1, -1, -1, -1, -1},  {0, 4, 10, 14, -1, -1, -1, -1},
-    {2, 4, 10, 14, -1, -1, -1, -1},   {0, 2, 4, 10, 14, -1, -1, -1},
-    {6, 10, 14, -1, -1, -1, -1, -1},  {0, 6, 10, 14, -1, -1, -1, -1},
-    {2, 6, 10, 14, -1, -1, -1, -1},   {0, 2, 6, 10, 14, -1, -1, -1},
-    {4, 6, 10, 14, -1, -1, -1, -1},   {0, 4, 6, 10, 14, -1, -1, -1},
-    {2, 4, 6, 10, 14, -1, -1, -1},    {0, 2, 4, 6, 10, 14, -1, -1},
-    {8, 10, 14, -1, -1, -1, -1, -1},  {0, 8, 10, 14, -1, -1, -1, -1},
-    {2, 8, 10, 14, -1, -1, -1, -1},   {0, 2, 8, 10, 14, -1, -1, -1},
-    {4, 8, 10, 14, -1, -1, -1, -1},   {0, 4, 8, 10, 14, -1, -1, -1},
-    {2, 4, 8, 10, 14, -1, -1, -1},    {0, 2, 4, 8, 10, 14, -1, -1},
-    {6, 8, 10, 14, -1, -1, -1, -1},   {0, 6, 8, 10, 14, -1, -1, -1},
-    {2, 6, 8, 10, 14, -1, -1, -1},    {0, 2, 6, 8, 10, 14, -1, -1},
-    {4, 6, 8, 10, 14, -1, -1, -1},    {0, 4, 6, 8, 10, 14, -1, -1},
-    {2, 4, 6, 8, 10, 14, -1, -1},     {0, 2, 4, 6, 8, 10, 14, -1},
-    {12, 14, -1, -1, -1, -1, -1, -1}, {0, 12, 14, -1, -1, -1, -1, -1},
-    {2, 12, 14, -1, -1, -1, -1, -1},  {0, 2, 12, 14, -1, -1, -1, -1},
-    {4, 12, 14, -1, -1, -1, -1, -1},  {0, 4, 12, 14, -1, -1, -1, -1},
-    {2, 4, 12, 14, -1, -1, -1, -1},   {0, 2, 4, 12, 14, -1, -1, -1},
-    {6, 12, 14, -1, -1, -1, -1, -1},  {0, 6, 12, 14, -1, -1, -1, -1},
-    {2, 6, 12, 14, -1, -1, -1, -1},   {0, 2, 6, 12, 14, -1, -1, -1},
-    {4, 6, 12, 14, -1, -1, -1, -1},   {0, 4, 6, 12, 14, -1, -1, -1},
-    {2, 4, 6, 12, 14, -1, -1, -1},    {0, 2, 4, 6, 12, 14, -1, -1},
-    {8, 12, 14, -1, -1, -1, -1, -1},  {0, 8, 12, 14, -1, -1, -1, -1},
-    {2, 8, 12, 14, -1, -1, -1, -1},   {0, 2, 8, 12, 14, -1, -1, -1},
-    {4, 8, 12, 14, -1, -1, -1, -1},   {0, 4, 8, 12, 14, -1, -1, -1},
-    {2, 4, 8, 12, 14, -1, -1, -1},    {0, 2, 4, 8, 12, 14, -1, -1},
-    {6, 8, 12, 14, -1, -1, -1, -1},   {0, 6, 8, 12, 14, -1, -1, -1},
-    {2, 6, 8, 12, 14, -1, -1, -1},    {0, 2, 6, 8, 12, 14, -1, -1},
-    {4, 6, 8, 12, 14, -1, -1, -1},    {0, 4, 6, 8, 12, 14, -1, -1},
-    {2, 4, 6, 8, 12, 14, -1, -1},     {0, 2, 4, 6, 8, 12, 14, -1},
-    {10, 12, 14, -1, -1, -1, -1, -1}, {0, 10, 12, 14, -1, -1, -1, -1},
-    {2, 10, 12, 14, -1, -1, -1, -1},  {0, 2, 10, 12, 14, -1, -1, -1},
-    {4, 10, 12, 14, -1, -1, -1, -1},  {0, 4, 10, 12, 14, -1, -1, -1},
-    {2, 4, 10, 12, 14, -1, -1, -1},   {0, 2, 4, 10, 12, 14, -1, -1},
-    {6, 10, 12, 14, -1, -1, -1, -1},  {0, 6, 10, 12, 14, -1, -1, -1},
-    {2, 6, 10, 12, 14, -1, -1, -1},   {0, 2, 6, 10, 12, 14, -1, -1},
-    {4, 6, 10, 12, 14, -1, -1, -1},   {0, 4, 6, 10, 12, 14, -1, -1},
-    {2, 4, 6, 10, 12, 14, -1, -1},    {0, 2, 4, 6, 10, 12, 14, -1},
-    {8, 10, 12, 14, -1, -1, -1, -1},  {0, 8, 10, 12, 14, -1, -1, -1},
-    {2, 8, 10, 12, 14, -1, -1, -1},   {0, 2, 8, 10, 12, 14, -1, -1},
-    {4, 8, 10, 12, 14, -1, -1, -1},   {0, 4, 8, 10, 12, 14, -1, -1},
-    {2, 4, 8, 10, 12, 14, -1, -1},    {0, 2, 4, 8, 10, 12, 14, -1},
-    {6, 8, 10, 12, 14, -1, -1, -1},   {0, 6, 8, 10, 12, 14, -1, -1},
-    {2, 6, 8, 10, 12, 14, -1, -1},    {0, 2, 6, 8, 10, 12, 14, -1},
-    {4, 6, 8, 10, 12, 14, -1, -1},    {0, 4, 6, 8, 10, 12, 14, -1},
-    {2, 4, 6, 8, 10, 12, 14, -1},     {0, 2, 4, 6, 8, 10, 12, 14}};
-#endif
 
 #define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a)
 #define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a)
@@ -159,9 +26,6 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
   unsigned int ctr, pos;
   uint16_t val0, val1;
   uint32_t good;
-#ifdef BMI
-  uint64_t idx0, idx1, idx2, idx3;
-#endif
   const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ / 16]);
   const __m256i ones = _mm256_set1_epi8(1);
   const __m256i mask = _mm256_set1_epi16(0xFFF);
@@ -195,34 +59,16 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
     g0 = _mm256_packs_epi16(g0, g1);
     good = _mm256_movemask_epi8(g0);
 
-#ifdef BMI
-    idx0 = _pdep_u64(good >> 0, 0x0101010101010101);
-    idx1 = _pdep_u64(good >> 8, 0x0101010101010101);
-    idx2 = _pdep_u64(good >> 16, 0x0101010101010101);
-    idx3 = _pdep_u64(good >> 24, 0x0101010101010101);
-    idx0 = (idx0 << 8) - idx0;
-    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
-    idx1 = (idx1 << 8) - idx1;
-    idx1 = _pext_u64(0x0E0C0A0806040200, idx1);
-    idx2 = (idx2 << 8) - idx2;
-    idx2 = _pext_u64(0x0E0C0A0806040200, idx2);
-    idx3 = (idx3 << 8) - idx3;
-    idx3 = _pext_u64(0x0E0C0A0806040200, idx3);
-
-    g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0));
-    g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1));
-    g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1);
-    g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1);
-#else
     g0 = _mm256_castsi128_si256(
-        _mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF]));
+        _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 0) & 0xFF]));
     g1 = _mm256_castsi128_si256(
-        _mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF]));
+        _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 8) & 0xFF]));
     g0 = _mm256_inserti128_si256(
-        g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1);
+        g0, _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 16) & 0xFF]),
+        1);
     g1 = _mm256_inserti128_si256(
-        g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1);
-#endif
+        g1, _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 24) & 0xFF]),
+        1);
 
     g2 = _mm256_add_epi8(g0, ones);
     g3 = _mm256_add_epi8(g1, ones);
@@ -254,16 +100,8 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
     t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f);
     good = _mm_movemask_epi8(t);
 
-#ifdef BMI
-    good &= 0x5555;
-    idx0 = _pdep_u64(good, 0x1111111111111111);
-    idx0 = (idx0 << 8) - idx0;
-    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
-    pilo = _mm_cvtsi64_si128(idx0);
-#else
     good = _pext_u32(good, 0x5555);
-    pilo = _mm_loadl_epi64((__m128i *)&idx[good]);
-#endif
+    pilo = _mm_loadl_epi64((__m128i *)&rej_uniform_table[good]);
 
     pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones));
     pilo = _mm_unpacklo_epi8(pilo, pihi);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_table.c b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_table.c
new file mode 100644
index 000000000..e49029140
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-1024_x86_64/x86_64/src/rej_uniform_table.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * WARNING: This file is auto-generated from scripts/autogenerate_files.py
+ *          Do not modify it directly.
+ */
+
+#include "common.h"
+
+#if defined(MLKEM_NATIVE_ARITH_BACKEND_X86_64_DEFAULT)
+
+#include <stdint.h>
+#include "arith_native_x86_64.h"
+
+/*
+ * Lookup table used by rejection sampling of the public matrix. Row m lists
+ * 2*i per set bit i of m (ascending, -1 padded). See autogenerate_files.py.
+ */
+ALIGN const uint8_t rej_uniform_table[256][8] = {
+    {-1, -1, -1, -1, -1, -1, -1, -1}, {0, -1, -1, -1, -1, -1, -1, -1},
+    {2, -1, -1, -1, -1, -1, -1, -1},  {0, 2, -1, -1, -1, -1, -1, -1},
+    {4, -1, -1, -1, -1, -1, -1, -1},  {0, 4, -1, -1, -1, -1, -1, -1},
+    {2, 4, -1, -1, -1, -1, -1, -1},   {0, 2, 4, -1, -1, -1, -1, -1},
+    {6, -1, -1, -1, -1, -1, -1, -1},  {0, 6, -1, -1, -1, -1, -1, -1},
+    {2, 6, -1, -1, -1, -1, -1, -1},   {0, 2, 6, -1, -1, -1, -1, -1},
+    {4, 6, -1, -1, -1, -1, -1, -1},   {0, 4, 6, -1, -1, -1, -1, -1},
+    {2, 4, 6, -1, -1, -1, -1, -1},    {0, 2, 4, 6, -1, -1, -1, -1},
+    {8, -1, -1, -1, -1, -1, -1, -1},  {0, 8, -1, -1, -1, -1, -1, -1},
+    {2, 8, -1, -1, -1, -1, -1, -1},   {0, 2, 8, -1, -1, -1, -1, -1},
+    {4, 8, -1, -1, -1, -1, -1, -1},   {0, 4, 8, -1, -1, -1, -1, -1},
+    {2, 4, 8, -1, -1, -1, -1, -1},    {0, 2, 4, 8, -1, -1, -1, -1},
+    {6, 8, -1, -1, -1, -1, -1, -1},   {0, 6, 8, -1, -1, -1, -1, -1},
+    {2, 6, 8, -1, -1, -1, -1, -1},    {0, 2, 6, 8, -1, -1, -1, -1},
+    {4, 6, 8, -1, -1, -1, -1, -1},    {0, 4, 6, 8, -1, -1, -1, -1},
+    {2, 4, 6, 8, -1, -1, -1, -1},     {0, 2, 4, 6, 8, -1, -1, -1},
+    {10, -1, -1, -1, -1, -1, -1, -1}, {0, 10, -1, -1, -1, -1, -1, -1},
+    {2, 10, -1, -1, -1, -1, -1, -1},  {0, 2, 10, -1, -1, -1, -1, -1},
+    {4, 10, -1, -1, -1, -1, -1, -1},  {0, 4, 10, -1, -1, -1, -1, -1},
+    {2, 4, 10, -1, -1, -1, -1, -1},   {0, 2, 4, 10, -1, -1, -1, -1},
+    {6, 10, -1, -1, -1, -1, -1, -1},  {0, 6, 10, -1, -1, -1, -1, -1},
+    {2, 6, 10, -1, -1, -1, -1, -1},   {0, 2, 6, 10, -1, -1, -1, -1},
+    {4, 6, 10, -1, -1, -1, -1, -1},   {0, 4, 6, 10, -1, -1, -1, -1},
+    {2, 4, 6, 10, -1, -1, -1, -1},    {0, 2, 4, 6, 10, -1, -1, -1},
+    {8, 10, -1, -1, -1, -1, -1, -1},  {0, 8, 10, -1, -1, -1, -1, -1},
+    {2, 8, 10, -1, -1, -1, -1, -1},   {0, 2, 8, 10, -1, -1, -1, -1},
+    {4, 8, 10, -1, -1, -1, -1, -1},   {0, 4, 8, 10, -1, -1, -1, -1},
+    {2, 4, 8, 10, -1, -1, -1, -1},    {0, 2, 4, 8, 10, -1, -1, -1},
+    {6, 8, 10, -1, -1, -1, -1, -1},   {0, 6, 8, 10, -1, -1, -1, -1},
+    {2, 6, 8, 10, -1, -1, -1, -1},    {0, 2, 6, 8, 10, -1, -1, -1},
+    {4, 6, 8, 10, -1, -1, -1, -1},    {0, 4, 6, 8, 10, -1, -1, -1},
+    {2, 4, 6, 8, 10, -1, -1, -1},     {0, 2, 4, 6, 8, 10, -1, -1},
+    {12, -1, -1, -1, -1, -1, -1, -1}, {0, 12, -1, -1, -1, -1, -1, -1},
+    {2, 12, -1, -1, -1, -1, -1, -1},  {0, 2, 12, -1, -1, -1, -1, -1},
+    {4, 12, -1, -1, -1, -1, -1, -1},  {0, 4, 12, -1, -1, -1, -1, -1},
+    {2, 4, 12, -1, -1, -1, -1, -1},   {0, 2, 4, 12, -1, -1, -1, -1},
+    {6, 12, -1, -1, -1, -1, -1, -1},  {0, 6, 12, -1, -1, -1, -1, -1},
+    {2, 6, 12, -1, -1, -1, -1, -1},   {0, 2, 6, 12, -1, -1, -1, -1},
+    {4, 6, 12, -1, -1, -1, -1, -1},   {0, 4, 6, 12, -1, -1, -1, -1},
+    {2, 4, 6, 12, -1, -1, -1, -1},    {0, 2, 4, 6, 12, -1, -1, -1},
+    {8, 12, -1, -1, -1, -1, -1, -1},  {0, 8, 12, -1, -1, -1, -1, -1},
+    {2, 8, 12, -1, -1, -1, -1, -1},   {0, 2, 8, 12, -1, -1, -1, -1},
+    {4, 8, 12, -1, -1, -1, -1, -1},   {0, 4, 8, 12, -1, -1, -1, -1},
+    {2, 4, 8, 12, -1, -1, -1, -1},    {0, 2, 4, 8, 12, -1, -1, -1},
+    {6, 8, 12, -1, -1, -1, -1, -1},   {0, 6, 8, 12, -1, -1, -1, -1},
+    {2, 6, 8, 12, -1, -1, -1, -1},    {0, 2, 6, 8, 12, -1, -1, -1},
+    {4, 6, 8, 12, -1, -1, -1, -1},    {0, 4, 6, 8, 12, -1, -1, -1},
+    {2, 4, 6, 8, 12, -1, -1, -1},     {0, 2, 4, 6, 8, 12, -1, -1},
+    {10, 12, -1, -1, -1, -1, -1, -1}, {0, 10, 12, -1, -1, -1, -1, -1},
+    {2, 10, 12, -1, -1, -1, -1, -1},  {0, 2, 10, 12, -1, -1, -1, -1},
+    {4, 10, 12, -1, -1, -1, -1, -1},  {0, 4, 10, 12, -1, -1, -1, -1},
+    {2, 4, 10, 12, -1, -1, -1, -1},   {0, 2, 4, 10, 12, -1, -1, -1},
+    {6, 10, 12, -1, -1, -1, -1, -1},  {0, 6, 10, 12, -1, -1, -1, -1},
+    {2, 6, 10, 12, -1, -1, -1, -1},   {0, 2, 6, 10, 12, -1, -1, -1},
+    {4, 6, 10, 12, -1, -1, -1, -1},   {0, 4, 6, 10, 12, -1, -1, -1},
+    {2, 4, 6, 10, 12, -1, -1, -1},    {0, 2, 4, 6, 10, 12, -1, -1},
+    {8, 10, 12, -1, -1, -1, -1, -1},  {0, 8, 10, 12, -1, -1, -1, -1},
+    {2, 8, 10, 12, -1, -1, -1, -1},   {0, 2, 8, 10, 12, -1, -1, -1},
+    {4, 8, 10, 12, -1, -1, -1, -1},   {0, 4, 8, 10, 12, -1, -1, -1},
+    {2, 4, 8, 10, 12, -1, -1, -1},    {0, 2, 4, 8, 10, 12, -1, -1},
+    {6, 8, 10, 12, -1, -1, -1, -1},   {0, 6, 8, 10, 12, -1, -1, -1},
+    {2, 6, 8, 10, 12, -1, -1, -1},    {0, 2, 6, 8, 10, 12, -1, -1},
+    {4, 6, 8, 10, 12, -1, -1, -1},    {0, 4, 6, 8, 10, 12, -1, -1},
+    {2, 4, 6, 8, 10, 12, -1, -1},     {0, 2, 4, 6, 8, 10, 12, -1},
+    {14, -1, -1, -1, -1, -1, -1, -1}, {0, 14, -1, -1, -1, -1, -1, -1},
+    {2, 14, -1, -1, -1, -1, -1, -1},  {0, 2, 14, -1, -1, -1, -1, -1},
+    {4, 14, -1, -1, -1, -1, -1, -1},  {0, 4, 14, -1, -1, -1, -1, -1},
+    {2, 4, 14, -1, -1, -1, -1, -1},   {0, 2, 4, 14, -1, -1, -1, -1},
+    {6, 14, -1, -1, -1, -1, -1, -1},  {0, 6, 14, -1, -1, -1, -1, -1},
+    {2, 6, 14, -1, -1, -1, -1, -1},   {0, 2, 6, 14, -1, -1, -1, -1},
+    {4, 6, 14, -1, -1, -1, -1, -1},   {0, 4, 6, 14, -1, -1, -1, -1},
+    {2, 4, 6, 14, -1, -1, -1, -1},    {0, 2, 4, 6, 14, -1, -1, -1},
+    {8, 14, -1, -1, -1, -1, -1, -1},  {0, 8, 14, -1, -1, -1, -1, -1},
+    {2, 8, 14, -1, -1, -1, -1, -1},   {0, 2, 8, 14, -1, -1, -1, -1},
+    {4, 8, 14, -1, -1, -1, -1, -1},   {0, 4, 8, 14, -1, -1, -1, -1},
+    {2, 4, 8, 14, -1, -1, -1, -1},    {0, 2, 4, 8, 14, -1, -1, -1},
+    {6, 8, 14, -1, -1, -1, -1, -1},   {0, 6, 8, 14, -1, -1, -1, -1},
+    {2, 6, 8, 14, -1, -1, -1, -1},    {0, 2, 6, 8, 14, -1, -1, -1},
+    {4, 6, 8, 14, -1, -1, -1, -1},    {0, 4, 6, 8, 14, -1, -1, -1},
+    {2, 4, 6, 8, 14, -1, -1, -1},     {0, 2, 4, 6, 8, 14, -1, -1},
+    {10, 14, -1, -1, -1, -1, -1, -1}, {0, 10, 14, -1, -1, -1, -1, -1},
+    {2, 10, 14, -1, -1, -1, -1, -1},  {0, 2, 10, 14, -1, -1, -1, -1},
+    {4, 10, 14, -1, -1, -1, -1, -1},  {0, 4, 10, 14, -1, -1, -1, -1},
+    {2, 4, 10, 14, -1, -1, -1, -1},   {0, 2, 4, 10, 14, -1, -1, -1},
+    {6, 10, 14, -1, -1, -1, -1, -1},  {0, 6, 10, 14, -1, -1, -1, -1},
+    {2, 6, 10, 14, -1, -1, -1, -1},   {0, 2, 6, 10, 14, -1, -1, -1},
+    {4, 6, 10, 14, -1, -1, -1, -1},   {0, 4, 6, 10, 14, -1, -1, -1},
+    {2, 4, 6, 10, 14, -1, -1, -1},    {0, 2, 4, 6, 10, 14, -1, -1},
+    {8, 10, 14, -1, -1, -1, -1, -1},  {0, 8, 10, 14, -1, -1, -1, -1},
+    {2, 8, 10, 14, -1, -1, -1, -1},   {0, 2, 8, 10, 14, -1, -1, -1},
+    {4, 8, 10, 14, -1, -1, -1, -1},   {0, 4, 8, 10, 14, -1, -1, -1},
+    {2, 4, 8, 10, 14, -1, -1, -1},    {0, 2, 4, 8, 10, 14, -1, -1},
+    {6, 8, 10, 14, -1, -1, -1, -1},   {0, 6, 8, 10, 14, -1, -1, -1},
+    {2, 6, 8, 10, 14, -1, -1, -1},    {0, 2, 6, 8, 10, 14, -1, -1},
+    {4, 6, 8, 10, 14, -1, -1, -1},    {0, 4, 6, 8, 10, 14, -1, -1},
+    {2, 4, 6, 8, 10, 14, -1, -1},     {0, 2, 4, 6, 8, 10, 14, -1},
+    {12, 14, -1, -1, -1, -1, -1, -1}, {0, 12, 14, -1, -1, -1, -1, -1},
+    {2, 12, 14, -1, -1, -1, -1, -1},  {0, 2, 12, 14, -1, -1, -1, -1},
+    {4, 12, 14, -1, -1, -1, -1, -1},  {0, 4, 12, 14, -1, -1, -1, -1},
+    {2, 4, 12, 14, -1, -1, -1, -1},   {0, 2, 4, 12, 14, -1, -1, -1},
+    {6, 12, 14, -1, -1, -1, -1, -1},  {0, 6, 12, 14, -1, -1, -1, -1},
+    {2, 6, 12, 14, -1, -1, -1, -1},   {0, 2, 6, 12, 14, -1, -1, -1},
+    {4, 6, 12, 14, -1, -1, -1, -1},   {0, 4, 6, 12, 14, -1, -1, -1},
+    {2, 4, 6, 12, 14, -1, -1, -1},    {0, 2, 4, 6, 12, 14, -1, -1},
+    {8, 12, 14, -1, -1, -1, -1, -1},  {0, 8, 12, 14, -1, -1, -1, -1},
+    {2, 8, 12, 14, -1, -1, -1, -1},   {0, 2, 8, 12, 14, -1, -1, -1},
+    {4, 8, 12, 14, -1, -1, -1, -1},   {0, 4, 8, 12, 14, -1, -1, -1},
+    {2, 4, 8, 12, 14, -1, -1, -1},    {0, 2, 4, 8, 12, 14, -1, -1},
+    {6, 8, 12, 14, -1, -1, -1, -1},   {0, 6, 8, 12, 14, -1, -1, -1},
+    {2, 6, 8, 12, 14, -1, -1, -1},    {0, 2, 6, 8, 12, 14, -1, -1},
+    {4, 6, 8, 12, 14, -1, -1, -1},    {0, 4, 6, 8, 12, 14, -1, -1},
+    {2, 4, 6, 8, 12, 14, -1, -1},     {0, 2, 4, 6, 8, 12, 14, -1},
+    {10, 12, 14, -1, -1, -1, -1, -1}, {0, 10, 12, 14, -1, -1, -1, -1},
+    {2, 10, 12, 14, -1, -1, -1, -1},  {0, 2, 10, 12, 14, -1, -1, -1},
+    {4, 10, 12, 14, -1, -1, -1, -1},  {0, 4, 10, 12, 14, -1, -1, -1},
+    {2, 4, 10, 12, 14, -1, -1, -1},   {0, 2, 4, 10, 12, 14, -1, -1},
+    {6, 10, 12, 14, -1, -1, -1, -1},  {0, 6, 10, 12, 14, -1, -1, -1},
+    {2, 6, 10, 12, 14, -1, -1, -1},   {0, 2, 6, 10, 12, 14, -1, -1},
+    {4, 6, 10, 12, 14, -1, -1, -1},   {0, 4, 6, 10, 12, 14, -1, -1},
+    {2, 4, 6, 10, 12, 14, -1, -1},    {0, 2, 4, 6, 10, 12, 14, -1},
+    {8, 10, 12, 14, -1, -1, -1, -1},  {0, 8, 10, 12, 14, -1, -1, -1},
+    {2, 8, 10, 12, 14, -1, -1, -1},   {0, 2, 8, 10, 12, 14, -1, -1},
+    {4, 8, 10, 12, 14, -1, -1, -1},   {0, 4, 8, 10, 12, 14, -1, -1},
+    {2, 4, 8, 10, 12, 14, -1, -1},    {0, 2, 4, 8, 10, 12, 14, -1},
+    {6, 8, 10, 12, 14, -1, -1, -1},   {0, 6, 8, 10, 12, 14, -1, -1},
+    {2, 6, 8, 10, 12, 14, -1, -1},    {0, 2, 6, 8, 10, 12, 14, -1},
+    {4, 6, 8, 10, 12, 14, -1, -1},    {0, 4, 6, 8, 10, 12, 14, -1},
+    {2, 4, 6, 8, 10, 12, 14, -1},     {0, 2, 4, 6, 8, 10, 12, 14},
+};
+
+#else
+
+/* Dummy declaration for compilers disliking empty compilation units */
+#define empty_cu_avx2_rej_uniform_table \
+  MLKEM_NAMESPACE(empty_cu_avx2_rej_uniform_table)
+int empty_cu_avx2_rej_uniform_table;
+#endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * The upstream default MLKEM namespace (not used here; see MLKEM_NAMESPACE) is
+ *
+ *   PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
+ */
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array
+ *                  of 2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_aarch64/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
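+/*
+ * MLKEM_NAMESPACE (defined above) has no built-in default: it prepends
+ * MLKEM_NAMESPACE_PREFIX, which must be defined before this file is read.
+ *
+ * Example prefix: PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_<BACKEND>,
+ * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT
+ */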
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_ref/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace prefix is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_
+ *
+ * e.g., FIPS202_DEFAULT_NAMESPACE(x) expands to PQCP_MLKEM_NATIVE_FIPS202_x
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * MLKEM_NAMESPACE is built from the mandatory MLKEM_NAMESPACE_PREFIX, e.g.
+ *
+ *   PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_<BACKEND>
+ *
+ * such as PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT (a '_' is appended for you).
+ */
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h
index 4b78c004a..4fbf92beb 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/arith_native_x86_64.h
@@ -20,6 +20,9 @@
 #define rej_uniform_avx2 MLKEM_NAMESPACE(rej_uniform_avx2)
 unsigned int rej_uniform_avx2(int16_t *r, const uint8_t *buf);
 
+#define rej_uniform_table MLKEM_NAMESPACE(rej_uniform_table)
+extern const uint8_t rej_uniform_table[256][8];
+
 #define ntt_avx2 MLKEM_NAMESPACE(ntt_avx2)
 void ntt_avx2(__m256i *r, const __m256i *qdata);
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c
index 3f1653ed3..098f90ef3 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/basemul.c
@@ -25,7 +25,7 @@ static void poly_basemul_montgomery_avx2(poly *r, const poly *a, const poly *b)
  */
 static void poly_add_avx2(poly *r, const poly *a, const poly *b)
 {
-  unsigned int i;
+  unsigned i;
   __m256i f0, f1;
 
   for (i = 0; i < MLKEM_N; i += 16)
@@ -41,7 +41,7 @@ void polyvec_basemul_acc_montgomery_cached_avx2(poly *r, const polyvec *a,
                                                 const polyvec *b,
                                                 const polyvec_mulcache *b_cache)
 {
-  unsigned int i;
+  unsigned i;
   poly t;
 
   /* TODO: Use mulcache for AVX2. So far, it is unused. */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c
index c3c8b8104..c65b3d3d8 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_avx2.c
@@ -17,139 +17,6 @@
 #include <string.h>
 #include "arith_native_x86_64.h"
 #include "consts.h"
-/* #define BMI */
-
-#ifndef BMI
-static const uint8_t idx[256][8] = {
-    {-1, -1, -1, -1, -1, -1, -1, -1}, {0, -1, -1, -1, -1, -1, -1, -1},
-    {2, -1, -1, -1, -1, -1, -1, -1},  {0, 2, -1, -1, -1, -1, -1, -1},
-    {4, -1, -1, -1, -1, -1, -1, -1},  {0, 4, -1, -1, -1, -1, -1, -1},
-    {2, 4, -1, -1, -1, -1, -1, -1},   {0, 2, 4, -1, -1, -1, -1, -1},
-    {6, -1, -1, -1, -1, -1, -1, -1},  {0, 6, -1, -1, -1, -1, -1, -1},
-    {2, 6, -1, -1, -1, -1, -1, -1},   {0, 2, 6, -1, -1, -1, -1, -1},
-    {4, 6, -1, -1, -1, -1, -1, -1},   {0, 4, 6, -1, -1, -1, -1, -1},
-    {2, 4, 6, -1, -1, -1, -1, -1},    {0, 2, 4, 6, -1, -1, -1, -1},
-    {8, -1, -1, -1, -1, -1, -1, -1},  {0, 8, -1, -1, -1, -1, -1, -1},
-    {2, 8, -1, -1, -1, -1, -1, -1},   {0, 2, 8, -1, -1, -1, -1, -1},
-    {4, 8, -1, -1, -1, -1, -1, -1},   {0, 4, 8, -1, -1, -1, -1, -1},
-    {2, 4, 8, -1, -1, -1, -1, -1},    {0, 2, 4, 8, -1, -1, -1, -1},
-    {6, 8, -1, -1, -1, -1, -1, -1},   {0, 6, 8, -1, -1, -1, -1, -1},
-    {2, 6, 8, -1, -1, -1, -1, -1},    {0, 2, 6, 8, -1, -1, -1, -1},
-    {4, 6, 8, -1, -1, -1, -1, -1},    {0, 4, 6, 8, -1, -1, -1, -1},
-    {2, 4, 6, 8, -1, -1, -1, -1},     {0, 2, 4, 6, 8, -1, -1, -1},
-    {10, -1, -1, -1, -1, -1, -1, -1}, {0, 10, -1, -1, -1, -1, -1, -1},
-    {2, 10, -1, -1, -1, -1, -1, -1},  {0, 2, 10, -1, -1, -1, -1, -1},
-    {4, 10, -1, -1, -1, -1, -1, -1},  {0, 4, 10, -1, -1, -1, -1, -1},
-    {2, 4, 10, -1, -1, -1, -1, -1},   {0, 2, 4, 10, -1, -1, -1, -1},
-    {6, 10, -1, -1, -1, -1, -1, -1},  {0, 6, 10, -1, -1, -1, -1, -1},
-    {2, 6, 10, -1, -1, -1, -1, -1},   {0, 2, 6, 10, -1, -1, -1, -1},
-    {4, 6, 10, -1, -1, -1, -1, -1},   {0, 4, 6, 10, -1, -1, -1, -1},
-    {2, 4, 6, 10, -1, -1, -1, -1},    {0, 2, 4, 6, 10, -1, -1, -1},
-    {8, 10, -1, -1, -1, -1, -1, -1},  {0, 8, 10, -1, -1, -1, -1, -1},
-    {2, 8, 10, -1, -1, -1, -1, -1},   {0, 2, 8, 10, -1, -1, -1, -1},
-    {4, 8, 10, -1, -1, -1, -1, -1},   {0, 4, 8, 10, -1, -1, -1, -1},
-    {2, 4, 8, 10, -1, -1, -1, -1},    {0, 2, 4, 8, 10, -1, -1, -1},
-    {6, 8, 10, -1, -1, -1, -1, -1},   {0, 6, 8, 10, -1, -1, -1, -1},
-    {2, 6, 8, 10, -1, -1, -1, -1},    {0, 2, 6, 8, 10, -1, -1, -1},
-    {4, 6, 8, 10, -1, -1, -1, -1},    {0, 4, 6, 8, 10, -1, -1, -1},
-    {2, 4, 6, 8, 10, -1, -1, -1},     {0, 2, 4, 6, 8, 10, -1, -1},
-    {12, -1, -1, -1, -1, -1, -1, -1}, {0, 12, -1, -1, -1, -1, -1, -1},
-    {2, 12, -1, -1, -1, -1, -1, -1},  {0, 2, 12, -1, -1, -1, -1, -1},
-    {4, 12, -1, -1, -1, -1, -1, -1},  {0, 4, 12, -1, -1, -1, -1, -1},
-    {2, 4, 12, -1, -1, -1, -1, -1},   {0, 2, 4, 12, -1, -1, -1, -1},
-    {6, 12, -1, -1, -1, -1, -1, -1},  {0, 6, 12, -1, -1, -1, -1, -1},
-    {2, 6, 12, -1, -1, -1, -1, -1},   {0, 2, 6, 12, -1, -1, -1, -1},
-    {4, 6, 12, -1, -1, -1, -1, -1},   {0, 4, 6, 12, -1, -1, -1, -1},
-    {2, 4, 6, 12, -1, -1, -1, -1},    {0, 2, 4, 6, 12, -1, -1, -1},
-    {8, 12, -1, -1, -1, -1, -1, -1},  {0, 8, 12, -1, -1, -1, -1, -1},
-    {2, 8, 12, -1, -1, -1, -1, -1},   {0, 2, 8, 12, -1, -1, -1, -1},
-    {4, 8, 12, -1, -1, -1, -1, -1},   {0, 4, 8, 12, -1, -1, -1, -1},
-    {2, 4, 8, 12, -1, -1, -1, -1},    {0, 2, 4, 8, 12, -1, -1, -1},
-    {6, 8, 12, -1, -1, -1, -1, -1},   {0, 6, 8, 12, -1, -1, -1, -1},
-    {2, 6, 8, 12, -1, -1, -1, -1},    {0, 2, 6, 8, 12, -1, -1, -1},
-    {4, 6, 8, 12, -1, -1, -1, -1},    {0, 4, 6, 8, 12, -1, -1, -1},
-    {2, 4, 6, 8, 12, -1, -1, -1},     {0, 2, 4, 6, 8, 12, -1, -1},
-    {10, 12, -1, -1, -1, -1, -1, -1}, {0, 10, 12, -1, -1, -1, -1, -1},
-    {2, 10, 12, -1, -1, -1, -1, -1},  {0, 2, 10, 12, -1, -1, -1, -1},
-    {4, 10, 12, -1, -1, -1, -1, -1},  {0, 4, 10, 12, -1, -1, -1, -1},
-    {2, 4, 10, 12, -1, -1, -1, -1},   {0, 2, 4, 10, 12, -1, -1, -1},
-    {6, 10, 12, -1, -1, -1, -1, -1},  {0, 6, 10, 12, -1, -1, -1, -1},
-    {2, 6, 10, 12, -1, -1, -1, -1},   {0, 2, 6, 10, 12, -1, -1, -1},
-    {4, 6, 10, 12, -1, -1, -1, -1},   {0, 4, 6, 10, 12, -1, -1, -1},
-    {2, 4, 6, 10, 12, -1, -1, -1},    {0, 2, 4, 6, 10, 12, -1, -1},
-    {8, 10, 12, -1, -1, -1, -1, -1},  {0, 8, 10, 12, -1, -1, -1, -1},
-    {2, 8, 10, 12, -1, -1, -1, -1},   {0, 2, 8, 10, 12, -1, -1, -1},
-    {4, 8, 10, 12, -1, -1, -1, -1},   {0, 4, 8, 10, 12, -1, -1, -1},
-    {2, 4, 8, 10, 12, -1, -1, -1},    {0, 2, 4, 8, 10, 12, -1, -1},
-    {6, 8, 10, 12, -1, -1, -1, -1},   {0, 6, 8, 10, 12, -1, -1, -1},
-    {2, 6, 8, 10, 12, -1, -1, -1},    {0, 2, 6, 8, 10, 12, -1, -1},
-    {4, 6, 8, 10, 12, -1, -1, -1},    {0, 4, 6, 8, 10, 12, -1, -1},
-    {2, 4, 6, 8, 10, 12, -1, -1},     {0, 2, 4, 6, 8, 10, 12, -1},
-    {14, -1, -1, -1, -1, -1, -1, -1}, {0, 14, -1, -1, -1, -1, -1, -1},
-    {2, 14, -1, -1, -1, -1, -1, -1},  {0, 2, 14, -1, -1, -1, -1, -1},
-    {4, 14, -1, -1, -1, -1, -1, -1},  {0, 4, 14, -1, -1, -1, -1, -1},
-    {2, 4, 14, -1, -1, -1, -1, -1},   {0, 2, 4, 14, -1, -1, -1, -1},
-    {6, 14, -1, -1, -1, -1, -1, -1},  {0, 6, 14, -1, -1, -1, -1, -1},
-    {2, 6, 14, -1, -1, -1, -1, -1},   {0, 2, 6, 14, -1, -1, -1, -1},
-    {4, 6, 14, -1, -1, -1, -1, -1},   {0, 4, 6, 14, -1, -1, -1, -1},
-    {2, 4, 6, 14, -1, -1, -1, -1},    {0, 2, 4, 6, 14, -1, -1, -1},
-    {8, 14, -1, -1, -1, -1, -1, -1},  {0, 8, 14, -1, -1, -1, -1, -1},
-    {2, 8, 14, -1, -1, -1, -1, -1},   {0, 2, 8, 14, -1, -1, -1, -1},
-    {4, 8, 14, -1, -1, -1, -1, -1},   {0, 4, 8, 14, -1, -1, -1, -1},
-    {2, 4, 8, 14, -1, -1, -1, -1},    {0, 2, 4, 8, 14, -1, -1, -1},
-    {6, 8, 14, -1, -1, -1, -1, -1},   {0, 6, 8, 14, -1, -1, -1, -1},
-    {2, 6, 8, 14, -1, -1, -1, -1},    {0, 2, 6, 8, 14, -1, -1, -1},
-    {4, 6, 8, 14, -1, -1, -1, -1},    {0, 4, 6, 8, 14, -1, -1, -1},
-    {2, 4, 6, 8, 14, -1, -1, -1},     {0, 2, 4, 6, 8, 14, -1, -1},
-    {10, 14, -1, -1, -1, -1, -1, -1}, {0, 10, 14, -1, -1, -1, -1, -1},
-    {2, 10, 14, -1, -1, -1, -1, -1},  {0, 2, 10, 14, -1, -1, -1, -1},
-    {4, 10, 14, -1, -1, -1, -1, -1},  {0, 4, 10, 14, -1, -1, -1, -1},
-    {2, 4, 10, 14, -1, -1, -1, -1},   {0, 2, 4, 10, 14, -1, -1, -1},
-    {6, 10, 14, -1, -1, -1, -1, -1},  {0, 6, 10, 14, -1, -1, -1, -1},
-    {2, 6, 10, 14, -1, -1, -1, -1},   {0, 2, 6, 10, 14, -1, -1, -1},
-    {4, 6, 10, 14, -1, -1, -1, -1},   {0, 4, 6, 10, 14, -1, -1, -1},
-    {2, 4, 6, 10, 14, -1, -1, -1},    {0, 2, 4, 6, 10, 14, -1, -1},
-    {8, 10, 14, -1, -1, -1, -1, -1},  {0, 8, 10, 14, -1, -1, -1, -1},
-    {2, 8, 10, 14, -1, -1, -1, -1},   {0, 2, 8, 10, 14, -1, -1, -1},
-    {4, 8, 10, 14, -1, -1, -1, -1},   {0, 4, 8, 10, 14, -1, -1, -1},
-    {2, 4, 8, 10, 14, -1, -1, -1},    {0, 2, 4, 8, 10, 14, -1, -1},
-    {6, 8, 10, 14, -1, -1, -1, -1},   {0, 6, 8, 10, 14, -1, -1, -1},
-    {2, 6, 8, 10, 14, -1, -1, -1},    {0, 2, 6, 8, 10, 14, -1, -1},
-    {4, 6, 8, 10, 14, -1, -1, -1},    {0, 4, 6, 8, 10, 14, -1, -1},
-    {2, 4, 6, 8, 10, 14, -1, -1},     {0, 2, 4, 6, 8, 10, 14, -1},
-    {12, 14, -1, -1, -1, -1, -1, -1}, {0, 12, 14, -1, -1, -1, -1, -1},
-    {2, 12, 14, -1, -1, -1, -1, -1},  {0, 2, 12, 14, -1, -1, -1, -1},
-    {4, 12, 14, -1, -1, -1, -1, -1},  {0, 4, 12, 14, -1, -1, -1, -1},
-    {2, 4, 12, 14, -1, -1, -1, -1},   {0, 2, 4, 12, 14, -1, -1, -1},
-    {6, 12, 14, -1, -1, -1, -1, -1},  {0, 6, 12, 14, -1, -1, -1, -1},
-    {2, 6, 12, 14, -1, -1, -1, -1},   {0, 2, 6, 12, 14, -1, -1, -1},
-    {4, 6, 12, 14, -1, -1, -1, -1},   {0, 4, 6, 12, 14, -1, -1, -1},
-    {2, 4, 6, 12, 14, -1, -1, -1},    {0, 2, 4, 6, 12, 14, -1, -1},
-    {8, 12, 14, -1, -1, -1, -1, -1},  {0, 8, 12, 14, -1, -1, -1, -1},
-    {2, 8, 12, 14, -1, -1, -1, -1},   {0, 2, 8, 12, 14, -1, -1, -1},
-    {4, 8, 12, 14, -1, -1, -1, -1},   {0, 4, 8, 12, 14, -1, -1, -1},
-    {2, 4, 8, 12, 14, -1, -1, -1},    {0, 2, 4, 8, 12, 14, -1, -1},
-    {6, 8, 12, 14, -1, -1, -1, -1},   {0, 6, 8, 12, 14, -1, -1, -1},
-    {2, 6, 8, 12, 14, -1, -1, -1},    {0, 2, 6, 8, 12, 14, -1, -1},
-    {4, 6, 8, 12, 14, -1, -1, -1},    {0, 4, 6, 8, 12, 14, -1, -1},
-    {2, 4, 6, 8, 12, 14, -1, -1},     {0, 2, 4, 6, 8, 12, 14, -1},
-    {10, 12, 14, -1, -1, -1, -1, -1}, {0, 10, 12, 14, -1, -1, -1, -1},
-    {2, 10, 12, 14, -1, -1, -1, -1},  {0, 2, 10, 12, 14, -1, -1, -1},
-    {4, 10, 12, 14, -1, -1, -1, -1},  {0, 4, 10, 12, 14, -1, -1, -1},
-    {2, 4, 10, 12, 14, -1, -1, -1},   {0, 2, 4, 10, 12, 14, -1, -1},
-    {6, 10, 12, 14, -1, -1, -1, -1},  {0, 6, 10, 12, 14, -1, -1, -1},
-    {2, 6, 10, 12, 14, -1, -1, -1},   {0, 2, 6, 10, 12, 14, -1, -1},
-    {4, 6, 10, 12, 14, -1, -1, -1},   {0, 4, 6, 10, 12, 14, -1, -1},
-    {2, 4, 6, 10, 12, 14, -1, -1},    {0, 2, 4, 6, 10, 12, 14, -1},
-    {8, 10, 12, 14, -1, -1, -1, -1},  {0, 8, 10, 12, 14, -1, -1, -1},
-    {2, 8, 10, 12, 14, -1, -1, -1},   {0, 2, 8, 10, 12, 14, -1, -1},
-    {4, 8, 10, 12, 14, -1, -1, -1},   {0, 4, 8, 10, 12, 14, -1, -1},
-    {2, 4, 8, 10, 12, 14, -1, -1},    {0, 2, 4, 8, 10, 12, 14, -1},
-    {6, 8, 10, 12, 14, -1, -1, -1},   {0, 6, 8, 10, 12, 14, -1, -1},
-    {2, 6, 8, 10, 12, 14, -1, -1},    {0, 2, 6, 8, 10, 12, 14, -1},
-    {4, 6, 8, 10, 12, 14, -1, -1},    {0, 4, 6, 8, 10, 12, 14, -1},
-    {2, 4, 6, 8, 10, 12, 14, -1},     {0, 2, 4, 6, 8, 10, 12, 14}};
-#endif
 
 #define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a)
 #define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a)
@@ -159,9 +26,6 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
   unsigned int ctr, pos;
   uint16_t val0, val1;
   uint32_t good;
-#ifdef BMI
-  uint64_t idx0, idx1, idx2, idx3;
-#endif
   const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ / 16]);
   const __m256i ones = _mm256_set1_epi8(1);
   const __m256i mask = _mm256_set1_epi16(0xFFF);
@@ -195,34 +59,16 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
     g0 = _mm256_packs_epi16(g0, g1);
     good = _mm256_movemask_epi8(g0);
 
-#ifdef BMI
-    idx0 = _pdep_u64(good >> 0, 0x0101010101010101);
-    idx1 = _pdep_u64(good >> 8, 0x0101010101010101);
-    idx2 = _pdep_u64(good >> 16, 0x0101010101010101);
-    idx3 = _pdep_u64(good >> 24, 0x0101010101010101);
-    idx0 = (idx0 << 8) - idx0;
-    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
-    idx1 = (idx1 << 8) - idx1;
-    idx1 = _pext_u64(0x0E0C0A0806040200, idx1);
-    idx2 = (idx2 << 8) - idx2;
-    idx2 = _pext_u64(0x0E0C0A0806040200, idx2);
-    idx3 = (idx3 << 8) - idx3;
-    idx3 = _pext_u64(0x0E0C0A0806040200, idx3);
-
-    g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0));
-    g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1));
-    g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1);
-    g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1);
-#else
     g0 = _mm256_castsi128_si256(
-        _mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF]));
+        _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 0) & 0xFF]));
     g1 = _mm256_castsi128_si256(
-        _mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF]));
+        _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 8) & 0xFF]));
     g0 = _mm256_inserti128_si256(
-        g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1);
+        g0, _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 16) & 0xFF]),
+        1);
     g1 = _mm256_inserti128_si256(
-        g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1);
-#endif
+        g1, _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 24) & 0xFF]),
+        1);
 
     g2 = _mm256_add_epi8(g0, ones);
     g3 = _mm256_add_epi8(g1, ones);
@@ -254,16 +100,8 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
     t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f);
     good = _mm_movemask_epi8(t);
 
-#ifdef BMI
-    good &= 0x5555;
-    idx0 = _pdep_u64(good, 0x1111111111111111);
-    idx0 = (idx0 << 8) - idx0;
-    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
-    pilo = _mm_cvtsi64_si128(idx0);
-#else
     good = _pext_u32(good, 0x5555);
-    pilo = _mm_loadl_epi64((__m128i *)&idx[good]);
-#endif
+    pilo = _mm_loadl_epi64((__m128i *)&rej_uniform_table[good]);
 
     pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones));
     pilo = _mm_unpacklo_epi8(pilo, pihi);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_table.c b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_table.c
new file mode 100644
index 000000000..e49029140
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-512_x86_64/x86_64/src/rej_uniform_table.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * WARNING: This file is auto-generated from scripts/autogenerate_files.py
+ *          Do not modify it directly.
+ */
+
+#include "common.h"
+
+#if defined(MLKEM_NATIVE_ARITH_BACKEND_X86_64_DEFAULT)
+
+#include <stdint.h>
+#include "arith_native_x86_64.h"
+
+/*
+ * Lookup table used by rejection sampling of the public matrix.
+ * See autogenerate_files.py for details.
+ */
+ALIGN const uint8_t rej_uniform_table[256][8] = {
+    {-1, -1, -1, -1, -1, -1, -1, -1}, {0, -1, -1, -1, -1, -1, -1, -1},
+    {2, -1, -1, -1, -1, -1, -1, -1},  {0, 2, -1, -1, -1, -1, -1, -1},
+    {4, -1, -1, -1, -1, -1, -1, -1},  {0, 4, -1, -1, -1, -1, -1, -1},
+    {2, 4, -1, -1, -1, -1, -1, -1},   {0, 2, 4, -1, -1, -1, -1, -1},
+    {6, -1, -1, -1, -1, -1, -1, -1},  {0, 6, -1, -1, -1, -1, -1, -1},
+    {2, 6, -1, -1, -1, -1, -1, -1},   {0, 2, 6, -1, -1, -1, -1, -1},
+    {4, 6, -1, -1, -1, -1, -1, -1},   {0, 4, 6, -1, -1, -1, -1, -1},
+    {2, 4, 6, -1, -1, -1, -1, -1},    {0, 2, 4, 6, -1, -1, -1, -1},
+    {8, -1, -1, -1, -1, -1, -1, -1},  {0, 8, -1, -1, -1, -1, -1, -1},
+    {2, 8, -1, -1, -1, -1, -1, -1},   {0, 2, 8, -1, -1, -1, -1, -1},
+    {4, 8, -1, -1, -1, -1, -1, -1},   {0, 4, 8, -1, -1, -1, -1, -1},
+    {2, 4, 8, -1, -1, -1, -1, -1},    {0, 2, 4, 8, -1, -1, -1, -1},
+    {6, 8, -1, -1, -1, -1, -1, -1},   {0, 6, 8, -1, -1, -1, -1, -1},
+    {2, 6, 8, -1, -1, -1, -1, -1},    {0, 2, 6, 8, -1, -1, -1, -1},
+    {4, 6, 8, -1, -1, -1, -1, -1},    {0, 4, 6, 8, -1, -1, -1, -1},
+    {2, 4, 6, 8, -1, -1, -1, -1},     {0, 2, 4, 6, 8, -1, -1, -1},
+    {10, -1, -1, -1, -1, -1, -1, -1}, {0, 10, -1, -1, -1, -1, -1, -1},
+    {2, 10, -1, -1, -1, -1, -1, -1},  {0, 2, 10, -1, -1, -1, -1, -1},
+    {4, 10, -1, -1, -1, -1, -1, -1},  {0, 4, 10, -1, -1, -1, -1, -1},
+    {2, 4, 10, -1, -1, -1, -1, -1},   {0, 2, 4, 10, -1, -1, -1, -1},
+    {6, 10, -1, -1, -1, -1, -1, -1},  {0, 6, 10, -1, -1, -1, -1, -1},
+    {2, 6, 10, -1, -1, -1, -1, -1},   {0, 2, 6, 10, -1, -1, -1, -1},
+    {4, 6, 10, -1, -1, -1, -1, -1},   {0, 4, 6, 10, -1, -1, -1, -1},
+    {2, 4, 6, 10, -1, -1, -1, -1},    {0, 2, 4, 6, 10, -1, -1, -1},
+    {8, 10, -1, -1, -1, -1, -1, -1},  {0, 8, 10, -1, -1, -1, -1, -1},
+    {2, 8, 10, -1, -1, -1, -1, -1},   {0, 2, 8, 10, -1, -1, -1, -1},
+    {4, 8, 10, -1, -1, -1, -1, -1},   {0, 4, 8, 10, -1, -1, -1, -1},
+    {2, 4, 8, 10, -1, -1, -1, -1},    {0, 2, 4, 8, 10, -1, -1, -1},
+    {6, 8, 10, -1, -1, -1, -1, -1},   {0, 6, 8, 10, -1, -1, -1, -1},
+    {2, 6, 8, 10, -1, -1, -1, -1},    {0, 2, 6, 8, 10, -1, -1, -1},
+    {4, 6, 8, 10, -1, -1, -1, -1},    {0, 4, 6, 8, 10, -1, -1, -1},
+    {2, 4, 6, 8, 10, -1, -1, -1},     {0, 2, 4, 6, 8, 10, -1, -1},
+    {12, -1, -1, -1, -1, -1, -1, -1}, {0, 12, -1, -1, -1, -1, -1, -1},
+    {2, 12, -1, -1, -1, -1, -1, -1},  {0, 2, 12, -1, -1, -1, -1, -1},
+    {4, 12, -1, -1, -1, -1, -1, -1},  {0, 4, 12, -1, -1, -1, -1, -1},
+    {2, 4, 12, -1, -1, -1, -1, -1},   {0, 2, 4, 12, -1, -1, -1, -1},
+    {6, 12, -1, -1, -1, -1, -1, -1},  {0, 6, 12, -1, -1, -1, -1, -1},
+    {2, 6, 12, -1, -1, -1, -1, -1},   {0, 2, 6, 12, -1, -1, -1, -1},
+    {4, 6, 12, -1, -1, -1, -1, -1},   {0, 4, 6, 12, -1, -1, -1, -1},
+    {2, 4, 6, 12, -1, -1, -1, -1},    {0, 2, 4, 6, 12, -1, -1, -1},
+    {8, 12, -1, -1, -1, -1, -1, -1},  {0, 8, 12, -1, -1, -1, -1, -1},
+    {2, 8, 12, -1, -1, -1, -1, -1},   {0, 2, 8, 12, -1, -1, -1, -1},
+    {4, 8, 12, -1, -1, -1, -1, -1},   {0, 4, 8, 12, -1, -1, -1, -1},
+    {2, 4, 8, 12, -1, -1, -1, -1},    {0, 2, 4, 8, 12, -1, -1, -1},
+    {6, 8, 12, -1, -1, -1, -1, -1},   {0, 6, 8, 12, -1, -1, -1, -1},
+    {2, 6, 8, 12, -1, -1, -1, -1},    {0, 2, 6, 8, 12, -1, -1, -1},
+    {4, 6, 8, 12, -1, -1, -1, -1},    {0, 4, 6, 8, 12, -1, -1, -1},
+    {2, 4, 6, 8, 12, -1, -1, -1},     {0, 2, 4, 6, 8, 12, -1, -1},
+    {10, 12, -1, -1, -1, -1, -1, -1}, {0, 10, 12, -1, -1, -1, -1, -1},
+    {2, 10, 12, -1, -1, -1, -1, -1},  {0, 2, 10, 12, -1, -1, -1, -1},
+    {4, 10, 12, -1, -1, -1, -1, -1},  {0, 4, 10, 12, -1, -1, -1, -1},
+    {2, 4, 10, 12, -1, -1, -1, -1},   {0, 2, 4, 10, 12, -1, -1, -1},
+    {6, 10, 12, -1, -1, -1, -1, -1},  {0, 6, 10, 12, -1, -1, -1, -1},
+    {2, 6, 10, 12, -1, -1, -1, -1},   {0, 2, 6, 10, 12, -1, -1, -1},
+    {4, 6, 10, 12, -1, -1, -1, -1},   {0, 4, 6, 10, 12, -1, -1, -1},
+    {2, 4, 6, 10, 12, -1, -1, -1},    {0, 2, 4, 6, 10, 12, -1, -1},
+    {8, 10, 12, -1, -1, -1, -1, -1},  {0, 8, 10, 12, -1, -1, -1, -1},
+    {2, 8, 10, 12, -1, -1, -1, -1},   {0, 2, 8, 10, 12, -1, -1, -1},
+    {4, 8, 10, 12, -1, -1, -1, -1},   {0, 4, 8, 10, 12, -1, -1, -1},
+    {2, 4, 8, 10, 12, -1, -1, -1},    {0, 2, 4, 8, 10, 12, -1, -1},
+    {6, 8, 10, 12, -1, -1, -1, -1},   {0, 6, 8, 10, 12, -1, -1, -1},
+    {2, 6, 8, 10, 12, -1, -1, -1},    {0, 2, 6, 8, 10, 12, -1, -1},
+    {4, 6, 8, 10, 12, -1, -1, -1},    {0, 4, 6, 8, 10, 12, -1, -1},
+    {2, 4, 6, 8, 10, 12, -1, -1},     {0, 2, 4, 6, 8, 10, 12, -1},
+    {14, -1, -1, -1, -1, -1, -1, -1}, {0, 14, -1, -1, -1, -1, -1, -1},
+    {2, 14, -1, -1, -1, -1, -1, -1},  {0, 2, 14, -1, -1, -1, -1, -1},
+    {4, 14, -1, -1, -1, -1, -1, -1},  {0, 4, 14, -1, -1, -1, -1, -1},
+    {2, 4, 14, -1, -1, -1, -1, -1},   {0, 2, 4, 14, -1, -1, -1, -1},
+    {6, 14, -1, -1, -1, -1, -1, -1},  {0, 6, 14, -1, -1, -1, -1, -1},
+    {2, 6, 14, -1, -1, -1, -1, -1},   {0, 2, 6, 14, -1, -1, -1, -1},
+    {4, 6, 14, -1, -1, -1, -1, -1},   {0, 4, 6, 14, -1, -1, -1, -1},
+    {2, 4, 6, 14, -1, -1, -1, -1},    {0, 2, 4, 6, 14, -1, -1, -1},
+    {8, 14, -1, -1, -1, -1, -1, -1},  {0, 8, 14, -1, -1, -1, -1, -1},
+    {2, 8, 14, -1, -1, -1, -1, -1},   {0, 2, 8, 14, -1, -1, -1, -1},
+    {4, 8, 14, -1, -1, -1, -1, -1},   {0, 4, 8, 14, -1, -1, -1, -1},
+    {2, 4, 8, 14, -1, -1, -1, -1},    {0, 2, 4, 8, 14, -1, -1, -1},
+    {6, 8, 14, -1, -1, -1, -1, -1},   {0, 6, 8, 14, -1, -1, -1, -1},
+    {2, 6, 8, 14, -1, -1, -1, -1},    {0, 2, 6, 8, 14, -1, -1, -1},
+    {4, 6, 8, 14, -1, -1, -1, -1},    {0, 4, 6, 8, 14, -1, -1, -1},
+    {2, 4, 6, 8, 14, -1, -1, -1},     {0, 2, 4, 6, 8, 14, -1, -1},
+    {10, 14, -1, -1, -1, -1, -1, -1}, {0, 10, 14, -1, -1, -1, -1, -1},
+    {2, 10, 14, -1, -1, -1, -1, -1},  {0, 2, 10, 14, -1, -1, -1, -1},
+    {4, 10, 14, -1, -1, -1, -1, -1},  {0, 4, 10, 14, -1, -1, -1, -1},
+    {2, 4, 10, 14, -1, -1, -1, -1},   {0, 2, 4, 10, 14, -1, -1, -1},
+    {6, 10, 14, -1, -1, -1, -1, -1},  {0, 6, 10, 14, -1, -1, -1, -1},
+    {2, 6, 10, 14, -1, -1, -1, -1},   {0, 2, 6, 10, 14, -1, -1, -1},
+    {4, 6, 10, 14, -1, -1, -1, -1},   {0, 4, 6, 10, 14, -1, -1, -1},
+    {2, 4, 6, 10, 14, -1, -1, -1},    {0, 2, 4, 6, 10, 14, -1, -1},
+    {8, 10, 14, -1, -1, -1, -1, -1},  {0, 8, 10, 14, -1, -1, -1, -1},
+    {2, 8, 10, 14, -1, -1, -1, -1},   {0, 2, 8, 10, 14, -1, -1, -1},
+    {4, 8, 10, 14, -1, -1, -1, -1},   {0, 4, 8, 10, 14, -1, -1, -1},
+    {2, 4, 8, 10, 14, -1, -1, -1},    {0, 2, 4, 8, 10, 14, -1, -1},
+    {6, 8, 10, 14, -1, -1, -1, -1},   {0, 6, 8, 10, 14, -1, -1, -1},
+    {2, 6, 8, 10, 14, -1, -1, -1},    {0, 2, 6, 8, 10, 14, -1, -1},
+    {4, 6, 8, 10, 14, -1, -1, -1},    {0, 4, 6, 8, 10, 14, -1, -1},
+    {2, 4, 6, 8, 10, 14, -1, -1},     {0, 2, 4, 6, 8, 10, 14, -1},
+    {12, 14, -1, -1, -1, -1, -1, -1}, {0, 12, 14, -1, -1, -1, -1, -1},
+    {2, 12, 14, -1, -1, -1, -1, -1},  {0, 2, 12, 14, -1, -1, -1, -1},
+    {4, 12, 14, -1, -1, -1, -1, -1},  {0, 4, 12, 14, -1, -1, -1, -1},
+    {2, 4, 12, 14, -1, -1, -1, -1},   {0, 2, 4, 12, 14, -1, -1, -1},
+    {6, 12, 14, -1, -1, -1, -1, -1},  {0, 6, 12, 14, -1, -1, -1, -1},
+    {2, 6, 12, 14, -1, -1, -1, -1},   {0, 2, 6, 12, 14, -1, -1, -1},
+    {4, 6, 12, 14, -1, -1, -1, -1},   {0, 4, 6, 12, 14, -1, -1, -1},
+    {2, 4, 6, 12, 14, -1, -1, -1},    {0, 2, 4, 6, 12, 14, -1, -1},
+    {8, 12, 14, -1, -1, -1, -1, -1},  {0, 8, 12, 14, -1, -1, -1, -1},
+    {2, 8, 12, 14, -1, -1, -1, -1},   {0, 2, 8, 12, 14, -1, -1, -1},
+    {4, 8, 12, 14, -1, -1, -1, -1},   {0, 4, 8, 12, 14, -1, -1, -1},
+    {2, 4, 8, 12, 14, -1, -1, -1},    {0, 2, 4, 8, 12, 14, -1, -1},
+    {6, 8, 12, 14, -1, -1, -1, -1},   {0, 6, 8, 12, 14, -1, -1, -1},
+    {2, 6, 8, 12, 14, -1, -1, -1},    {0, 2, 6, 8, 12, 14, -1, -1},
+    {4, 6, 8, 12, 14, -1, -1, -1},    {0, 4, 6, 8, 12, 14, -1, -1},
+    {2, 4, 6, 8, 12, 14, -1, -1},     {0, 2, 4, 6, 8, 12, 14, -1},
+    {10, 12, 14, -1, -1, -1, -1, -1}, {0, 10, 12, 14, -1, -1, -1, -1},
+    {2, 10, 12, 14, -1, -1, -1, -1},  {0, 2, 10, 12, 14, -1, -1, -1},
+    {4, 10, 12, 14, -1, -1, -1, -1},  {0, 4, 10, 12, 14, -1, -1, -1},
+    {2, 4, 10, 12, 14, -1, -1, -1},   {0, 2, 4, 10, 12, 14, -1, -1},
+    {6, 10, 12, 14, -1, -1, -1, -1},  {0, 6, 10, 12, 14, -1, -1, -1},
+    {2, 6, 10, 12, 14, -1, -1, -1},   {0, 2, 6, 10, 12, 14, -1, -1},
+    {4, 6, 10, 12, 14, -1, -1, -1},   {0, 4, 6, 10, 12, 14, -1, -1},
+    {2, 4, 6, 10, 12, 14, -1, -1},    {0, 2, 4, 6, 10, 12, 14, -1},
+    {8, 10, 12, 14, -1, -1, -1, -1},  {0, 8, 10, 12, 14, -1, -1, -1},
+    {2, 8, 10, 12, 14, -1, -1, -1},   {0, 2, 8, 10, 12, 14, -1, -1},
+    {4, 8, 10, 12, 14, -1, -1, -1},   {0, 4, 8, 10, 12, 14, -1, -1},
+    {2, 4, 8, 10, 12, 14, -1, -1},    {0, 2, 4, 8, 10, 12, 14, -1},
+    {6, 8, 10, 12, 14, -1, -1, -1},   {0, 6, 8, 10, 12, 14, -1, -1},
+    {2, 6, 8, 10, 12, 14, -1, -1},    {0, 2, 6, 8, 10, 12, 14, -1},
+    {4, 6, 8, 10, 12, 14, -1, -1},    {0, 4, 6, 8, 10, 12, 14, -1},
+    {2, 4, 6, 8, 10, 12, 14, -1},     {0, 2, 4, 6, 8, 10, 12, 14},
+};
+
+#else
+
+/* Dummy declaration for compilers disliking empty compilation units */
+#define empty_cu_avx2_rej_uniform_table \
+  MLKEM_NAMESPACE(empty_cu_avx2_rej_uniform_table)
+int empty_cu_avx2_rej_uniform_table;
+#endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that qvar ranges over qvar_lb (inclusive) .. qvar_ub (exclusive)
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_FIPS202_C_
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * The default MLKEM namespace is
+ *
+ *   PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_<BACKEND>_
+ *
+ * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
+ *
+ * NOTE: it is not defined here; see MLKEM_NAMESPACE_PREFIX above.
+ */
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  length MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_aarch64/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API functions static; they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace prefix is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_
+ *
+ * e.g., FIPS202_DEFAULT_NAMESPACE(s) expands to PQCP_MLKEM_NATIVE_FIPS202_s
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * There is no built-in default MLKEM namespace in this configuration:
+ *
+ *   MLKEM_NAMESPACE(sym) expands to <MLKEM_NAMESPACE_PREFIX>_sym
+ *
+ * and MLKEM_NAMESPACE_PREFIX must be defined by the build (see #error above).
+ */
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  length MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing: each helper name below is mapped through
+ * MLKEM_NAMESPACE(). This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_ref/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h
index a6edf844d..09e30f207 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/arith_backend.h
@@ -3,9 +3,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#ifdef MLKEM_NATIVE_ARITH_IMPL_H
-#error Only one ARITH assembly profile can be defined -- did you include multiple profiles?
-#else
+#if !defined(MLKEM_NATIVE_ARITH_IMPL_H)
 #define MLKEM_NATIVE_ARITH_IMPL_H
 
 #include "common.h"
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c
index 2e0fac38a..a20919bc2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.c
@@ -5,6 +5,16 @@
 #include "cbd.h"
 #include <stdint.h>
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define load32_littleendian MLKEM_NAMESPACE(load32_littleendian)
+#define load24_littleendian MLKEM_NAMESPACE(load24_littleendian)
+#define cbd2 MLKEM_NAMESPACE(cbd2)
+#define cbd3 MLKEM_NAMESPACE(cbd3)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        load32_littleendian
  *
@@ -25,6 +35,7 @@ static uint32_t load32_littleendian(const uint8_t x[4])
   return r;
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        load24_littleendian
  *
@@ -36,7 +47,6 @@ static uint32_t load32_littleendian(const uint8_t x[4])
  *
  * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
  **************************************************/
-#if MLKEM_ETA1 == 3
 static uint32_t load24_littleendian(const uint8_t x[3])
 {
   uint32_t r;
@@ -45,7 +55,7 @@ static uint32_t load24_littleendian(const uint8_t x[3])
   r |= (uint32_t)x[2] << 16;
   return r;
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
 /*************************************************
  * Name:        cbd2
@@ -59,13 +69,13 @@ static uint32_t load24_littleendian(const uint8_t x[3])
  **************************************************/
 static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_abs_bound(r->coeffs, 0, (8 * i - 1), 2)))
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 2)))
   {
-    int j;
+    unsigned j;
     uint32_t t = load32_littleendian(buf + 4 * i);
     uint32_t d = t & 0x55555555;
     d += (t >> 1) & 0x55555555;
@@ -73,7 +83,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j - 1, 2)))
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 2)))
     {
       const int16_t a = (d >> (4 * j + 0)) & 0x3;
       const int16_t b = (d >> (4 * j + 2)) & 0x3;
@@ -82,6 +92,7 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
   }
 }
 
+#if MLKEM_ETA1 == 3
 /*************************************************
  * Name:        cbd3
  *
@@ -93,16 +104,15 @@ static void cbd2(poly *r, const uint8_t buf[2 * MLKEM_N / 4])
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
-#if MLKEM_ETA1 == 3
 static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 3)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 3)))
   {
-    int j;
+    unsigned j;
     const uint32_t t = load24_littleendian(buf + 3 * i);
     uint32_t d = t & 0x00249249;
     d += (t >> 1) & 0x00249249;
@@ -111,7 +121,7 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     for (j = 0; j < 4; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 4 && j >= 0 && j <= 4)
-      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j - 1, 3)))
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 3)))
     {
       const int16_t a = (d >> (6 * j + 0)) & 0x7;
       const int16_t b = (d >> (6 * j + 3)) & 0x7;
@@ -119,8 +129,9 @@ static void cbd3(poly *r, const uint8_t buf[3 * MLKEM_N / 4])
     }
   }
 }
-#endif
+#endif /* MLKEM_ETA1 == 3 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 {
 #if MLKEM_ETA1 == 2
@@ -132,6 +143,8 @@ void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 #endif
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 {
 #if MLKEM_ETA2 == 2
@@ -140,3 +153,4 @@ void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 #error "This implementation requires eta2 = 2"
 #endif
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h
index 31c9649e3..a3942ecf0 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbd.h
@@ -20,14 +20,16 @@
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta1(poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1))
 );
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_cbd_eta2 MLKEM_NAMESPACE(poly_cbd_eta2)
 /*************************************************
  * Name:        poly_cbd_eta1
@@ -39,12 +41,14 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *buf: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_cbd_eta2(poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h
index 317a26421..af6fc1477 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/cbmc.h
@@ -11,19 +11,12 @@
 
 #ifndef CBMC
 
-#define STATIC_INLINE_TESTABLE static INLINE
-#define STATIC_TESTABLE static
-
 #define __contract__(x)
 #define __loop__(x)
 #define cassert(x, y)
 
 #else /* CBMC _is_ defined, therefore we're doing proof */
 
-/* expose certain procedures to CBMC proofs that are static otherwise */
-#define STATIC_TESTABLE
-#define STATIC_INLINE_TESTABLE
-
 #define __contract__(x) x
 #define __loop__(x) x
 
@@ -76,7 +69,7 @@
 
 /*
  * Quantifiers
- * Note that the range on qvar is _inclusive_ between qvar_lb .. qvar_ub
+ * Note that the range on qvar is half-open: qvar_lb <= qvar < qvar_ub
  * https://diffblue.github.io/cbmc/contracts-quantifiers.html
  */
 
@@ -84,18 +77,18 @@
  * Prevent clang-format from corrupting CBMC's special ==> operator
  */
 /* clang-format off */
-#define forall(type, qvar, qvar_lb, qvar_ub, predicate)           \
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
   __CPROVER_forall                                                \
   {                                                               \
-    type qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==> (predicate)  \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
   }
 
-#define EXISTS(type, qvar, qvar_lb, qvar_ub, predicate)         \
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
   __CPROVER_exists                                              \
   {                                                             \
-    type qvar;                                                  \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) && (predicate) \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
   }
 /* clang-format on */
 
@@ -107,7 +100,7 @@
  * Boolean-value predidate that asserts that "all values of array_var are in
  * range value_lb .. value_ub (inclusive)"
  * Example:
- *  array_bound(a->coeffs, 0, MLKEM_N-1, -(MLKEM_Q - 1), MLKEM_Q - 1)
+ *  array_bound(a->coeffs, 0, MLKEM_N, -(MLKEM_Q - 1), MLKEM_Q - 1)
  * expands to
  *  __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( (-(MLKEM_Q -
  *  1) <= a->coeffs[k]) && (a->coeffs[k] <= (MLKEM_Q - 1))) }
@@ -120,18 +113,18 @@
 #define CBMC_CONCAT_(left, right) left##right
 #define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
 
-#define array_bound_core(indextype, qvar, qvar_lb, qvar_ub, array_var, \
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
                          value_lb, value_ub)                           \
   __CPROVER_forall                                                     \
   {                                                                    \
-    indextype qvar;                                                    \
-    ((qvar_lb) <= (qvar) && (qvar) <= (qvar_ub)) ==>                   \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
         (((value_lb) <= (array_var[(qvar)])) &&                        \
         ((array_var[(qvar)]) <= (value_ub)))                           \
   }
 
 #define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
-  array_bound_core(int, CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb), \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
                    (qvar_ub), (array_var), (value_lb), (value_ub))
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h
index 8177b0b50..76141eb96 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/common.h
@@ -7,6 +7,8 @@
 
 #if defined(MLKEM_NATIVE_CONFIG_FILE)
 #include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
 #endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #include "params.h"
@@ -22,9 +24,21 @@
 #endif
 #endif
 
-/* This must come after the inclusion of the backend metadata
- * since the backend choice may be part of the namespace. */
-#include "namespace.h"
+#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
+#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
+#endif
+
+#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
+#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
+#endif
+
+/* For a monobuild (where all compilation units are merged into one), mark
+ * all non-public API as static since they don't need external linkage. */
+#if !defined(MLKEM_NATIVE_MONOBUILD)
+#define MLKEM_NATIVE_INTERNAL_API
+#else
+#define MLKEM_NATIVE_INTERNAL_API static
+#endif
 
 /* On Apple platforms, we need to emit leading underscore
  * in front of assembly symbols. We thus introducee a separate
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h
index 31040a471..3caaf6ba9 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/config.h
@@ -25,25 +25,36 @@
  * Name:        MLKEM_NATIVE_CONFIG_FILE
  *
  * Description: If defined, this is a header that will be included instead
- *              of mlkem/config.h.
- *
- *              This _must_ be set on the command line using
- *              `-DMLKEM_NATIVE_CONFIG_FILE="..."`.
+ *              of this default configuration file mlkem/config.h.
  *
  *              When you need to build mlkem-native in multiple configurations,
- *              using varying MLKEM_NATIE_CONFIG_FILE can be more convenient
+ *              using varying MLKEM_NATIVE_CONFIG_FILE can be more convenient
  *              then configuring everything through CFLAGS.
  *
+ *              To use, MLKEM_NATIVE_CONFIG_FILE _must_ be defined prior
+ *              to the inclusion of any mlkem-native headers. For example,
+ *              it can be set by passing `-DMLKEM_NATIVE_CONFIG_FILE="..."`
+ *              on the command line.
+ *
  *****************************************************************************/
 /* #define MLKEM_NATIVE_CONFIG_FILE "config.h" */
 
+
+#if !defined(MLKEM_NAMESPACE_PREFIX)
+#error "MLKEM_NAMESPACE_PREFIX not defined!"
+#endif
+
+
+#define _NMSP_CONCAT(a, b) a##_##b
+#define NMSP_CONCAT(a, b) _NMSP_CONCAT(a, b)
+
 /******************************************************************************
  * Name:        MLKEM_NAMESPACE
  *
  * Description: The macros to use to namespace global symbols
  *              from mlkem/.
  *****************************************************************************/
-#define MLKEM_NAMESPACE(sym) MLKEM_DEFAULT_NAMESPACE(sym)
+#define MLKEM_NAMESPACE(sym) NMSP_CONCAT(MLKEM_NAMESPACE_PREFIX, sym)
 
 /******************************************************************************
  * Name:        FIPS202_NAMESPACE
@@ -95,4 +106,35 @@
 #define MLKEM_NATIVE_FIPS202_BACKEND "fips202/native/default.h"
 #endif /* MLKEM_NATIVE_FIPS202_BACKEND */
 
+/*************************  Config internals  ********************************/
+
+/* Default namespace
+ *
+ * Don't change this. If you need a different namespace, re-define
+ * MLKEM_NAMESPACE above instead, and remove the following.
+ */
+
+/*
+ * The default FIPS202 namespace prefix is
+ *
+ *   PQCP_MLKEM_NATIVE_FIPS202_
+ *
+ * (no backend name is appended by the macros below)
+ */
+
+#define FIPS202_DEFAULT_NAMESPACE___(x1, x2) x1##_##x2
+#define FIPS202_DEFAULT_NAMESPACE__(x1, x2) FIPS202_DEFAULT_NAMESPACE___(x1, x2)
+
+#define FIPS202_DEFAULT_NAMESPACE(s) \
+  FIPS202_DEFAULT_NAMESPACE__(PQCP_MLKEM_NATIVE_FIPS202, s)
+
+/*
+ * The default MLKEM namespace would be
+ *
+ *   PQCP_MLKEM_NATIVE_MLKEM<LEVEL>_<BACKEND>_
+ * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_, but it is not defined
+ * here: MLKEM_NAMESPACE above is built from MLKEM_NAMESPACE_PREFIX.
+ */
+
+
 #endif /* MLkEM_NATIVE_CONFIG_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h
index 5838ae4bf..5f7d02ba6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/debug/debug.h
@@ -25,6 +25,7 @@
  *              - description: Textual description of assertion
  *              - val: Value asserted to be non-zero
  **************************************************/
+#define mlkem_debug_assert MLKEM_NAMESPACE(mlkem_debug_assert)
 void mlkem_debug_assert(const char *file, int line, const char *description,
                         const int val);
 
@@ -45,12 +46,14 @@ void mlkem_debug_assert(const char *file, int line, const char *description,
  *              - lower_bound_exclusive: Exclusive lower bound
  *              - upper_bound_exclusive: Exclusive upper bound
  **************************************************/
+#define mlkem_debug_check_bounds MLKEM_NAMESPACE(mlkem_debug_check_bounds)
 void mlkem_debug_check_bounds(const char *file, int line,
                               const char *description, const int16_t *ptr,
                               unsigned len, int lower_bound_exclusive,
                               int upper_bound_exclusive);
 
 /* Print error message to stderr alongside file and line information */
+#define mlkem_debug_print_error MLKEM_NAMESPACE(mlkem_debug_print_error)
 void mlkem_debug_print_error(const char *file, int line, const char *msg);
 
 /* Check assertion, calling exit() upon failure
@@ -163,7 +166,8 @@ void mlkem_debug_print_error(const char *file, int line, const char *msg);
   typedef struct                                                         \
   {                                                                      \
     unsigned int MLKEM_CONCAT(static_assertion_, msg) : (cond) ? 1 : -1; \
-  } MLKEM_CONCAT(static_assertion_, msg) __attribute__((unused));
+  } MLKEM_CONCAT(MLKEM_NAMESPACE(static_assertion_), msg)                \
+      __attribute__((unused));
 
 #define MLKEM_STATIC_ASSERT_ADD_LINE0(cond, suffix) \
   MLKEM_STATIC_ASSERT_DEFINE(cond, MLKEM_CONCAT(at_line_, suffix))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c
index 0fa11259b..3343c8f2a 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.c
@@ -21,6 +21,21 @@
 
 #include "cbmc.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define pack_pk MLKEM_NAMESPACE(pack_pk)
+#define unpack_pk MLKEM_NAMESPACE(unpack_pk)
+#define pack_sk MLKEM_NAMESPACE(pack_sk)
+#define unpack_sk MLKEM_NAMESPACE(unpack_sk)
+#define pack_ciphertext MLKEM_NAMESPACE(pack_ciphertext)
+#define unpack_ciphertext MLKEM_NAMESPACE(unpack_ciphertext)
+#define gen_matrix_entry_x4 MLKEM_NAMESPACE(gen_matrix_entry_x4)
+#define gen_matrix_entry MLKEM_NAMESPACE(gen_matrix_entry)
+#define matvec_mul MLKEM_NAMESPACE(matvec_mul)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        pack_pk
  *
@@ -139,8 +154,7 @@ static void unpack_ciphertext(polyvec *b, poly *v,
  * Generate four A matrix entries from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
+static void gen_matrix_entry_x4(poly *vec, uint8_t *seed[4])
 __contract__(
   requires(memory_no_alias(vec, sizeof(poly) * 4))
   requires(memory_no_alias(seed, sizeof(uint8_t*) * 4))
@@ -149,10 +163,10 @@ __contract__(
   requires(memory_no_alias(seed[2], MLKEM_SYMBYTES + 2))
   requires(memory_no_alias(seed[3], MLKEM_SYMBYTES + 2))
   assigns(memory_slice(vec, sizeof(poly) * 4))
-  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
-  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   /* Temporary buffers for XOF output before rejection sampling */
   uint8_t buf0[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -195,10 +209,10 @@ __contract__(
        object_whole(buf1), object_whole(buf2), object_whole(buf3))
     invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
     invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
-    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2] - 1, 0, (MLKEM_Q - 1)))
-    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3] - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr[0] > 0 ==> array_bound(vec[0].coeffs, 0, ctr[0], 0, (MLKEM_Q - 1)))
+    invariant(ctr[1] > 0 ==> array_bound(vec[1].coeffs, 0, ctr[1], 0, (MLKEM_Q - 1)))
+    invariant(ctr[2] > 0 ==> array_bound(vec[2].coeffs, 0, ctr[2], 0, (MLKEM_Q - 1)))
+    invariant(ctr[3] > 0 ==> array_bound(vec[3].coeffs, 0, ctr[3], 0, (MLKEM_Q - 1))))
   {
     xof_x4_squeezeblocks(buf0, buf1, buf2, buf3, 1, &statex);
     ctr[0] = rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf0, buflen);
@@ -214,13 +228,12 @@ __contract__(
  * Generate a single A matrix entry from a seed, using rejection
  * sampling on the output of a XOF.
  */
-STATIC_TESTABLE
-void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
 __contract__(
   requires(memory_no_alias(entry, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
   assigns(memory_slice(entry, sizeof(poly)))
-  ensures(array_bound(entry->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 {
   xof_ctx state;
   uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * XOF_RATE];
@@ -242,33 +255,37 @@ __contract__(
   __loop__(
     assigns(ctr, state, memory_slice(entry, sizeof(poly)), object_whole(buf))
     invariant(0 <= ctr && ctr <= MLKEM_N)
-    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr - 1,
+    invariant(ctr > 0 ==> array_bound(entry->coeffs, 0, ctr,
                                           0, (MLKEM_Q - 1))))
   {
     xof_squeezeblocks(buf, 1, &state);
-    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, XOF_RATE);
+    ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
   }
 
   xof_release(&state);
 }
 
 #if !defined(MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER)
-STATIC_INLINE_TESTABLE
-void poly_permute_bitrev_to_custom(poly *data)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define poly_permute_bitrev_to_custom \
+  MLKEM_NAMESPACE(poly_permute_bitrev_to_custom)
+
+static INLINE void poly_permute_bitrev_to_custom(poly *data)
 __contract__(
   /* We don't specify that this should be a permutation, but only
    * that it does not change the bound established at the end of gen_matrix. */
   requires(memory_no_alias(data, sizeof(poly)))
-  requires(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))
+  requires(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))
   assigns(memory_slice(data, sizeof(poly)))
-  ensures(array_bound(data->coeffs, 0, MLKEM_N - 1, 0, MLKEM_Q - 1))) { ((void)data); }
+  ensures(array_bound(data->coeffs, 0, MLKEM_N, 0, MLKEM_Q - 1))) { ((void)data); }
 #endif /* MLKEM_USE_NATIVE_NTT_CUSTOM_ORDER */
 
 /* Not static for benchmarking */
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 {
-  int i;
-  unsigned int j;
+  unsigned i, j;
   /*
    * We generate four separate seed arrays rather than a single one to work
    * around limitations in CBMC function contracts dealing with disjoint slices
@@ -369,20 +386,19 @@ void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
  *              - polyvec *vc: Mulcache for v, computed via
  *                  polyvec_mulcache_compute().
  **************************************************/
-STATIC_TESTABLE
-void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
-                const polyvec_mulcache *vc)
+static void matvec_mul(polyvec *out, const polyvec a[MLKEM_K], const polyvec *v,
+                       const polyvec_mulcache *vc)
 __contract__(
   requires(memory_no_alias(out, sizeof(polyvec)))
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(v, sizeof(polyvec)))
   requires(memory_no_alias(vc, sizeof(polyvec_mulcache)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-  forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX))))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_abs_bound(a[k0].vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX))))
   assigns(object_whole(out)))
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   __loop__(
     assigns(i, object_whole(out))
@@ -396,6 +412,7 @@ __contract__(
 
 STATIC_ASSERT(NTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_enc_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -459,6 +476,7 @@ STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA1 < INT16_MAX, indcpa_enc_bound_0)
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_ETA2 + MLKEM_Q < INT16_MAX,
               indcpa_enc_bound_1)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -518,6 +536,7 @@ void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
 /* Check that the arithmetic in indcpa_dec() does not overflow */
 STATIC_ASSERT(INVNTT_BOUND + MLKEM_Q < INT16_MAX, indcpa_dec_bound_0)
 
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h
index 7e2a0b247..ac631cef2 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/indcpa.h
@@ -23,14 +23,15 @@
  *              - const uint8_t *seed: pointer to input seed
  *              - int transposed: boolean deciding whether A or A^T is generated
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void gen_matrix(polyvec *a, const uint8_t seed[MLKEM_SYMBYTES], int transposed)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec) * MLKEM_K))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   requires(transposed == 0 || transposed == 1)
   assigns(object_whole(a))
-  ensures(forall(int, x, 0, MLKEM_K - 1, forall(int, y, 0, MLKEM_K - 1,
-  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))));
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+  array_bound(a[x].vec[y].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))));
 );
 
 #define indcpa_keypair_derand MLKEM_NAMESPACE(indcpa_keypair_derand)
@@ -47,6 +48,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input randomness
  *                             (of length MLKEM_SYMBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
                            uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
                            const uint8_t coins[MLKEM_SYMBYTES])
@@ -74,6 +76,7 @@ __contract__(
  *              - const uint8_t *coins: pointer to input random coins used as
  *seed (of length MLKEM_SYMBYTES) to deterministically generate all randomness
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
@@ -100,6 +103,7 @@ __contract__(
  *              - const uint8_t *sk: pointer to input secret key
  *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
                 const uint8_t c[MLKEM_INDCPA_BYTES],
                 const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c
index 03e997af3..5779d3273 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.c
@@ -2,15 +2,24 @@
  * Copyright (c) 2024 The mlkem-native project authors
  * SPDX-License-Identifier: Apache-2.0
  */
-#include "kem.h"
 #include <stddef.h>
 #include <stdint.h>
 #include <string.h>
+
 #include "indcpa.h"
+#include "kem.h"
 #include "randombytes.h"
 #include "symmetric.h"
 #include "verify.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define check_pk MLKEM_NAMESPACE(check_pk)
+#define check_sk MLKEM_NAMESPACE(check_sk)
+/* End of static namespacing */
+
 #if defined(CBMC)
 /* Redeclaration with contract needed for CBMC only */
 int memcmp(const void *str1, const void *str2, size_t n)
@@ -28,11 +37,12 @@ __contract__(
  *              Described in Section 7.2 of FIPS203.
  *
  * Arguments:   - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
- **
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
+static int check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   polyvec p;
   uint8_t p_reencoded[MLKEM_POLYVECBYTES];
@@ -56,11 +66,12 @@ static int check_pk(const uint8_t pk[MLKEM_PUBLICKEYBYTES])
  *              Described in Section 7.3 of FIPS203.
  *
  * Arguments:   - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 on failure
  **************************************************/
-static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
+static int check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t test[MLKEM_SYMBYTES];
   /*
@@ -68,8 +79,8 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
    * no public information is leaked through the runtime or the return value
    * of this function.
    */
-  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_PUBLICKEYBYTES);
-  if (memcmp(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+  hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES, MLKEM_INDCCA_PUBLICKEYBYTES);
+  if (memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
              MLKEM_SYMBYTES))
   {
     return -1;
@@ -77,19 +88,22 @@ static int check_sk(const uint8_t sk[MLKEM_SECRETKEYBYTES])
   return 0;
 }
 
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 {
   indcpa_keypair_derand(pk, sk, coins);
-  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_PUBLICKEYBYTES);
-  hash_h(sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
-         MLKEM_PUBLICKEYBYTES);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+         MLKEM_INDCCA_PUBLICKEYBYTES);
   /* Value z for pseudo-random output on reject */
-  memcpy(sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, coins + MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   return 0;
 }
 
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
   randombytes(coins, 2 * MLKEM_SYMBYTES);
@@ -97,8 +111,10 @@ int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
   return 0;
 }
 
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 {
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
   /* Will contain key, coins */
@@ -112,7 +128,7 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   memcpy(buf, coins, MLKEM_SYMBYTES);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_PUBLICKEYBYTES);
+  hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* coins are in kr+MLKEM_SYMBYTES */
@@ -122,14 +138,18 @@ int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
   return 0;
 }
 
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 {
   ALIGN uint8_t coins[MLKEM_SYMBYTES];
   randombytes(coins, MLKEM_SYMBYTES);
   return crypto_kem_enc_derand(ct, ss, pk, coins);
 }
 
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 {
   uint8_t fail;
   ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
@@ -145,25 +165,26 @@ int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
   indcpa_dec(buf, ct, sk);
 
   /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf + MLKEM_SYMBYTES, sk + MLKEM_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
-         MLKEM_SYMBYTES);
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
   hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
 
   /* Recompute and compare ciphertext */
   {
     /* Temporary buffer */
-    ALIGN uint8_t cmp[MLKEM_CIPHERTEXTBYTES];
+    ALIGN uint8_t cmp[MLKEM_INDCCA_CIPHERTEXTBYTES];
     /* coins are in kr+MLKEM_SYMBYTES */
     indcpa_enc(cmp, buf, pk, kr + MLKEM_SYMBYTES);
-    fail = ct_memcmp(ct, cmp, MLKEM_CIPHERTEXTBYTES);
+    fail = ct_memcmp(ct, cmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
   }
 
   /* Compute rejection key */
   {
     /* Temporary buffer */
-    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_CIPHERTEXTBYTES];
-    memcpy(tmp, sk + MLKEM_SECRETKEYBYTES - MLKEM_SYMBYTES, MLKEM_SYMBYTES);
-    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_CIPHERTEXTBYTES);
+    ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+    memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+           MLKEM_SYMBYTES);
+    memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
     hash_j(ss, tmp, sizeof(tmp));
   }
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h
index 2ba4af066..074e4771e 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/kem.h
@@ -7,22 +7,24 @@
 
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
 
-#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES
-#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES
-#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES
-#define CRYPTO_BYTES MLKEM_SSBYTES
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#include "mlkem_native.h"
 
-#if (MLKEM_K == 2)
-#define CRYPTO_ALGNAME "Kyber512"
-#elif (MLKEM_K == 3)
-#define CRYPTO_ALGNAME "Kyber768"
-#elif (MLKEM_K == 4)
-#define CRYPTO_ALGNAME "Kyber1024"
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
 #endif
 
-#define crypto_kem_keypair_derand MLKEM_NAMESPACE(keypair_derand)
 /*************************************************
  * Name:        crypto_kem_keypair_derand
  *
@@ -30,25 +32,28 @@
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *              - uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with 2*MLKEM_SYMBYTES
- *random bytes)
+ *                 random bytes)
  **
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins)
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t *coins)
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_keypair MLKEM_NAMESPACE(keypair)
 /*************************************************
  * Name:        crypto_kem_keypair
  *
@@ -56,21 +61,23 @@ __contract__(
  *              for CCA-secure ML-KEM key encapsulation mechanism
  *
  * Arguments:   - uint8_t *pk: pointer to output public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - uint8_t *sk: pointer to output private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 (success)
  **************************************************/
-int crypto_kem_keypair(uint8_t *pk, uint8_t *sk)
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(pk))
   assigns(object_whole(sk))
 );
 
-#define crypto_kem_enc_derand MLKEM_NAMESPACE(enc_derand)
 /*************************************************
  * Name:        crypto_kem_enc_derand
  *
@@ -78,30 +85,33 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *              - const uint8_t *coins: pointer to input randomness
  *                (an already allocated array filled with MLKEM_SYMBYTES random
- *bytes)
+ *                 bytes)
  **
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk,
-                          const uint8_t *coins)
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   requires(memory_no_alias(coins, MLKEM_SYMBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_enc MLKEM_NAMESPACE(enc)
 /*************************************************
  * Name:        crypto_kem_enc
  *
@@ -109,25 +119,28 @@ __contract__(
  *              secret for given public key
  *
  * Arguments:   - uint8_t *ct: pointer to output cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *pk: pointer to input public key
- *                (an already allocated array of MLKEM_PUBLICKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
  * of FIPS203) fails.
  **************************************************/
-int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk)
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
 __contract__(
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(pk, MLKEM_PUBLICKEYBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
   assigns(object_whole(ct))
   assigns(object_whole(ss))
 );
 
-#define crypto_kem_dec MLKEM_NAMESPACE(dec)
 /*************************************************
  * Name:        crypto_kem_dec
  *
@@ -137,20 +150,24 @@ __contract__(
  * Arguments:   - uint8_t *ss: pointer to output shared secret
  *                (an already allocated array of MLKEM_SSBYTES bytes)
  *              - const uint8_t *ct: pointer to input cipher text
- *                (an already allocated array of MLKEM_CIPHERTEXTBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
  *              - const uint8_t *sk: pointer to input private key
- *                (an already allocated array of MLKEM_SECRETKEYBYTES bytes)
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
  *
  * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
  * FIPS203) fails.
  *
  * On failure, ss will contain a pseudo-random value.
  **************************************************/
-int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk)
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
 __contract__(
   requires(memory_no_alias(ss, MLKEM_SSBYTES))
-  requires(memory_no_alias(ct, MLKEM_CIPHERTEXTBYTES))
-  requires(memory_no_alias(sk, MLKEM_SECRETKEYBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
   assigns(object_whole(ss))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h
new file mode 100644
index 000000000..6cbaa9122
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/mlkem_native.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * To use this header, make sure one of the following holds:
+ *
+ * - The config.h used for the build is available in the include paths.
+ * - The values of BUILD_INFO_LVL and BUILD_INFO_NAMESPACE are set, reflecting
+ *   the security level (512/768/1024) and namespace of the build.
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header.
+ */
+
+/* NOTE: To use multiple instances of this header, use separate guards. */
+#ifndef MLKEM_NATIVE_H
+#define MLKEM_NATIVE_H
+
+#include <stdint.h>
+
+/*************************** Build information ********************************/
+
+/*
+ * Provide security level (BUILD_INFO_LVL) and namespacing
+ * (BUILD_INFO_NAMESPACE)
+ *
+ * By default, this is extracted from the configuration used for the build,
+ * but you can also set it manually to avoid a dependency on the build config.
+ */
+
+/* Skip this if BUILD_INFO_LVL has already been set */
+#if !defined(BUILD_INFO_LVL)
+
+/* Option 1: Extract from config */
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if MLKEM_K == 2
+#define BUILD_INFO_LVL 512
+#elif MLKEM_K == 3
+#define BUILD_INFO_LVL 768
+#elif MLKEM_K == 4
+#define BUILD_INFO_LVL 1024
+#else
+#error MLKEM_K not set by config file
+#endif
+
+#ifndef MLKEM_NAMESPACE
+#error MLKEM_NAMESPACE not set by config file
+#endif
+
+#define BUILD_INFO_NAMESPACE(sym) MLKEM_NAMESPACE(sym)
+
+#endif /* BUILD_INFO_LVL */
+
+/* Option 2: Provide BUILD_INFO_LVL and BUILD_INFO_NAMESPACE manually */
+
+/* #define BUILD_INFO_LVL            ADJUSTME */
+/* #define BUILD_INFO_NAMESPACE(sym) ADJUSTME */
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array
+ *                 of 2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)], const uint8_t *coins);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 (success)
+ **************************************************/
+int BUILD_INFO_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the public key modulus check (see Section 7.2
+ * of FIPS203) fails.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)], uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns 0 on success, and -1 if the secret key hash check (see Section 7.3 of
+ * FIPS203) fails.
+ *
+ * On failure, ss will contain a pseudo-random value.
+ **************************************************/
+int BUILD_INFO_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)]);
+
+/****************************** Standard API *********************************/
+
+/* If desired, export API in CRYPTO_xxx and crypto_kem_xxx format as used
+ * e.g. by SUPERCOP and NIST.
+ *
+ * Remove this if you don't need it, or if you need multiple instances
+ * of this header. */
+
+#if !defined(BUILD_INFO_NO_STANDARD_API)
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(BUILD_INFO_LVL)
+#define CRYPTO_CIPHERTEXTBYTES MLKEM_CIPHERTEXTBYTES(BUILD_INFO_LVL)
+
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand BUILD_INFO_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair BUILD_INFO_NAMESPACE(keypair)
+#define crypto_kem_enc_derand BUILD_INFO_NAMESPACE(enc_derand)
+#define crypto_kem_enc BUILD_INFO_NAMESPACE(enc)
+#define crypto_kem_dec BUILD_INFO_NAMESPACE(dec)
+#endif /* BUILD_INFO_NO_STANDARD_API */
+
+/********************************* Cleanup ************************************/
+
+/* Unset build information to allow multiple instances of this header.
+ * Keep this commented out when using the standard API. */
+/* #undef BUILD_INFO_LVL */
+/* #undef BUILD_INFO_NAMESPACE */
+
+#endif /* MLKEM_NATIVE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/namespace.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/namespace.h
deleted file mode 100644
index 8c409fb0c..000000000
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/namespace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2024 The mlkem-native project authors
- * SPDX-License-Identifier: Apache-2.0
- */
-#ifndef MLKEM_NATIVE_NAMESPACE_H
-#define MLKEM_NATIVE_NAMESPACE_H
-
-#if !defined(MLKEM_NATIVE_ARITH_BACKEND_NAME)
-#define MLKEM_NATIVE_ARITH_BACKEND_NAME C
-#endif
-
-/* Don't change parameters below this line */
-#if (MLKEM_K == 2)
-#define MLKEM_PARAM_NAME MLKEM512
-#elif (MLKEM_K == 3)
-#define MLKEM_PARAM_NAME MLKEM768
-#elif (MLKEM_K == 4)
-#define MLKEM_PARAM_NAME MLKEM1024
-#else
-#error "MLKEM_K must be in {2,3,4}"
-#endif
-
-#define ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) x1##_##x2##_##x3##_##x4
-#define __MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4) \
-  ___MLKEM_DEFAULT_NAMESPACE(x1, x2, x3, x4)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_<PARAM_NAME>_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_MLKEM512_AARCH64_OPT_
- */
-#define MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-#define _MLKEM_DEFAULT_NAMESPACE(s)                               \
-  __MLKEM_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE, MLKEM_PARAM_NAME, \
-                            MLKEM_NATIVE_ARITH_BACKEND_NAME, s)
-
-#if !defined(MLKEM_NATIVE_FIPS202_BACKEND_NAME)
-#define MLKEM_NATIVE_FIPS202_BACKEND_NAME C
-#endif
-
-#define ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) x1##_##x2##_##x3
-#define __FIPS202_DEFAULT_NAMESPACE(x1, x2, x3) \
-  ___FIPS202_DEFAULT_NAMESPACE(x1, x2, x3)
-
-/*
- * NAMESPACE is PQCP_MLKEM_NATIVE_FIPS202_<BACKEND>_
- * e.g., PQCP_MLKEM_NATIVE_FIPS202_X86_64_XKCP_
- */
-#define FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-#define _FIPS202_DEFAULT_NAMESPACE(s)                     \
-  __FIPS202_DEFAULT_NAMESPACE(_PQCP_MLKEM_NATIVE_FIPS202, \
-                              MLKEM_NATIVE_FIPS202_BACKEND_NAME, s)
-
-#endif /* MLKEM_NATIVE_NAMESPACE_H */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c
index 178e8467c..c30a37b0c 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.c
@@ -9,6 +9,15 @@
 #include "ntt.h"
 #include "reduce.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define ntt_butterfly_block MLKEM_NAMESPACE(ntt_butterfly_block)
+#define ntt_layer MLKEM_NAMESPACE(ntt_layer)
+#define invntt_layer MLKEM_NAMESPACE(invntt_layer)
+/* End of static namespacing */
+
 #if !defined(MLKEM_USE_NATIVE_NTT)
 /*
  * Computes a block CT butterflies with a fixed twiddle factor,
@@ -36,20 +45,19 @@
  *          4 -- 6
  *             5 -- 7
  */
-STATIC_TESTABLE
-void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start, int len,
-                         int bound)
+static void ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, int start,
+                                int len, int bound)
 __contract__(
   requires(0 <= start && start < MLKEM_N)
   requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
   requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
   requires(-HALF_Q < zeta && zeta < HALF_Q)
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
-  requires(array_abs_bound(r, 0, start - 1, bound + MLKEM_Q))
-  requires(array_abs_bound(r, start, MLKEM_N - 1, bound))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, start + 2*len - 1, bound + MLKEM_Q))
-  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N - 1, bound)))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
 {
   /* `bound` is a ghost variable only needed in the CBMC specification */
   int j;
@@ -61,10 +69,10 @@ __contract__(
      * Coefficients are updated in strided pairs, so the bounds for the
      * intermediate states alternate twice between the old and new bound
      */
-    invariant(array_abs_bound(r, 0,           j - 1,           bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j,           start + len - 1, bound))
-    invariant(array_abs_bound(r, start + len, j + len - 1,     bound + MLKEM_Q))
-    invariant(array_abs_bound(r, j + len,     MLKEM_N - 1,     bound)))
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
   {
     int16_t t;
     t = fqmul(r[j + len], zeta);
@@ -85,14 +93,13 @@ __contract__(
  *   official Kyber implementation here, merely adding `layer` as
  *   a ghost variable for the specifications.
  */
-STATIC_TESTABLE
-void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
+static void ntt_layer(int16_t r[MLKEM_N], int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(1 <= layer && layer <= 7 && len == (MLKEM_N >> layer))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1))
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, (layer + 1) * MLKEM_Q - 1)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q - 1)))
 {
   int start, k;
   /* `layer` is a ghost variable only needed in the CBMC specification */
@@ -103,8 +110,8 @@ __contract__(
   __loop__(
     invariant(0 <= start && start < MLKEM_N + 2 * len)
     invariant(0 <= k && k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
-    invariant(array_abs_bound(r, 0, start - 1, (layer * MLKEM_Q - 1) + MLKEM_Q))
-    invariant(array_abs_bound(r, start, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, start, (layer * MLKEM_Q - 1) + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     int16_t zeta = zetas[k++];
     ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q - 1);
@@ -120,6 +127,7 @@ __contract__(
  * the proof may need strengthening.
  */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   int len, layer;
@@ -130,7 +138,7 @@ void poly_ntt(poly *p)
   for (len = 128, layer = 1; len >= 2; len >>= 1, layer++)
   __loop__(
     invariant(1 <= layer && layer <= 8 && len == (MLKEM_N >> layer))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, layer * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q - 1)))
   {
     ntt_layer(r, len, layer);
   }
@@ -143,6 +151,7 @@ void poly_ntt(poly *p)
 /* Check that bound for native NTT implies contractual bound */
 STATIC_ASSERT(NTT_BOUND_NATIVE <= NTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *p)
 {
   POLY_BOUND_MSG(p, MLKEM_Q, "native ntt input");
@@ -158,15 +167,14 @@ void poly_ntt(poly *p)
 STATIC_ASSERT(INVNTT_BOUND_REF <= INVNTT_BOUND, invntt_bound)
 
 /* Compute one layer of inverse NTT */
-STATIC_TESTABLE
-void invntt_layer(int16_t *r, int len, int layer)
+static void invntt_layer(int16_t *r, int len, int layer)
 __contract__(
   requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
   requires(2 <= len && len <= 128 && 1 <= layer && layer <= 7)
   requires(len == (1 << (8 - layer)))
-  requires(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
   assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
-  ensures(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
 {
   int start, k;
   /* `layer` is a ghost variable used only in the specification */
@@ -174,7 +182,7 @@ __contract__(
   k = MLKEM_N / len - 1;
   for (start = 0; start < MLKEM_N; start += 2 * len)
   __loop__(
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
     invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
     /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
     invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
@@ -185,7 +193,7 @@ __contract__(
     __loop__(
       invariant(start <= j && j <= start + len)
       invariant(0 <= start && start <= MLKEM_N && 0 <= k && k <= 127)
-      invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
     {
       int16_t t = r[j];
       r[j] = barrett_reduce(t + r[j + len]);
@@ -195,6 +203,7 @@ __contract__(
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   /*
@@ -209,7 +218,7 @@ void poly_invntt_tomont(poly *p)
   for (j = 0; j < MLKEM_N; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N)
-    invariant(array_abs_bound(r, 0, j - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
   {
     r[j] = fqmul(r[j], f);
   }
@@ -218,7 +227,7 @@ void poly_invntt_tomont(poly *p)
   for (len = 2, layer = 7; len <= 128; len <<= 1, layer--)
   __loop__(
     invariant(2 <= len && len <= 256 && 0 <= layer && layer <= 7 && len == (1 << (8 - layer)))
-    invariant(array_abs_bound(r, 0, MLKEM_N - 1, MLKEM_Q)))
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
   {
     invntt_layer(p->coeffs, len, layer);
   }
@@ -230,6 +239,7 @@ void poly_invntt_tomont(poly *p)
 /* Check that bound for native invNTT implies contractual bound */
 STATIC_ASSERT(INVNTT_BOUND_NATIVE <= INVNTT_BOUND, invntt_bound)
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *p)
 {
   intt_native(p);
@@ -237,6 +247,7 @@ void poly_invntt_tomont(poly *p)
 }
 #endif /* MLKEM_USE_NATIVE_INTT */
 
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h
index efa38ecc9..dfe919869 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/ntt.h
@@ -32,12 +32,13 @@ extern const int16_t zetas[128];
  *
  * Arguments:   - poly *p: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_ntt(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_Q - 1))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q - 1))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, NTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, NTT_BOUND - 1))
 );
 
 #define poly_invntt_tomont MLKEM_NAMESPACE(poly_invntt_tomont)
@@ -57,11 +58,12 @@ __contract__(
  *
  * Arguments:   - uint16_t *a: pointer to in/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_invntt_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, INVNTT_BOUND - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, INVNTT_BOUND - 1))
 );
 
 #define basemul_cached MLKEM_NAMESPACE(basemul_cached)
@@ -85,15 +87,16 @@ __contract__(
  *            - b_cached: Some precomputed value, typically derived from
  *                   b1 and a twiddle factor. Can be an arbitary int16_t.
  ************************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void basemul_cached(int16_t r[2], const int16_t a[2], const int16_t b[2],
                     int16_t b_cached)
 __contract__(
   requires(memory_no_alias(r, 2 * sizeof(int16_t)))
   requires(memory_no_alias(a, 2 * sizeof(int16_t)))
   requires(memory_no_alias(b, 2 * sizeof(int16_t)))
-  requires(array_abs_bound(a, 0, 1, UINT12_MAX))
+  requires(array_abs_bound(a, 0, 2, UINT12_MAX))
   assigns(memory_slice(r, 2 * sizeof(int16_t)))
-  ensures(array_abs_bound(r, 0, 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r, 0, 2, 2 * MLKEM_Q - 1))
 );
 
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h
index 586c31d33..d9a24a38b 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/params.h
@@ -5,7 +5,11 @@
 #ifndef PARAMS_H
 #define PARAMS_H
 
+#if defined(MLKEM_NATIVE_CONFIG_FILE)
+#include MLKEM_NATIVE_CONFIG_FILE
+#else
 #include "config.h"
+#endif /* MLKEM_NATIVE_CONFIG_FILE */
 
 #if !defined(MLKEM_K)
 #error MLKEM_K is not defined
@@ -22,16 +26,19 @@
 #define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
 
 #if MLKEM_K == 2
+#define MLKEM_LVL 512
 #define MLKEM_ETA1 3
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 3
+#define MLKEM_LVL 768
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 128
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 320
 #define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
 #elif MLKEM_K == 4
+#define MLKEM_LVL 1024
 #define MLKEM_ETA1 2
 #define MLKEM_POLYCOMPRESSEDBYTES_DV 160
 #define MLKEM_POLYCOMPRESSEDBYTES_DU 352
@@ -46,12 +53,12 @@
 #define MLKEM_INDCPA_BYTES \
   (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
 
-#define MLKEM_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
 /* 32 bytes of additional space to save H(pk) */
-#define MLKEM_SECRETKEYBYTES                                   \
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
   (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
    2 * MLKEM_SYMBYTES)
-#define MLKEM_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
 
 #define KECCAK_WAY 4
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c
index db7d64ebf..9e39916b7 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.c
@@ -16,19 +16,20 @@
 #include "symmetric.h"
 #include "verify.h"
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 8))
   {
-    int k;
+    unsigned k;
     uint16_t t[8];
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(k >= 0 && k <= 8)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 11))))
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
     {
       t[k] = scalar_compress_d11(a->coeffs[8 * j + k]);
     }
@@ -54,12 +55,12 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(invariant(j >= 0 && j <= MLKEM_N / 4))
   {
-    int k;
+    unsigned k;
     uint16_t t[4];
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(k >= 0 && k <= 4)
-      invariant(forall(int, r, 0, k - 1, t[r] < (1u << 10))))
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
     {
       t[k] = scalar_compress_d10(a->coeffs[4 * j + k]);
     }
@@ -80,14 +81,15 @@ void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 }
 
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 {
-  int j;
+  unsigned j;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DU == 352)
   for (j = 0; j < MLKEM_N / 8; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, 8 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[8];
@@ -106,7 +108,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 8; k++)
     __loop__(
       invariant(0 <= k && k <= 8)
-      invariant(array_bound(r->coeffs, 0, 8 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * j + k] = scalar_decompress_d11(t[k]);
     }
@@ -115,7 +117,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
   for (j = 0; j < MLKEM_N / 4; j++)
   __loop__(
     invariant(0 <= j && j <= MLKEM_N / 4)
-    invariant(array_bound(r->coeffs, 0, 4 * j - 1, 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, (MLKEM_Q - 1))))
   {
     int k;
     uint16_t t[4];
@@ -129,7 +131,7 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
     for (k = 0; k < 4; k++)
     __loop__(
       invariant(0 <= k && k <= 4)
-      invariant(array_bound(r->coeffs, 0, 4 * j + k - 1, 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[4 * j + k] = scalar_decompress_d10(t[k]);
     }
@@ -139,21 +141,22 @@ void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 15)))
+      invariant(array_bound(t, 0, j, 0, 15)))
     {
       t[j] = scalar_compress_d4(a->coeffs[8 * i + j]);
     }
@@ -167,12 +170,12 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     uint8_t t[8] = {0};
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <= MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(t, 0, (j-1), 0, 31)))
+      invariant(array_bound(t, 0, j, 0, 31)))
     {
       t[j] = scalar_compress_d5(a->coeffs[8 * i + j]);
     }
@@ -193,14 +196,15 @@ void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 #endif
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_POLYCOMPRESSEDBYTES_DV == 128)
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, (MLKEM_Q - 1))))
   {
     r->coeffs[2 * i + 0] = scalar_decompress_d4((a[i] >> 0) & 0xF);
     r->coeffs[2 * i + 1] = scalar_decompress_d4((a[i] >> 4) & 0xF);
@@ -209,9 +213,9 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     uint8_t t[8];
     const int offset = i * 5;
     /*
@@ -237,7 +241,7 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(j >= 0 && j <= 8 && i >= 0 && i <= MLKEM_N / 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       r->coeffs[8 * i + j] = scalar_decompress_d5(t[j]);
     }
@@ -250,9 +254,10 @@ void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
-  unsigned int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
 
@@ -282,6 +287,7 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
   }
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 {
   POLY_UBOUND(a, MLKEM_Q);
@@ -290,13 +296,14 @@ void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 #endif /* MLKEM_USE_NATIVE_POLY_TOBYTES */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_FROMBYTES)
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 2; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 2)
-    invariant(array_bound(r->coeffs, 0, (2 * i - 1), 0, UINT12_MAX)))
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, UINT12_MAX)))
   {
     const uint8_t t0 = a[3 * i + 0];
     const uint8_t t1 = a[3 * i + 1];
@@ -309,15 +316,17 @@ void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
   POLY_UBOUND(r, 4096);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 {
   poly_frombytes_native(r, a);
 }
 #endif /* MLKEM_USE_NATIVE_POLY_FROMBYTES */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 {
-  int i;
+  unsigned i;
 #if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
 #error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
 #endif
@@ -325,13 +334,13 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N / 8)
-    invariant(array_bound(r->coeffs, 0, (8 * i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, (MLKEM_Q - 1))))
   {
-    int j;
+    unsigned j;
     for (j = 0; j < 8; j++)
     __loop__(
       invariant(i >= 0 && i <  MLKEM_N / 8 && j >= 0 && j <= 8)
-      invariant(array_bound(r->coeffs, 0, (8 * i + j - 1), 0, (MLKEM_Q - 1))))
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, (MLKEM_Q - 1))))
     {
       /* Prevent the compiler from recognizing this as a bit selection */
       uint8_t mask = value_barrier_u8(1u << j);
@@ -341,15 +350,16 @@ void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
   POLY_BOUND_MSG(r, MLKEM_Q, "poly_frommsg output");
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
 {
-  int i;
+  unsigned i;
   POLY_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_N / 8; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 8))
   {
-    int j;
+    unsigned j;
     msg[i] = 0;
     for (j = 0; j < 8; j++)
     __loop__(
@@ -361,26 +371,32 @@ void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *a)
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
 {
-  ALIGN uint8_t buf[KECCAK_WAY][MLKEM_ETA1 * MLKEM_N / 4];
-  ALIGN uint8_t extkey[KECCAK_WAY][MLKEM_SYMBYTES + 1];
-  memcpy(extkey[0], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
-  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
-  extkey[0][MLKEM_SYMBYTES] = nonce0;
-  extkey[1][MLKEM_SYMBYTES] = nonce1;
-  extkey[2][MLKEM_SYMBYTES] = nonce2;
-  extkey[3][MLKEM_SYMBYTES] = nonce3;
-  prf_eta1_x4(buf[0], buf[1], buf[2], buf[3], extkey[0], extkey[1], extkey[2],
-              extkey[3]);
-  poly_cbd_eta1(r0, buf[0]);
-  poly_cbd_eta1(r1, buf[1]);
-  poly_cbd_eta1(r2, buf[2]);
-  poly_cbd_eta1(r3, buf[3]);
+  ALIGN uint8_t buf0[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf1[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf2[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t buf3[MLKEM_ETA1 * MLKEM_N / 4];
+  ALIGN uint8_t extkey0[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey1[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey2[MLKEM_SYMBYTES + 1];
+  ALIGN uint8_t extkey3[MLKEM_SYMBYTES + 1];
+  memcpy(extkey0, seed, MLKEM_SYMBYTES);
+  memcpy(extkey1, seed, MLKEM_SYMBYTES);
+  memcpy(extkey2, seed, MLKEM_SYMBYTES);
+  memcpy(extkey3, seed, MLKEM_SYMBYTES);
+  extkey0[MLKEM_SYMBYTES] = nonce0;
+  extkey1[MLKEM_SYMBYTES] = nonce1;
+  extkey2[MLKEM_SYMBYTES] = nonce2;
+  extkey3[MLKEM_SYMBYTES] = nonce3;
+  prf_eta1_x4(buf0, buf1, buf2, buf3, extkey0, extkey1, extkey2, extkey3);
+  poly_cbd_eta1(r0, buf0);
+  poly_cbd_eta1(r1, buf1);
+  poly_cbd_eta1(r2, buf2);
+  poly_cbd_eta1(r3, buf3);
 
   POLY_BOUND_MSG(r0, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 0");
   POLY_BOUND_MSG(r1, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 1");
@@ -388,6 +404,8 @@ void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r3, MLKEM_ETA1 + 1, "poly_getnoise_eta1_4x output 3");
 }
 
+#if MLKEM_K == 2 || MLKEM_K == 4
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 {
@@ -402,7 +420,10 @@ void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
 
   POLY_BOUND_MSG(r, MLKEM_ETA1 + 1, "poly_getnoise_eta2 output");
 }
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -420,15 +441,10 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   extkey[2][MLKEM_SYMBYTES] = nonce2;
   extkey[3][MLKEM_SYMBYTES] = nonce3;
 
-#if MLKEM_ETA1 == MLKEM_ETA2
-  prf_eta1_x4(buf1[0], buf1[1], buf2[0], buf2[1], extkey[0], extkey[1],
-              extkey[2], extkey[3]);
-#else
   prf_eta1(buf1[0], extkey[0]);
   prf_eta1(buf1[1], extkey[1]);
   prf_eta2(buf2[0], extkey[2]);
   prf_eta2(buf2[1], extkey[3]);
-#endif
 
   poly_cbd_eta1(r0, buf1[0]);
   poly_cbd_eta1(r1, buf1[1]);
@@ -440,18 +456,20 @@ void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
   POLY_BOUND_MSG(r2, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 2");
   POLY_BOUND_MSG(r3, MLKEM_ETA2 + 1, "poly_getnoise_eta1122_4x output 3");
 }
+#endif /* MLKEM_K == 2 */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   POLY_BOUND(b_cache, 4096);
 
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(
     assigns(i, object_whole(r))
     invariant(i >= 0 && i <= MLKEM_N / 4)
-    invariant(array_abs_bound(r->coeffs, 0, (4 * i - 1), 2 * MLKEM_Q - 1)))
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 2 * MLKEM_Q - 1)))
   {
     basemul_cached(&r->coeffs[4 * i], &a->coeffs[4 * i], &b->coeffs[4 * i],
                    b_cache->coeffs[2 * i]);
@@ -461,14 +479,15 @@ void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_TOMONT)
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
-  int i;
+  unsigned i;
   const int16_t f = (1ULL << 32) % MLKEM_Q; /* 1353 */
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_abs_bound(r->coeffs ,0, (i - 1), (MLKEM_Q - 1))))
+    invariant(array_abs_bound(r->coeffs ,0, i, (MLKEM_Q - 1))))
   {
     r->coeffs[i] = fqmul(r->coeffs[i], f);
   }
@@ -476,6 +495,7 @@ void poly_tomont(poly *r)
   POLY_BOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_TOMONT */
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 {
   poly_tomont_native(r);
@@ -484,13 +504,14 @@ void poly_tomont(poly *r)
 #endif /* MLKEM_USE_NATIVE_POLY_TOMONT */
 
 #if !defined(MLKEM_USE_NATIVE_POLY_REDUCE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(array_bound(r->coeffs, 0, (i - 1), 0, (MLKEM_Q - 1))))
+    invariant(array_bound(r->coeffs, 0, i, 0, (MLKEM_Q - 1))))
   {
     /* Barrett reduction, giving signed canonical representative */
     int16_t t = barrett_reduce(r->coeffs[i]);
@@ -501,6 +522,7 @@ void poly_reduce(poly *r)
   POLY_UBOUND(r, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_REDUCE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 {
   poly_reduce_native(r);
@@ -508,36 +530,39 @@ void poly_reduce(poly *r)
 }
 #endif /* MLKEM_USE_NATIVE_POLY_REDUCE */
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N; i++)
   __loop__(
     invariant(i >= 0 && i <= MLKEM_N)
-    invariant(forall(int, k0, i, MLKEM_N - 1, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
-    invariant(forall(int, k1, 0, i - 1, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
   {
     r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
   }
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_N / 4; i++)
   __loop__(invariant(i >= 0 && i <= MLKEM_N / 4))
   {
@@ -547,6 +572,7 @@ void poly_mulcache_compute(poly_mulcache *x, const poly *a)
   POLY_BOUND(x, MLKEM_Q);
 }
 #else  /* MLKEM_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 {
   poly_mulcache_compute_native(x, a);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h
index 19cf7b96b..32713990d 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/poly.h
@@ -22,6 +22,7 @@
  * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
  * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
  */
+#define poly MLKEM_NAMESPACE(poly)
 typedef struct
 {
   int16_t coeffs[MLKEM_N];
@@ -31,11 +32,28 @@ typedef struct
  * INTERNAL presentation of precomputed data speeding up
  * the base multiplication of two polynomials in NTT domain.
  */
+#define poly_mulcache MLKEM_NAMESPACE(poly_mulcache)
 typedef struct
 {
   int16_t coeffs[MLKEM_N >> 1];
 } poly_mulcache;
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define scalar_compress_d1 MLKEM_NAMESPACE(scalar_compress_d1)
+#define scalar_compress_d4 MLKEM_NAMESPACE(scalar_compress_d4)
+#define scalar_compress_d5 MLKEM_NAMESPACE(scalar_compress_d5)
+#define scalar_compress_d10 MLKEM_NAMESPACE(scalar_compress_d10)
+#define scalar_compress_d11 MLKEM_NAMESPACE(scalar_compress_d11)
+#define scalar_decompress_d4 MLKEM_NAMESPACE(scalar_decompress_d4)
+#define scalar_decompress_d5 MLKEM_NAMESPACE(scalar_decompress_d5)
+#define scalar_decompress_d10 MLKEM_NAMESPACE(scalar_decompress_d10)
+#define scalar_decompress_d11 MLKEM_NAMESPACE(scalar_decompress_d11)
+#define scalar_signed_to_unsigned_q MLKEM_NAMESPACE(scalar_signed_to_unsigned_q)
+/* End of static namespacing */
+
 /************************************************************
  * Name: scalar_compress_d1
  *
@@ -316,11 +334,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_du(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
 );
 
@@ -339,12 +358,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_du(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_compress_dv MLKEM_NAMESPACE(poly_compress_dv)
@@ -360,11 +380,12 @@ __contract__(
  *                  Coefficients must be unsigned canonical,
  *                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_compress_dv(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -384,12 +405,13 @@ __contract__(
  * (non-negative and smaller than MLKEM_Q).
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_decompress_dv(poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tobytes MLKEM_NAMESPACE(poly_tobytes)
@@ -407,11 +429,12 @@ __contract__(
  *              - r: pointer to output byte array
  *                   (of MLKEM_POLYBYTES bytes)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const poly *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYBYTES))
   requires(memory_no_alias(a, sizeof(poly)))
-  requires(array_bound(a->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(r))
 );
 
@@ -430,12 +453,13 @@ __contract__(
  *                   each coefficient unsigned and in the range
  *                   0 .. 4095
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frombytes(poly *r, const uint8_t a[MLKEM_POLYBYTES])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, UINT12_MAX))
 );
 
 
@@ -448,12 +472,13 @@ __contract__(
  * Arguments:   - poly *r: pointer to output polynomial
  *              - const uint8_t *msg: pointer to input message
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_frommsg(poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(object_whole(r))
-  ensures(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_tomsg MLKEM_NAMESPACE(poly_tomsg)
@@ -466,11 +491,12 @@ __contract__(
  *              - const poly *r: pointer to input polynomial
  *                Coefficients must be unsigned canonical
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const poly *r)
 __contract__(
   requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
   requires(memory_no_alias(r, sizeof(poly)))
-  requires(array_bound(r->coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
   assigns(object_whole(msg))
 );
 
@@ -487,6 +513,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                            const uint8_t seed[MLKEM_SYMBYTES], uint8_t nonce0,
                            uint8_t nonce1, uint8_t nonce2, uint8_t nonce3)
@@ -507,10 +534,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 4
 __contract__(
@@ -522,10 +549,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #elif MLKEM_K == 3
 __contract__(
@@ -538,10 +565,10 @@ __contract__(
   assigns(memory_slice(r2, sizeof(poly)))
   assigns(memory_slice(r3, sizeof(poly)))
   ensures(
-    array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-    && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA1));
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1));
 );
 #endif /* MLKEM_K */
 
@@ -554,6 +581,7 @@ __contract__(
 #define poly_getnoise_eta2_4x poly_getnoise_eta1_4x
 #endif /* MLKEM_ETA1 == MLKEM_ETA2 */
 
+#if MLKEM_K == 2 || MLKEM_K == 4
 #define poly_getnoise_eta2 MLKEM_NAMESPACE(poly_getnoise_eta2)
 /*************************************************
  * Name:        poly_getnoise_eta2
@@ -567,15 +595,18 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta2(poly *r, const uint8_t seed[MLKEM_SYMBYTES],
                         uint8_t nonce)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_ETA2))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2))
 );
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
 
+#if MLKEM_K == 2
 #define poly_getnoise_eta1122_4x MLKEM_NAMESPACE(poly_getnoise_eta1122_4x)
 /*************************************************
  * Name:        poly_getnoise_eta1122_4x
@@ -589,6 +620,7 @@ __contract__(
  *                                     (of length MLKEM_SYMBYTES bytes)
  *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_getnoise_eta1122_4x(poly *r0, poly *r1, poly *r2, poly *r3,
                               const uint8_t seed[MLKEM_SYMBYTES],
                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
@@ -599,11 +631,12 @@ __contract__(
    r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
   requires(memory_no_alias(seed, MLKEM_SYMBYTES))
   assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
-  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r1->coeffs,0, MLKEM_N - 1, MLKEM_ETA1)
-     && array_abs_bound(r2->coeffs,0, MLKEM_N - 1, MLKEM_ETA2)
-     && array_abs_bound(r3->coeffs,0, MLKEM_N - 1, MLKEM_ETA2));
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2));
 );
+#endif /* MLKEM_K == 2 */
 
 #define poly_basemul_montgomery_cached \
   MLKEM_NAMESPACE(poly_basemul_montgomery_cached)
@@ -626,6 +659,7 @@ __contract__(
  *                  for second input polynomial. Can be computed
  *                  via poly_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_basemul_montgomery_cached(poly *r, const poly *a, const poly *b,
                                     const poly_mulcache *b_cache)
 __contract__(
@@ -633,9 +667,9 @@ __contract__(
   requires(memory_no_alias(a, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
   requires(memory_no_alias(b_cache, sizeof(poly_mulcache)))
-  requires(array_abs_bound(a->coeffs, 0, MLKEM_N - 1, UINT12_MAX))
+  requires(array_abs_bound(a->coeffs, 0, MLKEM_N, UINT12_MAX))
   assigns(object_whole(r))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, 2 * MLKEM_Q - 1))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, 2 * MLKEM_Q - 1))
 );
 
 #define poly_tomont MLKEM_NAMESPACE(poly_tomont)
@@ -649,11 +683,12 @@ __contract__(
  *
  * Arguments:   - poly *r: pointer to input/output polynomial
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void poly_tomont(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, (MLKEM_Q - 1)))
 );
 
 #define poly_mulcache_compute MLKEM_NAMESPACE(poly_mulcache_compute)
@@ -679,6 +714,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_mulcache_compute(poly_mulcache *x, const poly *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(poly_mulcache)))
@@ -704,11 +740,12 @@ __contract__(
  * outputs are better suited to the only remaining
  * use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_reduce(poly *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   assigns(memory_slice(r, sizeof(poly)))
-  ensures(array_bound(r->coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1)))
 );
 
 #define poly_add MLKEM_NAMESPACE(poly_add)
@@ -729,13 +766,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_add.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_add(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -753,13 +791,14 @@ __contract__(
  * NOTE: The reference implementation uses a 3-argument poly_sub.
  * We specialize to the accumulator form to avoid reasoning about aliasing.
  */
+MLKEM_NATIVE_INTERNAL_API
 void poly_sub(poly *r, const poly *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(b, sizeof(poly)))
-  requires(forall(int, k0, 0, MLKEM_N - 1, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
-  requires(forall(int, k1, 0, MLKEM_N - 1, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
-  ensures(forall(int, k, 0, MLKEM_N - 1, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
   assigns(object_whole(r))
 );
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c
index 72277a626..9e000e5c5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.c
@@ -5,15 +5,16 @@
 #include "polyvec.h"
 #include <stdint.h>
 #include "arith_backend.h"
-#include "config.h"
 #include "ntt.h"
 #include "poly.h"
 
 #include "debug/debug.h"
+
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   POLYVEC_UBOUND(a, MLKEM_Q);
 
   for (i = 0; i < MLKEM_K; i++)
@@ -22,10 +23,11 @@ void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU);
@@ -34,36 +36,40 @@ void polyvec_decompress_du(polyvec *r,
   POLYVEC_UBOUND(r, MLKEM_Q);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_ntt(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_invntt_tomont(&r->vec[i]);
@@ -71,11 +77,12 @@ void polyvec_invntt_tomont(polyvec *r)
 }
 
 #if !defined(MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
 {
-  int i;
+  unsigned i;
   poly t;
 
   POLYVEC_BOUND(a, 4096);
@@ -96,13 +103,13 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
    * in the higher level bounds reasoning. It is thus best to omit
    * them from the spec to not unnecessarily constraint native implementations.
    */
-  cassert(
-      array_abs_bound(r->coeffs, 0, MLKEM_N - 1, MLKEM_K * (2 * MLKEM_Q - 1)),
-      "polyvec_basemul_acc_montgomery_cached output bounds");
+  cassert(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_K * (2 * MLKEM_Q - 1)),
+          "polyvec_basemul_acc_montgomery_cached output bounds");
   /* TODO: Integrate CBMC assertion into POLY_BOUND if CBMC is set */
   POLY_BOUND(r, MLKEM_K * 2 * MLKEM_Q);
 }
 #else  /* !MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -116,6 +123,7 @@ void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
 }
 #endif /* MLKEM_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 {
   polyvec_mulcache b_cache;
@@ -123,36 +131,40 @@ void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
   polyvec_basemul_acc_montgomery_cached(r, a, b, &b_cache);
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_mulcache_compute(&x->vec[i], &a->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_reduce(&r->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 {
-  int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_add(&r->vec[i], &b->vec[i]);
   }
 }
 
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 {
-  unsigned int i;
+  unsigned i;
   for (i = 0; i < MLKEM_K; i++)
   {
     poly_tomont(&r->vec[i]);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h
index cd90734fa..de2882c84 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/polyvec.h
@@ -9,11 +9,13 @@
 #include "common.h"
 #include "poly.h"
 
+#define polyvec MLKEM_NAMESPACE(polyvec)
 typedef struct
 {
   poly vec[MLKEM_K];
 } ALIGN polyvec;
 
+#define polyvec_mulcache MLKEM_NAMESPACE(polyvec_mulcache)
 typedef struct
 {
   poly_mulcache vec[MLKEM_K];
@@ -31,13 +33,14 @@ typedef struct
  *                                  Coefficients must be unsigned canonical,
  *                                  i.e. in [0,1,..,MLKEM_Q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
                          const polyvec *a)
 __contract__(
   requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(a, sizeof(polyvec)))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -53,14 +56,15 @@ __contract__(
  *              - const uint8_t *a: pointer to input byte array
  *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_decompress_du(polyvec *r,
                            const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
 __contract__(
   requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_tobytes MLKEM_NAMESPACE(polyvec_tobytes)
@@ -74,12 +78,13 @@ __contract__(
  *              - const polyvec *a: pointer to input vector of polynomials
  *                  Each polynomial must have coefficients in [0,..,q-1].
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a)
 __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
-  requires(forall(int, k0, 0, MLKEM_K - 1,
-         array_bound(a->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, (MLKEM_Q - 1))))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
   assigns(object_whole(r))
 );
 
@@ -95,13 +100,14 @@ __contract__(
  *                 normalized in [0..4095].
  *              - uint8_t *r: pointer to input byte array
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES])
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-        array_bound(r->vec[k0].coeffs, 0, (MLKEM_N - 1), 0, UINT12_MAX)))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, UINT12_MAX)))
 );
 
 #define polyvec_ntt MLKEM_NAMESPACE(polyvec_ntt)
@@ -119,14 +125,15 @@ __contract__(
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_ntt(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
-  requires(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (NTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (NTT_BOUND - 1))))
 );
 
 #define polyvec_invntt_tomont MLKEM_NAMESPACE(polyvec_invntt_tomont)
@@ -145,12 +152,13 @@ __contract__(
  *
  * Arguments:   - polyvec *r: pointer to in/output vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_invntt_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (INVNTT_BOUND - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (INVNTT_BOUND - 1))))
 );
 
 #define polyvec_basemul_acc_montgomery \
@@ -165,13 +173,14 @@ __contract__(
  *            - const polyvec *a: pointer to first input vector of polynomials
  *            - const polyvec *b: pointer to second input vector of polynomials
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(poly)))
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -195,6 +204,7 @@ __contract__(
  *                  for second input polynomial vector. Can be computed
  *                  via polyvec_mulcache_compute().
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_basemul_acc_montgomery_cached(poly *r, const polyvec *a,
                                            const polyvec *b,
                                            const polyvec_mulcache *b_cache)
@@ -203,8 +213,8 @@ __contract__(
   requires(memory_no_alias(a, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
   requires(memory_no_alias(b_cache, sizeof(polyvec_mulcache)))
-  requires(forall(int, k1, 0, MLKEM_K - 1,
-    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N - 1, UINT12_MAX)))
+  requires(forall(k1, 0, MLKEM_K,
+    array_abs_bound(a->vec[k1].coeffs, 0, MLKEM_N, UINT12_MAX)))
   assigns(memory_slice(r, sizeof(poly)))
 );
 
@@ -234,6 +244,7 @@ __contract__(
  * the mulcache with values in (-q,q), but this is not needed for the
  * higher level safety proofs, and thus not part of the spec.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_mulcache_compute(polyvec_mulcache *x, const polyvec *a)
 __contract__(
   requires(memory_no_alias(x, sizeof(polyvec_mulcache)))
@@ -258,12 +269,13 @@ __contract__(
  *       outputs are better suited to the only remaining
  *       use of poly_reduce() in the context of (de)serialization.
  */
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_reduce(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, k0, 0, MLKEM_K - 1,
-    array_bound(r->vec[k0].coeffs, 0, MLKEM_N - 1, 0, (MLKEM_Q - 1))))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, (MLKEM_Q - 1))))
 );
 
 #define polyvec_add MLKEM_NAMESPACE(polyvec_add)
@@ -283,15 +295,16 @@ __contract__(
  * to prove type-safety of calling units. Therefore, no stronger
  * ensures clause is required on this function.
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_add(polyvec *r, const polyvec *b)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   requires(memory_no_alias(b, sizeof(polyvec)))
-  requires(forall(int, j0, 0, MLKEM_K - 1,
-          forall(int, k0, 0, MLKEM_N - 1,
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
             (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX)))
-  requires(forall(int, j1, 0, MLKEM_K - 1,
-          forall(int, k1, 0, MLKEM_N - 1,
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
             (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN)))
   assigns(object_whole(r))
 );
@@ -306,13 +319,14 @@ __contract__(
  *              Bounds: Output < q in absolute value.
  *
  **************************************************/
+MLKEM_NATIVE_INTERNAL_API
 void polyvec_tomont(polyvec *r)
 __contract__(
   requires(memory_no_alias(r, sizeof(polyvec)))
   assigns(memory_slice(r, sizeof(polyvec)))
   assigns(object_whole(r))
-  ensures(forall(int, j, 0, MLKEM_K - 1,
-  array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N - 1, (MLKEM_Q - 1))))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, (MLKEM_Q - 1))))
 );
 
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h
index 515f706fa..ddbea6be5 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/reduce.h
@@ -10,6 +10,17 @@
 #include "common.h"
 #include "debug/debug.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define cast_uint16_to_int16 MLKEM_NAMESPACE(cast_uint16_to_int16)
+#define montgomery_reduce_generic MLKEM_NAMESPACE(montgomery_reduce_generic)
+#define montgomery_reduce MLKEM_NAMESPACE(montgomery_reduce)
+#define fqmul MLKEM_NAMESPACE(fqmul)
+#define barrett_reduce MLKEM_NAMESPACE(barrett_reduce)
+/* End of static namespacing */
+
 #define HALF_Q ((MLKEM_Q + 1) / 2) /* 1665 */
 
 /*************************************************
@@ -96,8 +107,7 @@ static INLINE int16_t montgomery_reduce_generic(int32_t a)
  * Returns:     integer congruent to a * R^-1 modulo q,
  *              smaller than 2 * q in absolute value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t montgomery_reduce(int32_t a)
+static INLINE int16_t montgomery_reduce(int32_t a)
 __contract__(
   requires(a > -(2 * 4096 * 32768))
   requires(a <  (2 * 4096 * 32768))
@@ -132,8 +142,7 @@ __contract__(
  * smaller than q in absolute value.
  *
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t fqmul(int16_t a, int16_t b)
+static INLINE int16_t fqmul(int16_t a, int16_t b)
 __contract__(
   requires(b > -HALF_Q)
   requires(b < HALF_Q)
@@ -166,8 +175,7 @@ __contract__(
  *
  * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
  **************************************************/
-STATIC_INLINE_TESTABLE
-int16_t barrett_reduce(int16_t a)
+static INLINE int16_t barrett_reduce(int16_t a)
 __contract__(
   ensures(return_value > -HALF_Q && return_value < HALF_Q)
 )
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c
index 1e2d6b7ed..c9900a335 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.c
@@ -6,6 +6,13 @@
 #include "rej_uniform.h"
 #include "arith_backend.h"
 
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define rej_uniform_scalar MLKEM_NAMESPACE(rej_uniform_scalar)
+/* End of static namespacing */
+
 /*************************************************
  * Name:        rej_uniform_scalar
  *
@@ -35,18 +42,17 @@
  * is guaranteed to have been consumed. If it is equal to len, no information
  * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-STATIC_TESTABLE
-unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
-                                unsigned int offset, const uint8_t *buf,
-                                unsigned int buflen)
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
+                                       unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 )
 {
   unsigned int ctr, pos;
@@ -58,7 +64,7 @@ __contract__(
   while (ctr < target && pos + 3 <= buflen)
   __loop__(
     invariant(offset <= ctr && ctr <= target && pos <= buflen)
-    invariant(ctr > 0 ==> array_bound(r, 0, ctr - 1, 0, (MLKEM_Q - 1))))
+    invariant(ctr > 0 ==> array_bound(r, 0, ctr, 0, (MLKEM_Q - 1))))
   {
     val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
     val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
@@ -84,6 +90,7 @@ unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
 }
 #else  /* MLKEM_USE_NATIVE_REJ_UNIFORM */
 
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 {
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h
index e422f73cf..5ebe434f6 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/rej_uniform.h
@@ -47,15 +47,16 @@
  * buffer. This avoids shifting the buffer base in the caller, which appears
  * tricky to reason about.
  */
+MLKEM_NATIVE_INTERNAL_API
 unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
                          const uint8_t *buf, unsigned int buflen)
 __contract__(
   requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
   requires(memory_no_alias(r, sizeof(int16_t) * target))
   requires(memory_no_alias(buf, buflen))
-  requires(offset > 0 ==> array_bound(r, 0, offset - 1, 0, (MLKEM_Q - 1)))
+  requires(offset > 0 ==> array_bound(r, 0, offset, 0, (MLKEM_Q - 1)))
   assigns(memory_slice(r, sizeof(int16_t) * target))
   ensures(offset <= return_value && return_value <= target)
-  ensures(return_value > 0 ==> array_bound(r, 0, return_value - 1, 0, (MLKEM_Q - 1)))
+  ensures(return_value > 0 ==> array_bound(r, 0, return_value, 0, (MLKEM_Q - 1)))
 );
 #endif
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/sys.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/sys.h
index be3070dc2..01abb6032 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/sys.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/sys.h
@@ -61,6 +61,7 @@
  */
 
 /* Do not use inline for C90 builds*/
+#if !defined(INLINE)
 #if !defined(inline)
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 #define INLINE inline
@@ -77,6 +78,7 @@
 #define INLINE inline
 #define ALWAYS_INLINE __attribute__((always_inline))
 #endif
+#endif
 
 /*
  * C90 does not have the restrict compiler directive yet.
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h
index 9760db927..8c47155dc 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/verify.h
@@ -9,7 +9,23 @@
 #include <stddef.h>
 #include <stdint.h>
 #include "cbmc.h"
-#include "params.h"
+#include "common.h"
+
+/* Static namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define value_barrier_u8 MLKEM_NAMESPACE(value_barrier_u8)
+#define value_barrier_u32 MLKEM_NAMESPACE(value_barrier_u32)
+#define value_barrier_i32 MLKEM_NAMESPACE(value_barrier_i32)
+#define ct_cmask_neg_i16 MLKEM_NAMESPACE(ct_cmask_neg_i16)
+#define ct_cmask_nonzero_u8 MLKEM_NAMESPACE(ct_cmask_nonzero_u8)
+#define ct_cmask_nonzero_u16 MLKEM_NAMESPACE(ct_cmask_nonzero_u16)
+#define ct_sel_uint8 MLKEM_NAMESPACE(ct_sel_uint8)
+#define ct_sel_int16 MLKEM_NAMESPACE(ct_sel_int16)
+#define ct_memcmp MLKEM_NAMESPACE(ct_memcmp)
+#define ct_cmov_zero MLKEM_NAMESPACE(ct_cmov_zero)
+/* End of static namespacing */
 
 /* Constant-time comparisons and conditional operations
 
@@ -58,41 +74,41 @@
 extern volatile uint64_t ct_opt_blocker_u64;
 
 /* Helper functions for obtaining masks of various sizes */
-STATIC_INLINE_TESTABLE uint8_t get_optblocker_u8(void)
+static INLINE uint8_t get_optblocker_u8(void)
 __contract__(ensures(return_value == 0)) { return (uint8_t)ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_u32(void)
+static INLINE uint32_t get_optblocker_u32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t get_optblocker_i32(void)
+static INLINE uint32_t get_optblocker_i32(void)
 __contract__(ensures(return_value == 0)) { return ct_opt_blocker_u64; }
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u32()); }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_i32()); }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b)) { return (b ^ get_optblocker_u8()); }
 
 #else /* !MLKEM_USE_ASM_VALUE_BARRIER */
 
-STATIC_INLINE_TESTABLE uint32_t value_barrier_u32(uint32_t b)
+static INLINE uint32_t value_barrier_u32(uint32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE int32_t value_barrier_i32(int32_t b)
+static INLINE int32_t value_barrier_i32(int32_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
   return b;
 }
 
-STATIC_INLINE_TESTABLE uint8_t value_barrier_u8(uint8_t b)
+static INLINE uint8_t value_barrier_u8(uint8_t b)
 __contract__(ensures(return_value == b))
 {
   asm("" : "+r"(b));
@@ -118,7 +134,7 @@ __contract__(ensures(return_value == b))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_nonzero_u16(uint16_t x)
+static INLINE uint16_t ct_cmask_nonzero_u16(uint16_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -133,7 +149,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
  *
  * Arguments:   uint8_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_cmask_nonzero_u8(uint8_t x)
+static INLINE uint8_t ct_cmask_nonzero_u8(uint8_t x)
 __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
 {
   uint32_t tmp = value_barrier_u32(-((uint32_t)x));
@@ -163,7 +179,7 @@ __contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
  *
  * Arguments:   uint16_t x: Value to be converted into a mask
  **************************************************/
-STATIC_INLINE_TESTABLE uint16_t ct_cmask_neg_i16(int16_t x)
+static INLINE uint16_t ct_cmask_neg_i16(int16_t x)
 __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
 {
   int32_t tmp = value_barrier_i32((int32_t)x);
@@ -198,7 +214,7 @@ __contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
  *              int16_t b:       Second alternative
  *              uint16_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+static INLINE int16_t ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   uint16_t au = a, bu = b;
@@ -222,7 +238,7 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *              uint8_t b:       Second alternative
  *              uuint8_t cond:   Condition variable.
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+static INLINE uint8_t ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
 __contract__(ensures(return_value == (cond ? a : b)))
 {
   return b ^ (ct_cmask_nonzero_u8(cond) & (a ^ b));
@@ -239,28 +255,21 @@ __contract__(ensures(return_value == (cond ? a : b)))
  *
  * Returns 0 if the byte arrays are equal, a non-zero value otherwise
  **************************************************/
-STATIC_INLINE_TESTABLE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
-                                         const size_t len)
+static INLINE uint8_t ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                const size_t len)
 __contract__(
   requires(memory_no_alias(a, len))
   requires(memory_no_alias(b, len))
   requires(len <= INT_MAX)
-  ensures((return_value == 0) == forall(int, i, 0, ((int)len - 1), (a[i] == b[i]))))
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
 {
   uint8_t r = 0, s = 0;
+  unsigned i;
 
-  /*
-   * Switch to a _signed_ ilen value, so that our loop counter
-   * can also be signed, and thus (i - 1) in the loop invariant
-   * can yield -1 as required.
-   */
-  const int ilen = (int)len;
-  int i;
-
-  for (i = 0; i < ilen; i++)
+  for (i = 0; i < len; i++)
   __loop__(
-    invariant(i >= 0 && i <= ilen)
-    invariant((r == 0) == (forall(int, k, 0, (i - 1), (a[k] == b[k])))))
+    invariant(i >= 0 && i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
   {
     r |= a[i] ^ b[i];
     /* s is useless, but prevents the loop from being aborted once r=0xff. */
@@ -290,8 +299,8 @@ __contract__(
  *              size_t len:       Amount of bytes to be copied
  *              uint8_t b:        Condition value.
  **************************************************/
-STATIC_INLINE_TESTABLE
-void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+static INLINE void ct_cmov_zero(uint8_t *r, const uint8_t *x, size_t len,
+                                uint8_t b)
 __contract__(
   requires(memory_no_alias(r, len))
   requires(memory_no_alias(x, len))
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h
index 4b78c004a..4fbf92beb 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/arith_native_x86_64.h
@@ -20,6 +20,9 @@
 #define rej_uniform_avx2 MLKEM_NAMESPACE(rej_uniform_avx2)
 unsigned int rej_uniform_avx2(int16_t *r, const uint8_t *buf);
 
+#define rej_uniform_table MLKEM_NAMESPACE(rej_uniform_table)
+extern const uint8_t rej_uniform_table[256][8];
+
 #define ntt_avx2 MLKEM_NAMESPACE(ntt_avx2)
 void ntt_avx2(__m256i *r, const __m256i *qdata);
 
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c
index 3f1653ed3..098f90ef3 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/basemul.c
@@ -25,7 +25,7 @@ static void poly_basemul_montgomery_avx2(poly *r, const poly *a, const poly *b)
  */
 static void poly_add_avx2(poly *r, const poly *a, const poly *b)
 {
-  unsigned int i;
+  unsigned i;
   __m256i f0, f1;
 
   for (i = 0; i < MLKEM_N; i += 16)
@@ -41,7 +41,7 @@ void polyvec_basemul_acc_montgomery_cached_avx2(poly *r, const polyvec *a,
                                                 const polyvec *b,
                                                 const polyvec_mulcache *b_cache)
 {
-  unsigned int i;
+  unsigned i;
   poly t;
 
   /* TODO: Use mulcache for AVX2. So far, it is unused. */
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c
index c3c8b8104..c65b3d3d8 100644
--- a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_avx2.c
@@ -17,139 +17,6 @@
 #include <string.h>
 #include "arith_native_x86_64.h"
 #include "consts.h"
-/* #define BMI */
-
-#ifndef BMI
-static const uint8_t idx[256][8] = {
-    {-1, -1, -1, -1, -1, -1, -1, -1}, {0, -1, -1, -1, -1, -1, -1, -1},
-    {2, -1, -1, -1, -1, -1, -1, -1},  {0, 2, -1, -1, -1, -1, -1, -1},
-    {4, -1, -1, -1, -1, -1, -1, -1},  {0, 4, -1, -1, -1, -1, -1, -1},
-    {2, 4, -1, -1, -1, -1, -1, -1},   {0, 2, 4, -1, -1, -1, -1, -1},
-    {6, -1, -1, -1, -1, -1, -1, -1},  {0, 6, -1, -1, -1, -1, -1, -1},
-    {2, 6, -1, -1, -1, -1, -1, -1},   {0, 2, 6, -1, -1, -1, -1, -1},
-    {4, 6, -1, -1, -1, -1, -1, -1},   {0, 4, 6, -1, -1, -1, -1, -1},
-    {2, 4, 6, -1, -1, -1, -1, -1},    {0, 2, 4, 6, -1, -1, -1, -1},
-    {8, -1, -1, -1, -1, -1, -1, -1},  {0, 8, -1, -1, -1, -1, -1, -1},
-    {2, 8, -1, -1, -1, -1, -1, -1},   {0, 2, 8, -1, -1, -1, -1, -1},
-    {4, 8, -1, -1, -1, -1, -1, -1},   {0, 4, 8, -1, -1, -1, -1, -1},
-    {2, 4, 8, -1, -1, -1, -1, -1},    {0, 2, 4, 8, -1, -1, -1, -1},
-    {6, 8, -1, -1, -1, -1, -1, -1},   {0, 6, 8, -1, -1, -1, -1, -1},
-    {2, 6, 8, -1, -1, -1, -1, -1},    {0, 2, 6, 8, -1, -1, -1, -1},
-    {4, 6, 8, -1, -1, -1, -1, -1},    {0, 4, 6, 8, -1, -1, -1, -1},
-    {2, 4, 6, 8, -1, -1, -1, -1},     {0, 2, 4, 6, 8, -1, -1, -1},
-    {10, -1, -1, -1, -1, -1, -1, -1}, {0, 10, -1, -1, -1, -1, -1, -1},
-    {2, 10, -1, -1, -1, -1, -1, -1},  {0, 2, 10, -1, -1, -1, -1, -1},
-    {4, 10, -1, -1, -1, -1, -1, -1},  {0, 4, 10, -1, -1, -1, -1, -1},
-    {2, 4, 10, -1, -1, -1, -1, -1},   {0, 2, 4, 10, -1, -1, -1, -1},
-    {6, 10, -1, -1, -1, -1, -1, -1},  {0, 6, 10, -1, -1, -1, -1, -1},
-    {2, 6, 10, -1, -1, -1, -1, -1},   {0, 2, 6, 10, -1, -1, -1, -1},
-    {4, 6, 10, -1, -1, -1, -1, -1},   {0, 4, 6, 10, -1, -1, -1, -1},
-    {2, 4, 6, 10, -1, -1, -1, -1},    {0, 2, 4, 6, 10, -1, -1, -1},
-    {8, 10, -1, -1, -1, -1, -1, -1},  {0, 8, 10, -1, -1, -1, -1, -1},
-    {2, 8, 10, -1, -1, -1, -1, -1},   {0, 2, 8, 10, -1, -1, -1, -1},
-    {4, 8, 10, -1, -1, -1, -1, -1},   {0, 4, 8, 10, -1, -1, -1, -1},
-    {2, 4, 8, 10, -1, -1, -1, -1},    {0, 2, 4, 8, 10, -1, -1, -1},
-    {6, 8, 10, -1, -1, -1, -1, -1},   {0, 6, 8, 10, -1, -1, -1, -1},
-    {2, 6, 8, 10, -1, -1, -1, -1},    {0, 2, 6, 8, 10, -1, -1, -1},
-    {4, 6, 8, 10, -1, -1, -1, -1},    {0, 4, 6, 8, 10, -1, -1, -1},
-    {2, 4, 6, 8, 10, -1, -1, -1},     {0, 2, 4, 6, 8, 10, -1, -1},
-    {12, -1, -1, -1, -1, -1, -1, -1}, {0, 12, -1, -1, -1, -1, -1, -1},
-    {2, 12, -1, -1, -1, -1, -1, -1},  {0, 2, 12, -1, -1, -1, -1, -1},
-    {4, 12, -1, -1, -1, -1, -1, -1},  {0, 4, 12, -1, -1, -1, -1, -1},
-    {2, 4, 12, -1, -1, -1, -1, -1},   {0, 2, 4, 12, -1, -1, -1, -1},
-    {6, 12, -1, -1, -1, -1, -1, -1},  {0, 6, 12, -1, -1, -1, -1, -1},
-    {2, 6, 12, -1, -1, -1, -1, -1},   {0, 2, 6, 12, -1, -1, -1, -1},
-    {4, 6, 12, -1, -1, -1, -1, -1},   {0, 4, 6, 12, -1, -1, -1, -1},
-    {2, 4, 6, 12, -1, -1, -1, -1},    {0, 2, 4, 6, 12, -1, -1, -1},
-    {8, 12, -1, -1, -1, -1, -1, -1},  {0, 8, 12, -1, -1, -1, -1, -1},
-    {2, 8, 12, -1, -1, -1, -1, -1},   {0, 2, 8, 12, -1, -1, -1, -1},
-    {4, 8, 12, -1, -1, -1, -1, -1},   {0, 4, 8, 12, -1, -1, -1, -1},
-    {2, 4, 8, 12, -1, -1, -1, -1},    {0, 2, 4, 8, 12, -1, -1, -1},
-    {6, 8, 12, -1, -1, -1, -1, -1},   {0, 6, 8, 12, -1, -1, -1, -1},
-    {2, 6, 8, 12, -1, -1, -1, -1},    {0, 2, 6, 8, 12, -1, -1, -1},
-    {4, 6, 8, 12, -1, -1, -1, -1},    {0, 4, 6, 8, 12, -1, -1, -1},
-    {2, 4, 6, 8, 12, -1, -1, -1},     {0, 2, 4, 6, 8, 12, -1, -1},
-    {10, 12, -1, -1, -1, -1, -1, -1}, {0, 10, 12, -1, -1, -1, -1, -1},
-    {2, 10, 12, -1, -1, -1, -1, -1},  {0, 2, 10, 12, -1, -1, -1, -1},
-    {4, 10, 12, -1, -1, -1, -1, -1},  {0, 4, 10, 12, -1, -1, -1, -1},
-    {2, 4, 10, 12, -1, -1, -1, -1},   {0, 2, 4, 10, 12, -1, -1, -1},
-    {6, 10, 12, -1, -1, -1, -1, -1},  {0, 6, 10, 12, -1, -1, -1, -1},
-    {2, 6, 10, 12, -1, -1, -1, -1},   {0, 2, 6, 10, 12, -1, -1, -1},
-    {4, 6, 10, 12, -1, -1, -1, -1},   {0, 4, 6, 10, 12, -1, -1, -1},
-    {2, 4, 6, 10, 12, -1, -1, -1},    {0, 2, 4, 6, 10, 12, -1, -1},
-    {8, 10, 12, -1, -1, -1, -1, -1},  {0, 8, 10, 12, -1, -1, -1, -1},
-    {2, 8, 10, 12, -1, -1, -1, -1},   {0, 2, 8, 10, 12, -1, -1, -1},
-    {4, 8, 10, 12, -1, -1, -1, -1},   {0, 4, 8, 10, 12, -1, -1, -1},
-    {2, 4, 8, 10, 12, -1, -1, -1},    {0, 2, 4, 8, 10, 12, -1, -1},
-    {6, 8, 10, 12, -1, -1, -1, -1},   {0, 6, 8, 10, 12, -1, -1, -1},
-    {2, 6, 8, 10, 12, -1, -1, -1},    {0, 2, 6, 8, 10, 12, -1, -1},
-    {4, 6, 8, 10, 12, -1, -1, -1},    {0, 4, 6, 8, 10, 12, -1, -1},
-    {2, 4, 6, 8, 10, 12, -1, -1},     {0, 2, 4, 6, 8, 10, 12, -1},
-    {14, -1, -1, -1, -1, -1, -1, -1}, {0, 14, -1, -1, -1, -1, -1, -1},
-    {2, 14, -1, -1, -1, -1, -1, -1},  {0, 2, 14, -1, -1, -1, -1, -1},
-    {4, 14, -1, -1, -1, -1, -1, -1},  {0, 4, 14, -1, -1, -1, -1, -1},
-    {2, 4, 14, -1, -1, -1, -1, -1},   {0, 2, 4, 14, -1, -1, -1, -1},
-    {6, 14, -1, -1, -1, -1, -1, -1},  {0, 6, 14, -1, -1, -1, -1, -1},
-    {2, 6, 14, -1, -1, -1, -1, -1},   {0, 2, 6, 14, -1, -1, -1, -1},
-    {4, 6, 14, -1, -1, -1, -1, -1},   {0, 4, 6, 14, -1, -1, -1, -1},
-    {2, 4, 6, 14, -1, -1, -1, -1},    {0, 2, 4, 6, 14, -1, -1, -1},
-    {8, 14, -1, -1, -1, -1, -1, -1},  {0, 8, 14, -1, -1, -1, -1, -1},
-    {2, 8, 14, -1, -1, -1, -1, -1},   {0, 2, 8, 14, -1, -1, -1, -1},
-    {4, 8, 14, -1, -1, -1, -1, -1},   {0, 4, 8, 14, -1, -1, -1, -1},
-    {2, 4, 8, 14, -1, -1, -1, -1},    {0, 2, 4, 8, 14, -1, -1, -1},
-    {6, 8, 14, -1, -1, -1, -1, -1},   {0, 6, 8, 14, -1, -1, -1, -1},
-    {2, 6, 8, 14, -1, -1, -1, -1},    {0, 2, 6, 8, 14, -1, -1, -1},
-    {4, 6, 8, 14, -1, -1, -1, -1},    {0, 4, 6, 8, 14, -1, -1, -1},
-    {2, 4, 6, 8, 14, -1, -1, -1},     {0, 2, 4, 6, 8, 14, -1, -1},
-    {10, 14, -1, -1, -1, -1, -1, -1}, {0, 10, 14, -1, -1, -1, -1, -1},
-    {2, 10, 14, -1, -1, -1, -1, -1},  {0, 2, 10, 14, -1, -1, -1, -1},
-    {4, 10, 14, -1, -1, -1, -1, -1},  {0, 4, 10, 14, -1, -1, -1, -1},
-    {2, 4, 10, 14, -1, -1, -1, -1},   {0, 2, 4, 10, 14, -1, -1, -1},
-    {6, 10, 14, -1, -1, -1, -1, -1},  {0, 6, 10, 14, -1, -1, -1, -1},
-    {2, 6, 10, 14, -1, -1, -1, -1},   {0, 2, 6, 10, 14, -1, -1, -1},
-    {4, 6, 10, 14, -1, -1, -1, -1},   {0, 4, 6, 10, 14, -1, -1, -1},
-    {2, 4, 6, 10, 14, -1, -1, -1},    {0, 2, 4, 6, 10, 14, -1, -1},
-    {8, 10, 14, -1, -1, -1, -1, -1},  {0, 8, 10, 14, -1, -1, -1, -1},
-    {2, 8, 10, 14, -1, -1, -1, -1},   {0, 2, 8, 10, 14, -1, -1, -1},
-    {4, 8, 10, 14, -1, -1, -1, -1},   {0, 4, 8, 10, 14, -1, -1, -1},
-    {2, 4, 8, 10, 14, -1, -1, -1},    {0, 2, 4, 8, 10, 14, -1, -1},
-    {6, 8, 10, 14, -1, -1, -1, -1},   {0, 6, 8, 10, 14, -1, -1, -1},
-    {2, 6, 8, 10, 14, -1, -1, -1},    {0, 2, 6, 8, 10, 14, -1, -1},
-    {4, 6, 8, 10, 14, -1, -1, -1},    {0, 4, 6, 8, 10, 14, -1, -1},
-    {2, 4, 6, 8, 10, 14, -1, -1},     {0, 2, 4, 6, 8, 10, 14, -1},
-    {12, 14, -1, -1, -1, -1, -1, -1}, {0, 12, 14, -1, -1, -1, -1, -1},
-    {2, 12, 14, -1, -1, -1, -1, -1},  {0, 2, 12, 14, -1, -1, -1, -1},
-    {4, 12, 14, -1, -1, -1, -1, -1},  {0, 4, 12, 14, -1, -1, -1, -1},
-    {2, 4, 12, 14, -1, -1, -1, -1},   {0, 2, 4, 12, 14, -1, -1, -1},
-    {6, 12, 14, -1, -1, -1, -1, -1},  {0, 6, 12, 14, -1, -1, -1, -1},
-    {2, 6, 12, 14, -1, -1, -1, -1},   {0, 2, 6, 12, 14, -1, -1, -1},
-    {4, 6, 12, 14, -1, -1, -1, -1},   {0, 4, 6, 12, 14, -1, -1, -1},
-    {2, 4, 6, 12, 14, -1, -1, -1},    {0, 2, 4, 6, 12, 14, -1, -1},
-    {8, 12, 14, -1, -1, -1, -1, -1},  {0, 8, 12, 14, -1, -1, -1, -1},
-    {2, 8, 12, 14, -1, -1, -1, -1},   {0, 2, 8, 12, 14, -1, -1, -1},
-    {4, 8, 12, 14, -1, -1, -1, -1},   {0, 4, 8, 12, 14, -1, -1, -1},
-    {2, 4, 8, 12, 14, -1, -1, -1},    {0, 2, 4, 8, 12, 14, -1, -1},
-    {6, 8, 12, 14, -1, -1, -1, -1},   {0, 6, 8, 12, 14, -1, -1, -1},
-    {2, 6, 8, 12, 14, -1, -1, -1},    {0, 2, 6, 8, 12, 14, -1, -1},
-    {4, 6, 8, 12, 14, -1, -1, -1},    {0, 4, 6, 8, 12, 14, -1, -1},
-    {2, 4, 6, 8, 12, 14, -1, -1},     {0, 2, 4, 6, 8, 12, 14, -1},
-    {10, 12, 14, -1, -1, -1, -1, -1}, {0, 10, 12, 14, -1, -1, -1, -1},
-    {2, 10, 12, 14, -1, -1, -1, -1},  {0, 2, 10, 12, 14, -1, -1, -1},
-    {4, 10, 12, 14, -1, -1, -1, -1},  {0, 4, 10, 12, 14, -1, -1, -1},
-    {2, 4, 10, 12, 14, -1, -1, -1},   {0, 2, 4, 10, 12, 14, -1, -1},
-    {6, 10, 12, 14, -1, -1, -1, -1},  {0, 6, 10, 12, 14, -1, -1, -1},
-    {2, 6, 10, 12, 14, -1, -1, -1},   {0, 2, 6, 10, 12, 14, -1, -1},
-    {4, 6, 10, 12, 14, -1, -1, -1},   {0, 4, 6, 10, 12, 14, -1, -1},
-    {2, 4, 6, 10, 12, 14, -1, -1},    {0, 2, 4, 6, 10, 12, 14, -1},
-    {8, 10, 12, 14, -1, -1, -1, -1},  {0, 8, 10, 12, 14, -1, -1, -1},
-    {2, 8, 10, 12, 14, -1, -1, -1},   {0, 2, 8, 10, 12, 14, -1, -1},
-    {4, 8, 10, 12, 14, -1, -1, -1},   {0, 4, 8, 10, 12, 14, -1, -1},
-    {2, 4, 8, 10, 12, 14, -1, -1},    {0, 2, 4, 8, 10, 12, 14, -1},
-    {6, 8, 10, 12, 14, -1, -1, -1},   {0, 6, 8, 10, 12, 14, -1, -1},
-    {2, 6, 8, 10, 12, 14, -1, -1},    {0, 2, 6, 8, 10, 12, 14, -1},
-    {4, 6, 8, 10, 12, 14, -1, -1},    {0, 4, 6, 8, 10, 12, 14, -1},
-    {2, 4, 6, 8, 10, 12, 14, -1},     {0, 2, 4, 6, 8, 10, 12, 14}};
-#endif
 
 #define _mm256_cmpge_epu16(a, b) _mm256_cmpeq_epi16(_mm256_max_epu16(a, b), a)
 #define _mm_cmpge_epu16(a, b) _mm_cmpeq_epi16(_mm_max_epu16(a, b), a)
@@ -159,9 +26,6 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
   unsigned int ctr, pos;
   uint16_t val0, val1;
   uint32_t good;
-#ifdef BMI
-  uint64_t idx0, idx1, idx2, idx3;
-#endif
   const __m256i bound = _mm256_load_si256(&qdata.vec[_16XQ / 16]);
   const __m256i ones = _mm256_set1_epi8(1);
   const __m256i mask = _mm256_set1_epi16(0xFFF);
@@ -195,34 +59,16 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
     g0 = _mm256_packs_epi16(g0, g1);
     good = _mm256_movemask_epi8(g0);
 
-#ifdef BMI
-    idx0 = _pdep_u64(good >> 0, 0x0101010101010101);
-    idx1 = _pdep_u64(good >> 8, 0x0101010101010101);
-    idx2 = _pdep_u64(good >> 16, 0x0101010101010101);
-    idx3 = _pdep_u64(good >> 24, 0x0101010101010101);
-    idx0 = (idx0 << 8) - idx0;
-    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
-    idx1 = (idx1 << 8) - idx1;
-    idx1 = _pext_u64(0x0E0C0A0806040200, idx1);
-    idx2 = (idx2 << 8) - idx2;
-    idx2 = _pext_u64(0x0E0C0A0806040200, idx2);
-    idx3 = (idx3 << 8) - idx3;
-    idx3 = _pext_u64(0x0E0C0A0806040200, idx3);
-
-    g0 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx0));
-    g1 = _mm256_castsi128_si256(_mm_cvtsi64_si128(idx1));
-    g0 = _mm256_inserti128_si256(g0, _mm_cvtsi64_si128(idx2), 1);
-    g1 = _mm256_inserti128_si256(g1, _mm_cvtsi64_si128(idx3), 1);
-#else
     g0 = _mm256_castsi128_si256(
-        _mm_loadl_epi64((__m128i *)&idx[(good >> 0) & 0xFF]));
+        _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 0) & 0xFF]));
     g1 = _mm256_castsi128_si256(
-        _mm_loadl_epi64((__m128i *)&idx[(good >> 8) & 0xFF]));
+        _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 8) & 0xFF]));
     g0 = _mm256_inserti128_si256(
-        g0, _mm_loadl_epi64((__m128i *)&idx[(good >> 16) & 0xFF]), 1);
+        g0, _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 16) & 0xFF]),
+        1);
     g1 = _mm256_inserti128_si256(
-        g1, _mm_loadl_epi64((__m128i *)&idx[(good >> 24) & 0xFF]), 1);
-#endif
+        g1, _mm_loadl_epi64((__m128i *)&rej_uniform_table[(good >> 24) & 0xFF]),
+        1);
 
     g2 = _mm256_add_epi8(g0, ones);
     g3 = _mm256_add_epi8(g1, ones);
@@ -254,16 +100,8 @@ unsigned int rej_uniform_avx2(int16_t *RESTRICT r, const uint8_t *buf)
     t = _mm_cmpgt_epi16(_mm256_castsi256_si128(bound), f);
     good = _mm_movemask_epi8(t);
 
-#ifdef BMI
-    good &= 0x5555;
-    idx0 = _pdep_u64(good, 0x1111111111111111);
-    idx0 = (idx0 << 8) - idx0;
-    idx0 = _pext_u64(0x0E0C0A0806040200, idx0);
-    pilo = _mm_cvtsi64_si128(idx0);
-#else
     good = _pext_u32(good, 0x5555);
-    pilo = _mm_loadl_epi64((__m128i *)&idx[good]);
-#endif
+    pilo = _mm_loadl_epi64((__m128i *)&rej_uniform_table[good]);
 
     pihi = _mm_add_epi8(pilo, _mm256_castsi256_si128(ones));
     pilo = _mm_unpacklo_epi8(pilo, pihi);
diff --git a/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_table.c b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_table.c
new file mode 100644
index 000000000..e49029140
--- /dev/null
+++ b/src/kem/ml_kem/mlkem-native_ml-kem-768_x86_64/x86_64/src/rej_uniform_table.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2024 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * WARNING: This file is auto-generated from scripts/autogenerate_files.py
+ *          Do not modify it directly.
+ */
+
+#include "common.h"
+
+#if defined(MLKEM_NATIVE_ARITH_BACKEND_X86_64_DEFAULT)
+
+#include <stdint.h>
+#include "arith_native_x86_64.h"
+
+/*
+ * Rejection-sampling table for the public matrix (see autogenerate_files.py):
+ * row m lists 2*k for each set bit k of m, ascending, padded with -1 (0xFF).
+ */
+ALIGN const uint8_t rej_uniform_table[256][8] = {
+    {-1, -1, -1, -1, -1, -1, -1, -1}, {0, -1, -1, -1, -1, -1, -1, -1},
+    {2, -1, -1, -1, -1, -1, -1, -1},  {0, 2, -1, -1, -1, -1, -1, -1},
+    {4, -1, -1, -1, -1, -1, -1, -1},  {0, 4, -1, -1, -1, -1, -1, -1},
+    {2, 4, -1, -1, -1, -1, -1, -1},   {0, 2, 4, -1, -1, -1, -1, -1},
+    {6, -1, -1, -1, -1, -1, -1, -1},  {0, 6, -1, -1, -1, -1, -1, -1},
+    {2, 6, -1, -1, -1, -1, -1, -1},   {0, 2, 6, -1, -1, -1, -1, -1},
+    {4, 6, -1, -1, -1, -1, -1, -1},   {0, 4, 6, -1, -1, -1, -1, -1},
+    {2, 4, 6, -1, -1, -1, -1, -1},    {0, 2, 4, 6, -1, -1, -1, -1},
+    {8, -1, -1, -1, -1, -1, -1, -1},  {0, 8, -1, -1, -1, -1, -1, -1},
+    {2, 8, -1, -1, -1, -1, -1, -1},   {0, 2, 8, -1, -1, -1, -1, -1},
+    {4, 8, -1, -1, -1, -1, -1, -1},   {0, 4, 8, -1, -1, -1, -1, -1},
+    {2, 4, 8, -1, -1, -1, -1, -1},    {0, 2, 4, 8, -1, -1, -1, -1},
+    {6, 8, -1, -1, -1, -1, -1, -1},   {0, 6, 8, -1, -1, -1, -1, -1},
+    {2, 6, 8, -1, -1, -1, -1, -1},    {0, 2, 6, 8, -1, -1, -1, -1},
+    {4, 6, 8, -1, -1, -1, -1, -1},    {0, 4, 6, 8, -1, -1, -1, -1},
+    {2, 4, 6, 8, -1, -1, -1, -1},     {0, 2, 4, 6, 8, -1, -1, -1},
+    {10, -1, -1, -1, -1, -1, -1, -1}, {0, 10, -1, -1, -1, -1, -1, -1},
+    {2, 10, -1, -1, -1, -1, -1, -1},  {0, 2, 10, -1, -1, -1, -1, -1},
+    {4, 10, -1, -1, -1, -1, -1, -1},  {0, 4, 10, -1, -1, -1, -1, -1},
+    {2, 4, 10, -1, -1, -1, -1, -1},   {0, 2, 4, 10, -1, -1, -1, -1},
+    {6, 10, -1, -1, -1, -1, -1, -1},  {0, 6, 10, -1, -1, -1, -1, -1},
+    {2, 6, 10, -1, -1, -1, -1, -1},   {0, 2, 6, 10, -1, -1, -1, -1},
+    {4, 6, 10, -1, -1, -1, -1, -1},   {0, 4, 6, 10, -1, -1, -1, -1},
+    {2, 4, 6, 10, -1, -1, -1, -1},    {0, 2, 4, 6, 10, -1, -1, -1},
+    {8, 10, -1, -1, -1, -1, -1, -1},  {0, 8, 10, -1, -1, -1, -1, -1},
+    {2, 8, 10, -1, -1, -1, -1, -1},   {0, 2, 8, 10, -1, -1, -1, -1},
+    {4, 8, 10, -1, -1, -1, -1, -1},   {0, 4, 8, 10, -1, -1, -1, -1},
+    {2, 4, 8, 10, -1, -1, -1, -1},    {0, 2, 4, 8, 10, -1, -1, -1},
+    {6, 8, 10, -1, -1, -1, -1, -1},   {0, 6, 8, 10, -1, -1, -1, -1},
+    {2, 6, 8, 10, -1, -1, -1, -1},    {0, 2, 6, 8, 10, -1, -1, -1},
+    {4, 6, 8, 10, -1, -1, -1, -1},    {0, 4, 6, 8, 10, -1, -1, -1},
+    {2, 4, 6, 8, 10, -1, -1, -1},     {0, 2, 4, 6, 8, 10, -1, -1},
+    {12, -1, -1, -1, -1, -1, -1, -1}, {0, 12, -1, -1, -1, -1, -1, -1},
+    {2, 12, -1, -1, -1, -1, -1, -1},  {0, 2, 12, -1, -1, -1, -1, -1},
+    {4, 12, -1, -1, -1, -1, -1, -1},  {0, 4, 12, -1, -1, -1, -1, -1},
+    {2, 4, 12, -1, -1, -1, -1, -1},   {0, 2, 4, 12, -1, -1, -1, -1},
+    {6, 12, -1, -1, -1, -1, -1, -1},  {0, 6, 12, -1, -1, -1, -1, -1},
+    {2, 6, 12, -1, -1, -1, -1, -1},   {0, 2, 6, 12, -1, -1, -1, -1},
+    {4, 6, 12, -1, -1, -1, -1, -1},   {0, 4, 6, 12, -1, -1, -1, -1},
+    {2, 4, 6, 12, -1, -1, -1, -1},    {0, 2, 4, 6, 12, -1, -1, -1},
+    {8, 12, -1, -1, -1, -1, -1, -1},  {0, 8, 12, -1, -1, -1, -1, -1},
+    {2, 8, 12, -1, -1, -1, -1, -1},   {0, 2, 8, 12, -1, -1, -1, -1},
+    {4, 8, 12, -1, -1, -1, -1, -1},   {0, 4, 8, 12, -1, -1, -1, -1},
+    {2, 4, 8, 12, -1, -1, -1, -1},    {0, 2, 4, 8, 12, -1, -1, -1},
+    {6, 8, 12, -1, -1, -1, -1, -1},   {0, 6, 8, 12, -1, -1, -1, -1},
+    {2, 6, 8, 12, -1, -1, -1, -1},    {0, 2, 6, 8, 12, -1, -1, -1},
+    {4, 6, 8, 12, -1, -1, -1, -1},    {0, 4, 6, 8, 12, -1, -1, -1},
+    {2, 4, 6, 8, 12, -1, -1, -1},     {0, 2, 4, 6, 8, 12, -1, -1},
+    {10, 12, -1, -1, -1, -1, -1, -1}, {0, 10, 12, -1, -1, -1, -1, -1},
+    {2, 10, 12, -1, -1, -1, -1, -1},  {0, 2, 10, 12, -1, -1, -1, -1},
+    {4, 10, 12, -1, -1, -1, -1, -1},  {0, 4, 10, 12, -1, -1, -1, -1},
+    {2, 4, 10, 12, -1, -1, -1, -1},   {0, 2, 4, 10, 12, -1, -1, -1},
+    {6, 10, 12, -1, -1, -1, -1, -1},  {0, 6, 10, 12, -1, -1, -1, -1},
+    {2, 6, 10, 12, -1, -1, -1, -1},   {0, 2, 6, 10, 12, -1, -1, -1},
+    {4, 6, 10, 12, -1, -1, -1, -1},   {0, 4, 6, 10, 12, -1, -1, -1},
+    {2, 4, 6, 10, 12, -1, -1, -1},    {0, 2, 4, 6, 10, 12, -1, -1},
+    {8, 10, 12, -1, -1, -1, -1, -1},  {0, 8, 10, 12, -1, -1, -1, -1},
+    {2, 8, 10, 12, -1, -1, -1, -1},   {0, 2, 8, 10, 12, -1, -1, -1},
+    {4, 8, 10, 12, -1, -1, -1, -1},   {0, 4, 8, 10, 12, -1, -1, -1},
+    {2, 4, 8, 10, 12, -1, -1, -1},    {0, 2, 4, 8, 10, 12, -1, -1},
+    {6, 8, 10, 12, -1, -1, -1, -1},   {0, 6, 8, 10, 12, -1, -1, -1},
+    {2, 6, 8, 10, 12, -1, -1, -1},    {0, 2, 6, 8, 10, 12, -1, -1},
+    {4, 6, 8, 10, 12, -1, -1, -1},    {0, 4, 6, 8, 10, 12, -1, -1},
+    {2, 4, 6, 8, 10, 12, -1, -1},     {0, 2, 4, 6, 8, 10, 12, -1},
+    {14, -1, -1, -1, -1, -1, -1, -1}, {0, 14, -1, -1, -1, -1, -1, -1},
+    {2, 14, -1, -1, -1, -1, -1, -1},  {0, 2, 14, -1, -1, -1, -1, -1},
+    {4, 14, -1, -1, -1, -1, -1, -1},  {0, 4, 14, -1, -1, -1, -1, -1},
+    {2, 4, 14, -1, -1, -1, -1, -1},   {0, 2, 4, 14, -1, -1, -1, -1},
+    {6, 14, -1, -1, -1, -1, -1, -1},  {0, 6, 14, -1, -1, -1, -1, -1},
+    {2, 6, 14, -1, -1, -1, -1, -1},   {0, 2, 6, 14, -1, -1, -1, -1},
+    {4, 6, 14, -1, -1, -1, -1, -1},   {0, 4, 6, 14, -1, -1, -1, -1},
+    {2, 4, 6, 14, -1, -1, -1, -1},    {0, 2, 4, 6, 14, -1, -1, -1},
+    {8, 14, -1, -1, -1, -1, -1, -1},  {0, 8, 14, -1, -1, -1, -1, -1},
+    {2, 8, 14, -1, -1, -1, -1, -1},   {0, 2, 8, 14, -1, -1, -1, -1},
+    {4, 8, 14, -1, -1, -1, -1, -1},   {0, 4, 8, 14, -1, -1, -1, -1},
+    {2, 4, 8, 14, -1, -1, -1, -1},    {0, 2, 4, 8, 14, -1, -1, -1},
+    {6, 8, 14, -1, -1, -1, -1, -1},   {0, 6, 8, 14, -1, -1, -1, -1},
+    {2, 6, 8, 14, -1, -1, -1, -1},    {0, 2, 6, 8, 14, -1, -1, -1},
+    {4, 6, 8, 14, -1, -1, -1, -1},    {0, 4, 6, 8, 14, -1, -1, -1},
+    {2, 4, 6, 8, 14, -1, -1, -1},     {0, 2, 4, 6, 8, 14, -1, -1},
+    {10, 14, -1, -1, -1, -1, -1, -1}, {0, 10, 14, -1, -1, -1, -1, -1},
+    {2, 10, 14, -1, -1, -1, -1, -1},  {0, 2, 10, 14, -1, -1, -1, -1},
+    {4, 10, 14, -1, -1, -1, -1, -1},  {0, 4, 10, 14, -1, -1, -1, -1},
+    {2, 4, 10, 14, -1, -1, -1, -1},   {0, 2, 4, 10, 14, -1, -1, -1},
+    {6, 10, 14, -1, -1, -1, -1, -1},  {0, 6, 10, 14, -1, -1, -1, -1},
+    {2, 6, 10, 14, -1, -1, -1, -1},   {0, 2, 6, 10, 14, -1, -1, -1},
+    {4, 6, 10, 14, -1, -1, -1, -1},   {0, 4, 6, 10, 14, -1, -1, -1},
+    {2, 4, 6, 10, 14, -1, -1, -1},    {0, 2, 4, 6, 10, 14, -1, -1},
+    {8, 10, 14, -1, -1, -1, -1, -1},  {0, 8, 10, 14, -1, -1, -1, -1},
+    {2, 8, 10, 14, -1, -1, -1, -1},   {0, 2, 8, 10, 14, -1, -1, -1},
+    {4, 8, 10, 14, -1, -1, -1, -1},   {0, 4, 8, 10, 14, -1, -1, -1},
+    {2, 4, 8, 10, 14, -1, -1, -1},    {0, 2, 4, 8, 10, 14, -1, -1},
+    {6, 8, 10, 14, -1, -1, -1, -1},   {0, 6, 8, 10, 14, -1, -1, -1},
+    {2, 6, 8, 10, 14, -1, -1, -1},    {0, 2, 6, 8, 10, 14, -1, -1},
+    {4, 6, 8, 10, 14, -1, -1, -1},    {0, 4, 6, 8, 10, 14, -1, -1},
+    {2, 4, 6, 8, 10, 14, -1, -1},     {0, 2, 4, 6, 8, 10, 14, -1},
+    {12, 14, -1, -1, -1, -1, -1, -1}, {0, 12, 14, -1, -1, -1, -1, -1},
+    {2, 12, 14, -1, -1, -1, -1, -1},  {0, 2, 12, 14, -1, -1, -1, -1},
+    {4, 12, 14, -1, -1, -1, -1, -1},  {0, 4, 12, 14, -1, -1, -1, -1},
+    {2, 4, 12, 14, -1, -1, -1, -1},   {0, 2, 4, 12, 14, -1, -1, -1},
+    {6, 12, 14, -1, -1, -1, -1, -1},  {0, 6, 12, 14, -1, -1, -1, -1},
+    {2, 6, 12, 14, -1, -1, -1, -1},   {0, 2, 6, 12, 14, -1, -1, -1},
+    {4, 6, 12, 14, -1, -1, -1, -1},   {0, 4, 6, 12, 14, -1, -1, -1},
+    {2, 4, 6, 12, 14, -1, -1, -1},    {0, 2, 4, 6, 12, 14, -1, -1},
+    {8, 12, 14, -1, -1, -1, -1, -1},  {0, 8, 12, 14, -1, -1, -1, -1},
+    {2, 8, 12, 14, -1, -1, -1, -1},   {0, 2, 8, 12, 14, -1, -1, -1},
+    {4, 8, 12, 14, -1, -1, -1, -1},   {0, 4, 8, 12, 14, -1, -1, -1},
+    {2, 4, 8, 12, 14, -1, -1, -1},    {0, 2, 4, 8, 12, 14, -1, -1},
+    {6, 8, 12, 14, -1, -1, -1, -1},   {0, 6, 8, 12, 14, -1, -1, -1},
+    {2, 6, 8, 12, 14, -1, -1, -1},    {0, 2, 6, 8, 12, 14, -1, -1},
+    {4, 6, 8, 12, 14, -1, -1, -1},    {0, 4, 6, 8, 12, 14, -1, -1},
+    {2, 4, 6, 8, 12, 14, -1, -1},     {0, 2, 4, 6, 8, 12, 14, -1},
+    {10, 12, 14, -1, -1, -1, -1, -1}, {0, 10, 12, 14, -1, -1, -1, -1},
+    {2, 10, 12, 14, -1, -1, -1, -1},  {0, 2, 10, 12, 14, -1, -1, -1},
+    {4, 10, 12, 14, -1, -1, -1, -1},  {0, 4, 10, 12, 14, -1, -1, -1},
+    {2, 4, 10, 12, 14, -1, -1, -1},   {0, 2, 4, 10, 12, 14, -1, -1},
+    {6, 10, 12, 14, -1, -1, -1, -1},  {0, 6, 10, 12, 14, -1, -1, -1},
+    {2, 6, 10, 12, 14, -1, -1, -1},   {0, 2, 6, 10, 12, 14, -1, -1},
+    {4, 6, 10, 12, 14, -1, -1, -1},   {0, 4, 6, 10, 12, 14, -1, -1},
+    {2, 4, 6, 10, 12, 14, -1, -1},    {0, 2, 4, 6, 10, 12, 14, -1},
+    {8, 10, 12, 14, -1, -1, -1, -1},  {0, 8, 10, 12, 14, -1, -1, -1},
+    {2, 8, 10, 12, 14, -1, -1, -1},   {0, 2, 8, 10, 12, 14, -1, -1},
+    {4, 8, 10, 12, 14, -1, -1, -1},   {0, 4, 8, 10, 12, 14, -1, -1},
+    {2, 4, 8, 10, 12, 14, -1, -1},    {0, 2, 4, 8, 10, 12, 14, -1},
+    {6, 8, 10, 12, 14, -1, -1, -1},   {0, 6, 8, 10, 12, 14, -1, -1},
+    {2, 6, 8, 10, 12, 14, -1, -1},    {0, 2, 6, 8, 10, 12, 14, -1},
+    {4, 6, 8, 10, 12, 14, -1, -1},    {0, 4, 6, 8, 10, 12, 14, -1},
+    {2, 4, 6, 8, 10, 12, 14, -1},     {0, 2, 4, 6, 8, 10, 12, 14},
+};
+
+#else
+
+/* Dummy declaration for compilers disliking empty compilation units */
+#define empty_cu_avx2_rej_uniform_table \
+  MLKEM_NAMESPACE(empty_cu_avx2_rej_uniform_table)
+int empty_cu_avx2_rej_uniform_table;
+#endif