diff --git a/.asf.yaml b/.asf.yaml index f3a8ed9fee90f..685776c9a3872 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -20,7 +20,7 @@ github: homepage: https://arrow.apache.org/ collaborators: - anjakefala - - benibus + - hiroyuki-sato - jbonofre - js8544 - vibhatha diff --git a/.env b/.env index c18a3b066f9b2..bdb74d89e1c6e 100644 --- a/.env +++ b/.env @@ -54,6 +54,7 @@ UBUNTU=22.04 # Default versions for various dependencies CLANG_TOOLS=14 +CMAKE=3.25.0 CUDA=11.2.2 DASK=latest DOTNET=8.0 @@ -62,7 +63,7 @@ HDFS=3.2.1 JDK=11 KARTOTHEK=latest # LLVM 12 and GCC 11 reports -Wmismatched-new-delete. -LLVM=14 +LLVM=18 MAVEN=3.8.7 NODE=18 NUMBA=latest @@ -89,17 +90,17 @@ TZ=UTC # Used through docker-compose.yml and serves as the default version for the # ci/scripts/install_vcpkg.sh script. Prefer to use short SHAs to keep the # docker tags more readable. -VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release +VCPKG="f7423ee180c4b7f40d43402c2feb3859161ef625" # 2024.06.15 Release # This must be updated when we update # ci/docker/python-*-windows-*.dockerfile or the vcpkg config. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-01-27 -PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-01-27 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-02-25 +PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-02-25 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan". # See https://github.com/conan-io/conan-docker-tools#readme and # https://hub.docker.com/u/conanio for available images. -CONAN_BASE=gcc10 -CONAN_VERSION=1.62.0 +CONAN_BASE=gcc11-ubuntu16.04 +CONAN_VERSION=2.12.1 diff --git a/.github/ISSUE_TEMPLATE/usage_question.yaml b/.github/ISSUE_TEMPLATE/usage_question.yaml index c711190540afa..3c232dd931a54 100644 --- a/.github/ISSUE_TEMPLATE/usage_question.yaml +++ b/.github/ISSUE_TEMPLATE/usage_question.yaml @@ -23,23 +23,26 @@ body: - type: markdown attributes: value: > - While we enable issues as a mechanism for new contributors and passers-by who - are unfamiliar with Apache Software Foundation projects to ask questions and - interact with the project, we encourage users to ask such questions on public - mailing lists: - - * Development discussions: dev@arrow.apache.org (first subscribe by sending an - e-mail to dev-subscribe@arrow.apache.org). - - * User discussions: user@arrow.apache.org (first subscribe by sending an e-mail - to user-subscribe@arrow.apache.org). - - * Mailing list archives: https://arrow.apache.org/community/ - - - Do not be surprised by responses to issues raised here directing you to those - mailing lists, or to report a bug or feature request here. + While we enable issues as a mechanism for new contributors and + passers-by who are unfamiliar with Apache Software Foundation projects + to ask questions and interact with the project, we encourage users to + ask such questions on the [public mailing + lists](https://arrow.apache.org/community/) as these provide higher + visibility than GitHub issues: + + * For usage questions, please email user@arrow.apache.org (first + subscribe by sending an e-mail to user-subscribe@arrow.apache.org). + + * For discussions about contributing or development, please email + dev@arrow.apache.org (first subscribe by sending an e-mail to + dev-subscribe@arrow.apache.org). 
+ Please see the [Apache Arrow Community + page](https://arrow.apache.org/community/) for more information on the + mailing lists as well as for a link to the searchable archives. + + Do not be surprised by responses to issues raised here directing you to those + mailing lists, or to report a bug or feature request here. Thank you! - type: textarea diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 3839d3e2fc889..4b3eac2d43305 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,61 +1,20 @@ - - +Please remove this line and the above text before creating your pull request. ### Rationale for this change - - ### What changes are included in this PR? - - ### Are these changes tested? - - ### Are there any user-facing changes? - - - - +**This PR includes breaking changes to public APIs.** (If there are any breaking changes to public APIs, please explain which changes are breaking. If not, you can remove this.) - - \ No newline at end of file +**This PR contains a "Critical Fix".** (If the changes fix either (a) a security vulnerability, (b) a bug that caused incorrect or invalid data to be produced, or (c) a bug that causes a crash (even when the API contract is upheld), please provide explanation. If not, you can remove this.) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 6dc4da306a1ea..e486ef0e16e59 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -65,7 +65,7 @@ jobs: shell: bash run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python - uses: actions/setup-python@v5.3.0 + uses: actions/setup-python@v5.4.0 with: python-version: '3.9' - name: Install pygit2 binary wheel diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 83b6f6e31ffc3..578b47361b71e 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -42,7 +42,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 7dfe987d2eaff..cbb448cfa07f2 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -94,12 +94,12 @@ jobs: title: AMD64 Ubuntu 22.04 C++ ASAN UBSAN ubuntu: 22.04 - arch: arm64v8 - clang-tools: 10 + clang-tools: 14 image: ubuntu-cpp - llvm: 10 + llvm: 14 runs-on: ubuntu-24.04-arm - title: ARM64 Ubuntu 20.04 C++ - ubuntu: 20.04 + title: ARM64 Ubuntu 22.04 C++ + ubuntu: 22.04 env: ARCH: ${{ matrix.arch }} ARROW_SIMD_LEVEL: ${{ matrix.simd-level }} @@ -113,7 +113,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: ${{ matrix.image }}-${{ hashFiles('cpp/**') }} @@ -121,7 +121,7 @@ jobs: - name: Setup Python on hosted runner if: | matrix.runs-on == 'ubuntu-latest' - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3 - name: Setup Python on self-hosted runner @@ -156,7 +156,7 @@ jobs: build-example: name: C++ Minimal Build Example - runs-on: ubuntu-latest 
+ runs-on: ubuntu-24.04 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 45 steps: @@ -234,7 +234,7 @@ jobs: $(brew --prefix bash)/bin/bash \ ci/scripts/install_minio.sh latest ${ARROW_HOME} - name: Set up Python - uses: actions/setup-python@v5.3.0 + uses: actions/setup-python@v5.4.0 with: python-version: 3.12 - name: Install Google Cloud Storage Testbench @@ -274,11 +274,11 @@ jobs: fail-fast: false matrix: os: - - windows-2019 + - windows-2022 include: - - os: windows-2019 + - os: windows-2022 simd-level: AVX2 - title: AMD64 Windows 2019 C++17 AVX2 + title: AMD64 Windows 2022 AVX2 env: ARROW_BOOST_USE_SHARED: OFF ARROW_BUILD_BENCHMARKS: ON @@ -352,7 +352,7 @@ jobs: - name: Build shell: cmd run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" - name: Test shell: bash @@ -453,7 +453,7 @@ jobs: https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z chmod +x /usr/local/bin/minio.exe - name: Set up Python - uses: actions/setup-python@v5.3.0 + uses: actions/setup-python@v5.4.0 id: python-install with: python-version: 3.9 diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 72ca0565ebd4c..6622323a7205d 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -54,11 +54,11 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4.2.0 + uses: actions/setup-dotnet@v4.3.0 with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3 - name: Checkout Arrow @@ -86,7 +86,7 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4.2.0 + uses: actions/setup-dotnet@v4.3.0 with: dotnet-version: ${{ matrix.dotnet }} - name: Checkout Arrow @@ -113,11 +113,11 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4.2.0 + uses: actions/setup-dotnet@v4.3.0 with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Checkout Arrow @@ -182,7 +182,7 @@ jobs: "s/^ .+<\/Version>/ ${semver}<\/Version>/" \ csharp/Directory.Build.props - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3 - name: Setup Archery @@ -199,7 +199,7 @@ jobs: dev/release/utils-generate-checksum.sh "${artifact}" done - name: Upload - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: name: nuget path: | diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index f9718cbf7bb18..a62e06a7e29b4 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -41,7 +41,7 @@ jobs: lint: name: Lint C++, Python, R, Docker, RAT - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 15 steps: @@ -50,7 +50,7 @@ 
jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Install pre-commit @@ -109,7 +109,7 @@ jobs: with: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: '3.12' - name: Install Ruby @@ -117,7 +117,7 @@ jobs: with: ruby-version: ruby - name: Install .NET - uses: actions/setup-dotnet@87b7050bc53ea08284295505d98d2aa94301e852 # v4.2.0 + uses: actions/setup-dotnet@3951f0dfe7a07e2313ec93c75700083e2005cbab # v4.3.0 with: dotnet-version: '8.0.x' - name: Install Dependencies diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 83f835d588af2..55ca6e6f2cc2c 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -46,13 +46,13 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: debian-docs-${{ hashFiles('cpp/**') }} restore-keys: debian-docs- - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 0e23394e8a453..f406c7396c46e 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -53,13 +53,13 @@ jobs: with: fetch-depth: 0 - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: conda-docs-${{ hashFiles('cpp/**') }} restore-keys: conda-docs- - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index aef81df074888..a6a6d22d09f92 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -100,13 +100,13 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: conda-${{ hashFiles('cpp/**') }} restore-keys: conda- - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 5ef5b37c98815..e100e26a05d50 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -61,7 +61,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 2bdfd0743a547..101724b3e2cd3 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -46,24 +46,8 @@ 
permissions: jobs: ubuntu: - name: AMD64 Ubuntu 20.04 MATLAB - # Explicitly pin the Ubuntu version to 20.04 for the time being because: - # - # 1. The version of GLIBCXX shipped with Ubuntu 22.04 is not binary compatible - # with the GLIBCXX bundled with MATLAB R2023a. This is a relatively common - # issue. - # - # For example, see: - # - # https://www.mathworks.com/matlabcentral/answers/1907290-how-to-manually-select-the-libstdc-library-to-use-to-resolve-a-version-glibcxx_-not-found - # - # 2. The version of GLIBCXX shipped with Ubuntu 22.04 is not binary compatible with - # the version of GLIBCXX shipped with Debian 11. Several of the Arrow community - # members who work on the MATLAB bindings use Debian 11 locally for qualification. - # Using Ubuntu 20.04 eases development workflows for these community members. - # - # In the future, we can investigate adding support for building against more Linux (e.g. `ubuntu-22.04`) and MATLAB versions (e.g. R2023b). - runs-on: ubuntu-20.04 + name: AMD64 Ubuntu 22.04 MATLAB + runs-on: ubuntu-22.04 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} steps: - name: Check out repository @@ -155,7 +139,7 @@ jobs: runs-on: windows-2022 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} steps: - - name: Check out repository + - name: Check out repository uses: actions/checkout@v4 with: fetch-depth: 0 diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml index 1eedacf1abf31..a9ccdad0c52cb 100644 --- a/.github/workflows/pr_bot.yml +++ b/.github/workflows/pr_bot.yml @@ -82,7 +82,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/pr_review_trigger.yml b/.github/workflows/pr_review_trigger.yml index a6dd5f1275331..4457e31cd986b 100644 --- a/.github/workflows/pr_review_trigger.yml +++ b/.github/workflows/pr_review_trigger.yml @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - name: "Upload PR review Payload" - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: path: "${{ github.event_path }}" name: "pr_review_payload" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ba05fab65ada2..19e7754fd87e1 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -108,13 +108,13 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} restore-keys: ${{ matrix.cache }}- - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery @@ -183,7 +183,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@v5.3.0 + uses: actions/setup-python@v5.4.0 with: python-version: '3.11' - name: Install Dependencies diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index cb000f8b95c1b..f74abd8e58ab0 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -145,7 
+145,7 @@ jobs: run: | ci/scripts/util_free_space.sh - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker # As this key is identical on both matrix builds only one will be able to successfully cache, @@ -155,7 +155,7 @@ jobs: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}- ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}- - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery @@ -177,7 +177,7 @@ jobs: if: always() - name: Save the test output if: always() - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: name: test-output-${{ matrix.ubuntu }}-${{ matrix.r }} path: r/check/arrow.Rcheck/tests/testthat.Rout* @@ -214,7 +214,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery @@ -237,7 +237,7 @@ jobs: if: always() - name: Save the test output if: always() - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: name: test-output-bundled path: r/check/arrow.Rcheck/tests/testthat.Rout* @@ -299,7 +299,7 @@ jobs: # So that they're unique when multiple are downloaded in the next step shell: bash run: mv libarrow.zip libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip - - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0 + - uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1 with: name: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip path: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip @@ -337,7 +337,7 @@ jobs: echo "$HOME/.local/bin" >> $GITHUB_PATH - run: mkdir r/windows - name: Download artifacts - uses: actions/download-artifact@v4.1.8 + uses: actions/download-artifact@v4.1.9 with: name: libarrow-rtools40-ucrt64.zip path: r/windows @@ -361,6 +361,7 @@ jobs: working-directory: 'r' extra-packages: | any::rcmdcheck + any::cyclocomp # TODO(ARROW-17149): figure out why the GCS tests are hanging on Windows # - name: Install Google Cloud Storage Testbench # shell: bash diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index 4fcb399c91fc6..e8a3c58a8451b 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -60,7 +60,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: cache: 'pip' python-version: 3.12 @@ -86,7 +86,7 @@ jobs: exit 1 fi - name: Cache Repo - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: repo key: r-nightly-${{ github.run_id }} @@ -103,6 +103,7 @@ jobs: remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} - run: tree repo + - uses: 
r-lib/actions/setup-r@v2 - name: Build Repository shell: Rscript {0} run: | diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 589b74cd687fd..7a29d35ee7b1d 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -84,13 +84,13 @@ jobs: fetch-depth: 0 submodules: recursive - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@v4 with: path: .docker key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby- - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml index 2fd55d457c208..971c2590c5af4 100644 --- a/.github/workflows/swift.yml +++ b/.github/workflows/swift.yml @@ -64,7 +64,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python on hosted runner - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 with: python-version: 3 - name: Setup Archery diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0ee2e233bb19f..54ee2a76b96bf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: files: >- ( ?^ci/docker/conda-python-emscripten\.dockerfile$| - ?^ci/docker/python-.*-wheel-windows-test-vs2019.*\.dockerfile$| + ?^ci/docker/python-.*-wheel-windows-test-vs2022.*\.dockerfile$| ) types: [] - repo: https://github.com/pycqa/flake8 @@ -141,6 +141,18 @@ repos: ( ?^r/src/arrowExports\.cpp$| ) + - repo: https://github.com/rubocop/rubocop + rev: "v1.71.0" + hooks: + - id: rubocop + name: Ruby Format + alias: ruby-format + args: + - "--autocorrect" + exclude: >- + ( + ?^dev/tasks/homebrew-formulae/.*\.rb$| + ) - repo: https://github.com/cheshirekow/cmake-format-precommit rev: v0.6.13 hooks: @@ -185,3 +197,8 @@ repos: ?^c_glib/test/run-test\.sh$| ?^dev/release/utils-generate-checksum\.sh$| ) + - repo: https://github.com/trim21/pre-commit-mirror-meson + rev: v1.6.1 + hooks: + - id: meson-fmt + args: ['--inplace'] diff --git a/ci/docker/ubuntu-20.04-verify-rc.dockerfile b/.rubocop.yml similarity index 75% rename from ci/docker/ubuntu-20.04-verify-rc.dockerfile rename to .rubocop.yml index cee1e50e080c5..3f48689796d2f 100644 --- a/ci/docker/ubuntu-20.04-verify-rc.dockerfile +++ b/.rubocop.yml @@ -15,12 +15,19 @@ # specific language governing permissions and limitations # under the License. -ARG arch=amd64 -FROM ${arch}/ubuntu:20.04 +# The Ruby lint configuration starts minimal: +# all cops are disabled by default. +AllCops: + DisabledByDefault: true -ENV DEBIAN_FRONTEND=noninteractive -COPY dev/release/setup-ubuntu.sh / -RUN /setup-ubuntu.sh && \ - rm /setup-ubuntu.sh && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* +Lint: + Enabled: false + +Layout/LineLength: + Max: 100 + +Layout/ArgumentAlignment: + Enabled: true + +Layout/SpaceAfterComma: + Enabled: true diff --git a/LICENSE.txt b/LICENSE.txt index 7bb1330a1002b..7d5de9e3bfeb7 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -2257,5 +2257,36 @@ SOFTWARE.
java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectHashMap.java java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectMap.java -These file are derived from code from Netty, which is made available under the +These files are derived from code from Netty, which is made available under the Apache License 2.0. + +-------------------------------------------------------------------------------- +cpp/src/arrow/util/math_internal.cc (some portions) + +Some portions of this file are derived from + +https://github.com/ankane/dist-rust/ + +which is made available under the MIT license + +The MIT License (MIT) + +Copyright (c) 2021-2023 Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md index f49ec4b8d98ee..c557716a4a88b 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Major components of the project include: - [Gandiva](https://github.com/apache/arrow/tree/main/cpp/src/gandiva): an [LLVM](https://llvm.org)-based Arrow expression compiler, part of the C++ codebase - [Go libraries](https://github.com/apache/arrow-go) - - [Java libraries](https://github.com/apache/arrow/tree/main/java) + - [Java libraries](https://github.com/apache/arrow-java) - [JavaScript libraries](https://github.com/apache/arrow/tree/main/js) - [Python libraries](https://github.com/apache/arrow/tree/main/python) - [R libraries](https://github.com/apache/arrow/tree/main/r) diff --git a/c_glib/arrow-cuda-glib/meson.build b/c_glib/arrow-cuda-glib/meson.build index 36730dec6c4b7..0f93d95ca01f2 100644 --- a/c_glib/arrow-cuda-glib/meson.build +++ b/c_glib/arrow-cuda-glib/meson.build @@ -17,24 +17,27 @@ # specific language governing permissions and limitations # under the License. 
-sources = files( - 'cuda.cpp', -) +sources = files('cuda.cpp') -c_headers = files( - 'arrow-cuda-glib.h', - 'cuda.h', -) +c_headers = files('arrow-cuda-glib.h', 'cuda.h') -cpp_headers = files( - 'arrow-cuda-glib.hpp', - 'cuda.hpp', -) +cpp_headers = files('arrow-cuda-glib.hpp', 'cuda.hpp') version_h = configure_file( - input: 'version.h.in', - output: 'version.h', - command: [python3, generate_version_header_py, '--library', 'GARROW_CUDA', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], + input: 'version.h.in', + output: 'version.h', + command: [ + python3, + generate_version_header_py, + '--library', + 'GARROW_CUDA', + '--version', + version, + '--input', + '@INPUT@', + '--output', + '@OUTPUT@', + ], ) c_headers += version_h @@ -42,63 +45,61 @@ c_headers += version_h headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-cuda-glib') -dependencies = [ - arrow_cuda, - arrow_glib, -] -libarrow_cuda_glib = library('arrow-cuda-glib', - sources: sources, - install: true, - dependencies: dependencies, - implicit_include_directories: false, - include_directories: base_include_directories, - cpp_args: ['-DGARROW_CUDA_COMPILATION'], - soversion: so_version, - version: library_version) -arrow_cuda_glib = declare_dependency(link_with: libarrow_cuda_glib, - include_directories: base_include_directories, - dependencies: dependencies) +dependencies = [arrow_cuda, arrow_glib] +libarrow_cuda_glib = library( + 'arrow-cuda-glib', + sources: sources, + install: true, + dependencies: dependencies, + implicit_include_directories: false, + include_directories: base_include_directories, + cpp_args: ['-DGARROW_CUDA_COMPILATION'], + soversion: so_version, + version: library_version, +) +arrow_cuda_glib = declare_dependency( + link_with: libarrow_cuda_glib, + include_directories: base_include_directories, + dependencies: dependencies, +) if target_machine.system() != 'windows' - pkgconfig.generate(libarrow_cuda_glib, - description: 'C API for Apache Arrow CUDA based on GLib', - filebase: 'arrow-cuda-glib', - name: 'Apache Arrow CUDA GLib', - requires: ['arrow-glib', 'arrow-cuda'], - variables: pkgconfig_variables, - version: version) + pkgconfig.generate( + libarrow_cuda_glib, + description: 'C API for Apache Arrow CUDA based on GLib', + filebase: 'arrow-cuda-glib', + name: 'Apache Arrow CUDA GLib', + requires: ['arrow-glib', 'arrow-cuda'], + variables: pkgconfig_variables, + version: version, + ) endif if have_gi - gir_dependencies = [ - declare_dependency(sources: arrow_glib_gir), - ] - gir_extra_args = [ - '--warn-all', - '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', - ] - arrow_cuda_glib_gir = \ - gnome.generate_gir(libarrow_cuda_glib, - dependencies: gir_dependencies, - export_packages: 'arrow-cuda-glib', - extra_args: gir_extra_args, - header: 'arrow-cuda-glib/arrow-cuda-glib.h', - identifier_prefix: 'GArrowCUDA', - includes: [ - 'Arrow-1.0', - ], - kwargs: generate_gi_common_args, - namespace: 'ArrowCUDA', - sources: sources + c_headers, - symbol_prefix: 'garrow_cuda') + gir_dependencies = [declare_dependency(sources: arrow_glib_gir)] + gir_extra_args = [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ] + arrow_cuda_glib_gir = gnome.generate_gir( + libarrow_cuda_glib, + dependencies: gir_dependencies, + export_packages: 'arrow-cuda-glib', + extra_args: gir_extra_args, + header: 'arrow-cuda-glib/arrow-cuda-glib.h', + identifier_prefix: 'GArrowCUDA', + includes: ['Arrow-1.0'], + kwargs: generate_gi_common_args, + namespace: 'ArrowCUDA', + 
sources: sources + c_headers, + symbol_prefix: 'garrow_cuda', + ) - if generate_vapi - arrow_cuda_glib_vapi = \ - gnome.generate_vapi('arrow-cuda-glib', - install: true, - packages: [ - arrow_glib_vapi, - 'gio-2.0', - ], - sources: [arrow_cuda_glib_gir[0]]) - endif + if generate_vapi + arrow_cuda_glib_vapi = gnome.generate_vapi( + 'arrow-cuda-glib', + install: true, + packages: [arrow_glib_vapi, 'gio-2.0'], + sources: [arrow_cuda_glib_gir[0]], + ) + endif endif diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build index 3425efc5555c8..5cb61fc462ca4 100644 --- a/c_glib/arrow-dataset-glib/meson.build +++ b/c_glib/arrow-dataset-glib/meson.build @@ -20,110 +20,124 @@ project_name = 'arrow-dataset-glib' sources = files( - 'dataset-factory.cpp', - 'dataset.cpp', - 'file-format.cpp', - 'fragment.cpp', - 'partitioning.cpp', - 'scanner.cpp', + 'dataset-factory.cpp', + 'dataset.cpp', + 'file-format.cpp', + 'fragment.cpp', + 'partitioning.cpp', + 'scanner.cpp', ) c_headers = files( - 'arrow-dataset-glib.h', - 'dataset-definition.h', - 'dataset-factory.h', - 'dataset.h', - 'file-format.h', - 'fragment.h', - 'partitioning.h', - 'scanner.h', + 'arrow-dataset-glib.h', + 'dataset-definition.h', + 'dataset-factory.h', + 'dataset.h', + 'file-format.h', + 'fragment.h', + 'partitioning.h', + 'scanner.h', ) cpp_headers = files( - 'arrow-dataset-glib.hpp', - 'dataset-factory.hpp', - 'dataset.hpp', - 'file-format.hpp', - 'fragment.hpp', - 'partitioning.hpp', - 'scanner.hpp', + 'arrow-dataset-glib.hpp', + 'dataset-factory.hpp', + 'dataset.hpp', + 'file-format.hpp', + 'fragment.hpp', + 'partitioning.hpp', + 'scanner.hpp', ) version_h = configure_file( - input: 'version.h.in', - output: 'version.h', - command: [python3, generate_version_header_py, '--library', 'GADATASET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], + input: 'version.h.in', + output: 'version.h', + command: [ + python3, + generate_version_header_py, + '--library', + 'GADATASET', + '--version', + version, + '--input', + '@INPUT@', + '--output', + '@OUTPUT@', + ], ) c_headers += version_h -enums = gnome.mkenums('enums', - sources: c_headers, - identifier_prefix: 'GADataset', - symbol_prefix: 'gadataset', - c_template: 'enums.c.template', - h_template: 'enums.h.template', - install_dir: join_paths(include_dir, project_name), - install_header: true) +enums = gnome.mkenums( + 'enums', + sources: c_headers, + identifier_prefix: 'GADataset', + symbol_prefix: 'gadataset', + c_template: 'enums.c.template', + h_template: 'enums.h.template', + install_dir: join_paths(include_dir, project_name), + install_header: true, +) enums_source = enums[0] enums_header = enums[1] headers = c_headers + cpp_headers install_headers(headers, subdir: project_name) -dependencies = [ - arrow_dataset, - arrow_glib, -] -libarrow_dataset_glib = library('arrow-dataset-glib', - sources: sources + enums, - install: true, - dependencies: dependencies, - implicit_include_directories: false, - include_directories: base_include_directories, - cpp_args: ['-DGADATASET_COMPILATION'], - c_args: ['-DGADATASET_COMPILATION'], - soversion: so_version, - version: library_version) -arrow_dataset_glib = declare_dependency(link_with: libarrow_dataset_glib, - include_directories: base_include_directories, - dependencies: dependencies, - sources: enums_header) +dependencies = [arrow_dataset, arrow_glib] +libarrow_dataset_glib = library( + 'arrow-dataset-glib', + sources: sources + enums, + install: true, + dependencies: dependencies, + 
implicit_include_directories: false, + include_directories: base_include_directories, + cpp_args: ['-DGADATASET_COMPILATION'], + c_args: ['-DGADATASET_COMPILATION'], + soversion: so_version, + version: library_version, +) +arrow_dataset_glib = declare_dependency( + link_with: libarrow_dataset_glib, + include_directories: base_include_directories, + dependencies: dependencies, + sources: enums_header, +) -pkgconfig.generate(libarrow_dataset_glib, - description: 'C API for Apache Arrow Dataset based on GLib', - filebase: 'arrow-dataset-glib', - name: 'Apache Arrow Dataset GLib', - requires: ['arrow-glib', 'arrow-dataset'], - variables: pkgconfig_variables, - version: version) +pkgconfig.generate( + libarrow_dataset_glib, + description: 'C API for Apache Arrow Dataset based on GLib', + filebase: 'arrow-dataset-glib', + name: 'Apache Arrow Dataset GLib', + requires: ['arrow-glib', 'arrow-dataset'], + variables: pkgconfig_variables, + version: version, +) if have_gi - arrow_dataset_glib_gir = \ - gnome.generate_gir(libarrow_dataset_glib, - dependencies: declare_dependency(sources: arrow_glib_gir), - export_packages: 'arrow-dataset-glib', - extra_args: [ - '--warn-all', - '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', - ], - header: 'arrow-dataset-glib/arrow-dataset-glib.h', - identifier_prefix: 'GADataset', - includes: [ - 'Arrow-1.0', - ], - kwargs: generate_gi_common_args, - namespace: 'ArrowDataset', - sources: sources + c_headers + enums, - symbol_prefix: 'gadataset') + arrow_dataset_glib_gir = gnome.generate_gir( + libarrow_dataset_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + export_packages: 'arrow-dataset-glib', + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ], + header: 'arrow-dataset-glib/arrow-dataset-glib.h', + identifier_prefix: 'GADataset', + includes: ['Arrow-1.0'], + kwargs: generate_gi_common_args, + namespace: 'ArrowDataset', + sources: sources + c_headers + enums, + symbol_prefix: 'gadataset', + ) - if generate_vapi - gnome.generate_vapi('arrow-dataset-glib', - install: true, - packages: [ - arrow_glib_vapi, - 'gio-2.0', - ], - sources: [arrow_dataset_glib_gir[0]]) - endif + if generate_vapi + gnome.generate_vapi( + 'arrow-dataset-glib', + install: true, + packages: [arrow_glib_vapi, 'gio-2.0'], + sources: [arrow_dataset_glib_gir[0]], + ) + endif endif diff --git a/c_glib/arrow-flight-glib/meson.build b/c_glib/arrow-flight-glib/meson.build index c1422e0d10a7d..87fc473431240 100644 --- a/c_glib/arrow-flight-glib/meson.build +++ b/c_glib/arrow-flight-glib/meson.build @@ -17,30 +17,32 @@ # specific language governing permissions and limitations # under the License. 
-sources = files( - 'client.cpp', - 'common.cpp', - 'server.cpp', -) +sources = files('client.cpp', 'common.cpp', 'server.cpp') -c_headers = files( - 'arrow-flight-glib.h', - 'client.h', - 'common.h', - 'server.h', -) +c_headers = files('arrow-flight-glib.h', 'client.h', 'common.h', 'server.h') cpp_headers = files( - 'arrow-flight-glib.hpp', - 'client.hpp', - 'common.hpp', - 'server.hpp', + 'arrow-flight-glib.hpp', + 'client.hpp', + 'common.hpp', + 'server.hpp', ) version_h = configure_file( - input: 'version.h.in', - output: 'version.h', - command: [python3, generate_version_header_py, '--library', 'GAFLIGHT', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], + input: 'version.h.in', + output: 'version.h', + command: [ + python3, + generate_version_header_py, + '--library', + 'GAFLIGHT', + '--version', + version, + '--input', + '@INPUT@', + '--output', + '@OUTPUT@', + ], ) c_headers += version_h @@ -48,58 +50,58 @@ c_headers += version_h headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-glib') -dependencies = [ - arrow_flight, - arrow_glib, -] -libarrow_flight_glib = library('arrow-flight-glib', - sources: sources, - install: true, - dependencies: dependencies, - implicit_include_directories: false, - include_directories: base_include_directories, - cpp_args: ['-DGAFLIGHT_COMPILATION'], - soversion: so_version, - version: library_version) -arrow_flight_glib = declare_dependency(link_with: libarrow_flight_glib, - include_directories: base_include_directories, - dependencies: dependencies) +dependencies = [arrow_flight, arrow_glib] +libarrow_flight_glib = library( + 'arrow-flight-glib', + sources: sources, + install: true, + dependencies: dependencies, + implicit_include_directories: false, + include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHT_COMPILATION'], + soversion: so_version, + version: library_version, +) +arrow_flight_glib = declare_dependency( + link_with: libarrow_flight_glib, + include_directories: base_include_directories, + dependencies: dependencies, +) -pkgconfig.generate(libarrow_flight_glib, - description: 'C API for Apache Arrow Flight based on GLib', - filebase: 'arrow-flight-glib', - name: 'Apache Arrow Flight GLib', - requires: ['arrow-glib', 'arrow-flight'], - variables: pkgconfig_variables, - version: version) +pkgconfig.generate( + libarrow_flight_glib, + description: 'C API for Apache Arrow Flight based on GLib', + filebase: 'arrow-flight-glib', + name: 'Apache Arrow Flight GLib', + requires: ['arrow-glib', 'arrow-flight'], + variables: pkgconfig_variables, + version: version, +) if have_gi - arrow_flight_glib_gir = \ - gnome.generate_gir(libarrow_flight_glib, - dependencies: declare_dependency(sources: arrow_glib_gir), - export_packages: 'arrow-flight-glib', - extra_args: [ - '--warn-all', - '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', - ], - header: 'arrow-flight-glib/arrow-flight-glib.h', - identifier_prefix: 'GAFlight', - includes: [ - 'Arrow-1.0', - ], - kwargs: generate_gi_common_args, - namespace: 'ArrowFlight', - sources: sources + c_headers, - symbol_prefix: 'gaflight') + arrow_flight_glib_gir = gnome.generate_gir( + libarrow_flight_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + export_packages: 'arrow-flight-glib', + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ], + header: 'arrow-flight-glib/arrow-flight-glib.h', + identifier_prefix: 'GAFlight', + includes: ['Arrow-1.0'], + kwargs: generate_gi_common_args, + 
namespace: 'ArrowFlight', + sources: sources + c_headers, + symbol_prefix: 'gaflight', + ) - if generate_vapi - arrow_flight_glib_vapi = \ - gnome.generate_vapi('arrow-flight-glib', - install: true, - packages: [ - arrow_glib_vapi, - 'gio-2.0', - ], - sources: [arrow_flight_glib_gir[0]]) - endif + if generate_vapi + arrow_flight_glib_vapi = gnome.generate_vapi( + 'arrow-flight-glib', + install: true, + packages: [arrow_glib_vapi, 'gio-2.0'], + sources: [arrow_flight_glib_gir[0]], + ) + endif endif diff --git a/c_glib/arrow-flight-sql-glib/meson.build b/c_glib/arrow-flight-sql-glib/meson.build index d588ba4917c76..aa6798e763c25 100644 --- a/c_glib/arrow-flight-sql-glib/meson.build +++ b/c_glib/arrow-flight-sql-glib/meson.build @@ -17,27 +17,27 @@ # specific language governing permissions and limitations # under the License. -sources = files( - 'client.cpp', - 'server.cpp', -) +sources = files('client.cpp', 'server.cpp') -c_headers = files( - 'arrow-flight-sql-glib.h', - 'client.h', - 'server.h', -) +c_headers = files('arrow-flight-sql-glib.h', 'client.h', 'server.h') -cpp_headers = files( - 'arrow-flight-sql-glib.hpp', - 'client.hpp', - 'server.hpp', -) +cpp_headers = files('arrow-flight-sql-glib.hpp', 'client.hpp', 'server.hpp') version_h = configure_file( - input: 'version.h.in', - output: 'version.h', - command: [python3, generate_version_header_py, '--library', 'GAFLIGHTSQL', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], + input: 'version.h.in', + output: 'version.h', + command: [ + python3, + generate_version_header_py, + '--library', + 'GAFLIGHTSQL', + '--version', + version, + '--input', + '@INPUT@', + '--output', + '@OUTPUT@', + ], ) c_headers += version_h @@ -45,63 +45,62 @@ c_headers += version_h headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-sql-glib') -dependencies = [ - arrow_flight_sql, - arrow_flight_glib, -] -libarrow_flight_sql_glib = library('arrow-flight-sql-glib', - sources: sources, - install: true, - dependencies: dependencies, - implicit_include_directories: false, - include_directories: base_include_directories, - cpp_args: ['-DGAFLIGHTSQL_COMPILATION'], - soversion: so_version, - version: library_version) -arrow_flight_sql_glib = \ - declare_dependency(link_with: libarrow_flight_sql_glib, - include_directories: base_include_directories, - dependencies: dependencies) +dependencies = [arrow_flight_sql, arrow_flight_glib] +libarrow_flight_sql_glib = library( + 'arrow-flight-sql-glib', + sources: sources, + install: true, + dependencies: dependencies, + implicit_include_directories: false, + include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHTSQL_COMPILATION'], + soversion: so_version, + version: library_version, +) +arrow_flight_sql_glib = declare_dependency( + link_with: libarrow_flight_sql_glib, + include_directories: base_include_directories, + dependencies: dependencies, +) -pkgconfig.generate(libarrow_flight_sql_glib, - description: 'C API for Apache Arrow Flight SQL based on GLib', - filebase: 'arrow-flight-sql-glib', - name: 'Apache Arrow Flight SQL GLib', - requires: ['arrow-flight-glib', 'arrow-flight-sql'], - variables: pkgconfig_variables, - version: version) +pkgconfig.generate( + libarrow_flight_sql_glib, + description: 'C API for Apache Arrow Flight SQL based on GLib', + filebase: 'arrow-flight-sql-glib', + name: 'Apache Arrow Flight SQL GLib', + requires: ['arrow-flight-glib', 'arrow-flight-sql'], + variables: pkgconfig_variables, + version: version, +) if have_gi - 
arrow_flight_sql_glib_gir_dependencies = \ - declare_dependency(sources: [arrow_glib_gir, arrow_flight_glib_gir]) - arrow_flight_sql_glib_gir = \ - gnome.generate_gir(libarrow_flight_sql_glib, - dependencies: arrow_flight_sql_glib_gir_dependencies, - export_packages: 'arrow-flight-sql-glib', - extra_args: [ - '--warn-all', - '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', - '--include-uninstalled=./arrow-flight-glib/ArrowFlight-1.0.gir', - ], - header: 'arrow-flight-sql-glib/arrow-flight-sql-glib.h', - identifier_prefix: 'GAFlightSQL', - includes: [ - 'Arrow-1.0', - 'ArrowFlight-1.0', - ], - kwargs: generate_gi_common_args, - namespace: 'ArrowFlightSQL', - sources: sources + c_headers, - symbol_prefix: 'gaflightsql') + arrow_flight_sql_glib_gir_dependencies = declare_dependency( + sources: [arrow_glib_gir, arrow_flight_glib_gir], + ) + arrow_flight_sql_glib_gir = gnome.generate_gir( + libarrow_flight_sql_glib, + dependencies: arrow_flight_sql_glib_gir_dependencies, + export_packages: 'arrow-flight-sql-glib', + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + '--include-uninstalled=./arrow-flight-glib/ArrowFlight-1.0.gir', + ], + header: 'arrow-flight-sql-glib/arrow-flight-sql-glib.h', + identifier_prefix: 'GAFlightSQL', + includes: ['Arrow-1.0', 'ArrowFlight-1.0'], + kwargs: generate_gi_common_args, + namespace: 'ArrowFlightSQL', + sources: sources + c_headers, + symbol_prefix: 'gaflightsql', + ) - if generate_vapi - gnome.generate_vapi('arrow-flight-sql-glib', - install: true, - packages: [ - arrow_flight_glib_vapi, - arrow_glib_vapi, - 'gio-2.0', - ], - sources: [arrow_flight_sql_glib_gir[0]]) - endif + if generate_vapi + gnome.generate_vapi( + 'arrow-flight-sql-glib', + install: true, + packages: [arrow_flight_glib_vapi, arrow_glib_vapi, 'gio-2.0'], + sources: [arrow_flight_sql_glib_gir[0]], + ) + endif endif diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp index 9e9753c4e007d..19437b01db96b 100644 --- a/c_glib/arrow-glib/basic-array.cpp +++ b/c_glib/arrow-glib/basic-array.cpp @@ -37,6 +37,8 @@ G_BEGIN_DECLS * @title: Basic array classes * @include: arrow-glib/arrow-glib.h * + * #GArrowArrayStatistics is a class for statistics of an array. + * * #GArrowArray is a base class for all array classes such as * #GArrowBooleanArray. * @@ -125,6 +127,11 @@ G_BEGIN_DECLS * string data. If you don't have Arrow format data, you need to * use #GArrowLargeStringArrayBuilder to create a new array. * + * #GArrowBinaryViewArray is a class for variable-size binary view array. + * It can store zero or more binary view data. If you don't have Arrow + * format data, you need to use #GArrowBinaryViewArrayBuilder to create + * a new array. + * * #GArrowFixedSizeBinaryArray is a class for fixed size binary array. * It can store zero or more fixed size binary data.
If you don't have * Arrow format data, you need to use @@ -364,6 +371,106 @@ garrow_equal_options_is_approx(GArrowEqualOptions *options) return priv->approx; } +struct GArrowArrayStatisticsPrivate +{ + arrow::ArrayStatistics statistics; +}; + +enum { + PROP_STATISTICS = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowArrayStatistics, garrow_array_statistics, G_TYPE_OBJECT) + +#define GARROW_ARRAY_STATISTICS_GET_PRIVATE(object) \ + static_cast( \ + garrow_array_statistics_get_instance_private(GARROW_ARRAY_STATISTICS(object))) + +static void +garrow_array_statistics_finalize(GObject *object) +{ + auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(object); + priv->statistics.~ArrayStatistics(); + G_OBJECT_CLASS(garrow_array_statistics_parent_class)->finalize(object); +} + +static void +garrow_array_statistics_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_STATISTICS: + priv->statistics = *static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_array_statistics_init(GArrowArrayStatistics *object) +{ + auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(object); + new (&priv->statistics) arrow::ArrayStatistics; +} + +static void +garrow_array_statistics_class_init(GArrowArrayStatisticsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = garrow_array_statistics_finalize; + gobject_class->set_property = garrow_array_statistics_set_property; + + auto spec = g_param_spec_pointer( + "statistics", + "Statistics", + "The raw arrow::ArrayStatistics *", + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_STATISTICS, spec); +} + +/** + * garrow_array_statistics_has_null_count: + * @statistics: A #GArrowArrayStatistics. + * + * Returns: %TRUE if @statistics has a valid null count value, + * %FALSE otherwise. + * + * Since: 20.0.0 + */ +gboolean +garrow_array_statistics_has_null_count(GArrowArrayStatistics *statistics) +{ + auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(statistics); + return priv->statistics.null_count.has_value(); +} + +/** + * garrow_array_statistics_get_null_count: + * @statistics: A #GArrowArrayStatistics. + * + * Returns: 0 or larger value if @statistics has a valid null count value, + * -1 otherwise. + * + * Since: 20.0.0 + */ +gint64 +garrow_array_statistics_get_null_count(GArrowArrayStatistics *statistics) +{ + auto priv = GARROW_ARRAY_STATISTICS_GET_PRIVATE(statistics); + const auto &null_count = priv->statistics.null_count; + if (null_count) { + return null_count.value(); + } else { + return -1; + } +} + typedef struct GArrowArrayPrivate_ { std::shared_ptr array; @@ -1046,7 +1153,28 @@ gboolean garrow_array_validate_full(GArrowArray *array, GError **error) { const auto arrow_array = garrow_array_get_raw(array); - return garrow::check(error, arrow_array->ValidateFull(), "[array][validate_full]"); + return garrow::check(error, arrow_array->ValidateFull(), "[array][validate-full]"); +} + +/** + * garrow_array_get_statistics: + * @array: A #GArrowArray. + * + * Returns: (transfer full): The associated #GArrowArrayStatistics of @array, + * %NULL if @array doesn't have any associated statistics. 
+ * + * Since: 20.0.0 + */ +GArrowArrayStatistics * +garrow_array_get_statistics(GArrowArray *array) +{ + const auto arrow_array = garrow_array_get_raw(array); + const auto &statistics = arrow_array->statistics(); + if (statistics) { + return garrow_array_statistics_new_raw(statistics.get()); + } else { + return nullptr; + } } G_DEFINE_TYPE(GArrowNullArray, garrow_null_array, GARROW_TYPE_ARRAY) @@ -2407,6 +2535,73 @@ garrow_large_string_array_get_string(GArrowLargeStringArray *array, gint64 i) i); } +G_DEFINE_TYPE(GArrowBinaryViewArray, garrow_binary_view_array, GARROW_TYPE_ARRAY) +static void +garrow_binary_view_array_init(GArrowBinaryViewArray *object) +{ +} + +static void +garrow_binary_view_array_class_init(GArrowBinaryViewArrayClass *klass) +{ +} + +/** + * garrow_binary_view_array_new: + * @length: The number of elements. + * @views: The view buffer. + * @data_buffers: (element-type GArrowBuffer): The data buffers. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls are computed from @null_bitmap. + * @offset: The position of the first element. + * + * Returns: A newly created #GArrowBinaryViewArray. + * + * Since: 20.0.0 + */ +GArrowBinaryViewArray * +garrow_binary_view_array_new(gint64 length, + GArrowBuffer *views, + GList *data_buffers, + GArrowBuffer *null_bitmap, + gint64 n_nulls, + gint64 offset) +{ + std::vector> arrow_data_buffers; + for (GList *node = data_buffers; node; node = g_list_next(node)) { + arrow_data_buffers.push_back(garrow_buffer_get_raw(GARROW_BUFFER(node->data))); + } + auto binary_view_array = + std::make_shared(arrow::binary_view(), + length, + garrow_buffer_get_raw(views), + std::move(arrow_data_buffers), + garrow_buffer_get_raw(null_bitmap), + n_nulls, + offset); + return GARROW_BINARY_VIEW_ARRAY( + g_object_new(GARROW_TYPE_BINARY_VIEW_ARRAY, "array", &binary_view_array, nullptr)); +} + +/** + * garrow_binary_view_array_get_value: + * @array: A #GArrowBinaryViewArray. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th value. 
+ */ +GBytes * +garrow_binary_view_array_get_value(GArrowBinaryViewArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto view = static_cast(arrow_array.get())->GetView(i); + return g_bytes_new_static(view.data(), view.length()); +} + G_DEFINE_TYPE(GArrowDate32Array, garrow_date32_array, GARROW_TYPE_NUMERIC_ARRAY) static void @@ -3468,6 +3663,13 @@ garrow_equal_options_get_raw(GArrowEqualOptions *equal_options) return &(priv->options); } +GArrowArrayStatistics * +garrow_array_statistics_new_raw(arrow::ArrayStatistics *arrow_statistics) +{ + return GARROW_ARRAY_STATISTICS( + g_object_new(GARROW_TYPE_ARRAY_STATISTICS, "statistics", arrow_statistics, nullptr)); +} + GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array) { @@ -3620,6 +3822,9 @@ garrow_array_new_raw_valist(std::shared_ptr *arrow_array, case arrow::Type::type::RUN_END_ENCODED: type = GARROW_TYPE_RUN_END_ENCODED_ARRAY; break; + case arrow::Type::type::BINARY_VIEW: + type = GARROW_TYPE_BINARY_VIEW_ARRAY; + break; default: type = GARROW_TYPE_ARRAY; break; diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h index bc597a8a93104..901af822353f6 100644 --- a/c_glib/arrow-glib/basic-array.h +++ b/c_glib/arrow-glib/basic-array.h @@ -42,6 +42,22 @@ GARROW_AVAILABLE_IN_5_0 gboolean garrow_equal_options_is_approx(GArrowEqualOptions *options); +#define GARROW_TYPE_ARRAY_STATISTICS (garrow_array_statistics_get_type()) +GARROW_AVAILABLE_IN_20_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowArrayStatistics, garrow_array_statistics, GARROW, ARRAY_STATISTICS, GObject) +struct _GArrowArrayStatisticsClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_20_0 +gboolean +garrow_array_statistics_has_null_count(GArrowArrayStatistics *statistics); +GARROW_AVAILABLE_IN_20_0 +gint64 +garrow_array_statistics_get_null_count(GArrowArrayStatistics *statistics); + GARROW_AVAILABLE_IN_6_0 GArrowArray * garrow_array_import(gpointer c_abi_array, GArrowDataType *data_type, GError **error); @@ -134,6 +150,10 @@ GARROW_AVAILABLE_IN_20_0 gboolean garrow_array_validate_full(GArrowArray *array, GError **error); +GARROW_AVAILABLE_IN_20_0 +GArrowArrayStatistics * +garrow_array_get_statistics(GArrowArray *array); + #define GARROW_TYPE_NULL_ARRAY (garrow_null_array_get_type()) GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( @@ -582,6 +602,28 @@ GARROW_AVAILABLE_IN_0_16 gchar * garrow_large_string_array_get_string(GArrowLargeStringArray *array, gint64 i); +#define GARROW_TYPE_BINARY_VIEW_ARRAY (garrow_binary_view_array_get_type()) +GARROW_AVAILABLE_IN_20_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowBinaryViewArray, garrow_binary_view_array, GARROW, BINARY_VIEW_ARRAY, GArrowArray) +struct _GArrowBinaryViewArrayClass +{ + GArrowArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_20_0 +GArrowBinaryViewArray * +garrow_binary_view_array_new(gint64 length, + GArrowBuffer *views, + GList *data_buffers, + GArrowBuffer *null_bitmap, + gint64 n_nulls, + gint64 offset); + +GARROW_AVAILABLE_IN_20_0 +GBytes * +garrow_binary_view_array_get_value(GArrowBinaryViewArray *array, gint64 i); + #define GARROW_TYPE_DATE32_ARRAY (garrow_date32_array_get_type()) GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( diff --git a/c_glib/arrow-glib/basic-array.hpp b/c_glib/arrow-glib/basic-array.hpp index b2a7ed6ae075f..361d367773c2d 100644 --- a/c_glib/arrow-glib/basic-array.hpp +++ b/c_glib/arrow-glib/basic-array.hpp @@ -27,6 +27,10 @@ GARROW_EXTERN arrow::EqualOptions * garrow_equal_options_get_raw(GArrowEqualOptions 
*equal_options); +GARROW_EXTERN +GArrowArrayStatistics * +garrow_array_statistics_new_raw(arrow::ArrayStatistics *arrow_statistics); + GARROW_EXTERN GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array); diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index f5130e9344bec..c195af7de0313 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -2402,6 +2402,12 @@ garrow_data_type_new_raw(std::shared_ptr *arrow_data_type) case arrow::Type::type::RUN_END_ENCODED: type = GARROW_TYPE_RUN_END_ENCODED_DATA_TYPE; break; + case arrow::Type::type::STRING_VIEW: + type = GARROW_TYPE_STRING_VIEW_DATA_TYPE; + break; + case arrow::Type::type::BINARY_VIEW: + type = GARROW_TYPE_BINARY_VIEW_DATA_TYPE; + break; default: type = GARROW_TYPE_DATA_TYPE; break; diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index 854988e348986..a5e67463102d1 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -18,212 +18,195 @@ # under the License. sources = files( - 'array-builder.cpp', - 'basic-array.cpp', - 'basic-data-type.cpp', - 'buffer.cpp', - 'chunked-array.cpp', - 'codec.cpp', - 'composite-array.cpp', - 'composite-data-type.cpp', - 'datum.cpp', - 'decimal.cpp', - 'decoder.cpp', - 'error.cpp', - 'expression.cpp', - 'field.cpp', - 'interval.cpp', - 'memory-pool.cpp', - 'record-batch.cpp', - 'scalar.cpp', - 'schema.cpp', - 'table-builder.cpp', - 'table.cpp', - 'tensor.cpp', - 'timestamp-parser.cpp', - 'type.cpp', + 'array-builder.cpp', + 'basic-array.cpp', + 'basic-data-type.cpp', + 'buffer.cpp', + 'chunked-array.cpp', + 'codec.cpp', + 'composite-array.cpp', + 'composite-data-type.cpp', + 'datum.cpp', + 'decimal.cpp', + 'decoder.cpp', + 'error.cpp', + 'expression.cpp', + 'field.cpp', + 'interval.cpp', + 'memory-pool.cpp', + 'record-batch.cpp', + 'scalar.cpp', + 'schema.cpp', + 'table-builder.cpp', + 'table.cpp', + 'tensor.cpp', + 'timestamp-parser.cpp', + 'type.cpp', ) sources += files( - 'file.cpp', - 'file-mode.cpp', - 'input-stream.cpp', - 'output-stream.cpp', - 'readable.cpp', - 'writable.cpp', - 'writable-file.cpp', + 'file-mode.cpp', + 'file.cpp', + 'input-stream.cpp', + 'output-stream.cpp', + 'readable.cpp', + 'writable-file.cpp', + 'writable.cpp', ) sources += files( - 'ipc-options.cpp', - 'metadata-version.cpp', - 'reader.cpp', - 'writer.cpp', + 'ipc-options.cpp', + 'metadata-version.cpp', + 'reader.cpp', + 'writer.cpp', ) -sources += files( - 'compute.cpp', -) +sources += files('compute.cpp') -sources += files( - 'file-system.cpp', - 'local-file-system.cpp', -) +sources += files('file-system.cpp', 'local-file-system.cpp') if have_arrow_orc - sources += files( - 'orc-file-reader.cpp', - ) + sources += files('orc-file-reader.cpp') endif c_headers = files( - 'array.h', - 'array-builder.h', - 'arrow-glib.h', - 'basic-array-definition.h', - 'basic-array.h', - 'basic-data-type.h', - 'buffer.h', - 'chunked-array-definition.h', - 'chunked-array.h', - 'codec.h', - 'composite-array.h', - 'composite-data-type.h', - 'data-type.h', - 'datum.h', - 'decimal.h', - 'decoder.h', - 'error.h', - 'expression.h', - 'field.h', - 'interval.h', - 'memory-pool.h', - 'record-batch.h', - 'scalar.h', - 'schema.h', - 'table-builder.h', - 'table.h', - 'tensor.h', - 'timestamp-parser.h', - 'type.h', + 'array-builder.h', + 'array.h', + 'arrow-glib.h', + 'basic-array-definition.h', + 'basic-array.h', + 'basic-data-type.h', + 'buffer.h', + 'chunked-array-definition.h', + 'chunked-array.h', + 
'codec.h', + 'composite-array.h', + 'composite-data-type.h', + 'data-type.h', + 'datum.h', + 'decimal.h', + 'decoder.h', + 'error.h', + 'expression.h', + 'field.h', + 'interval.h', + 'memory-pool.h', + 'record-batch.h', + 'scalar.h', + 'schema.h', + 'table-builder.h', + 'table.h', + 'tensor.h', + 'timestamp-parser.h', + 'type.h', ) c_headers += files( - 'file.h', - 'file-mode.h', - 'input-stream.h', - 'output-stream.h', - 'readable.h', - 'writable.h', - 'writable-file.h', + 'file-mode.h', + 'file.h', + 'input-stream.h', + 'output-stream.h', + 'readable.h', + 'writable-file.h', + 'writable.h', ) -c_headers += files( - 'ipc-options.h', - 'metadata-version.h', - 'reader.h', - 'writer.h', -) +c_headers += files('ipc-options.h', 'metadata-version.h', 'reader.h', 'writer.h') -c_headers += files( - 'compute-definition.h', - 'compute.h', -) +c_headers += files('compute-definition.h', 'compute.h') -c_headers += files( - 'file-system.h', - 'local-file-system.h', -) +c_headers += files('file-system.h', 'local-file-system.h') if have_arrow_orc - c_headers += files( - 'orc-file-reader.h', - ) + c_headers += files('orc-file-reader.h') endif cpp_headers = files( - 'array.hpp', - 'array-builder.hpp', - 'arrow-glib.hpp', - 'basic-array.hpp', - 'basic-data-type.hpp', - 'buffer.hpp', - 'chunked-array.hpp', - 'codec.hpp', - 'data-type.hpp', - 'datum.hpp', - 'decimal.hpp', - 'decoder.hpp', - 'error.hpp', - 'expression.hpp', - 'field.hpp', - 'interval.hpp', - 'memory-pool.hpp', - 'record-batch.hpp', - 'scalar.hpp', - 'schema.hpp', - 'table-builder.hpp', - 'table.hpp', - 'tensor.hpp', - 'timestamp-parser.hpp', - 'type.hpp', + 'array-builder.hpp', + 'array.hpp', + 'arrow-glib.hpp', + 'basic-array.hpp', + 'basic-data-type.hpp', + 'buffer.hpp', + 'chunked-array.hpp', + 'codec.hpp', + 'data-type.hpp', + 'datum.hpp', + 'decimal.hpp', + 'decoder.hpp', + 'error.hpp', + 'expression.hpp', + 'field.hpp', + 'interval.hpp', + 'memory-pool.hpp', + 'record-batch.hpp', + 'scalar.hpp', + 'schema.hpp', + 'table-builder.hpp', + 'table.hpp', + 'tensor.hpp', + 'timestamp-parser.hpp', + 'type.hpp', ) cpp_headers += files( - 'file.hpp', - 'file-mode.hpp', - 'input-stream.hpp', - 'output-stream.hpp', - 'readable.hpp', - 'writable.hpp', - 'writable-file.hpp', + 'file-mode.hpp', + 'file.hpp', + 'input-stream.hpp', + 'output-stream.hpp', + 'readable.hpp', + 'writable-file.hpp', + 'writable.hpp', ) cpp_headers += files( - 'ipc-options.hpp', - 'metadata-version.hpp', - 'reader.hpp', - 'writer.hpp', + 'ipc-options.hpp', + 'metadata-version.hpp', + 'reader.hpp', + 'writer.hpp', ) -cpp_headers += files( - 'compute.hpp', -) +cpp_headers += files('compute.hpp') -cpp_headers += files( - 'file-system.hpp', - 'local-file-system.hpp', -) +cpp_headers += files('file-system.hpp', 'local-file-system.hpp') if have_arrow_orc - cpp_headers += files( - 'orc-file-reader.hpp', - ) + cpp_headers += files('orc-file-reader.hpp') endif -cpp_internal_headers = files( - 'internal-hash-table.hpp', - 'internal-index.hpp', -) +cpp_internal_headers = files('internal-hash-table.hpp', 'internal-index.hpp') version_h = configure_file( - input: 'version.h.in', - output: 'version.h', - command: [python3, generate_version_header_py, '--library', 'GARROW', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], + input: 'version.h.in', + output: 'version.h', + command: [ + python3, + generate_version_header_py, + '--library', + 'GARROW', + '--version', + version, + '--input', + '@INPUT@', + '--output', + '@OUTPUT@', + ], ) c_headers += version_h -enums = 
gnome.mkenums('enums', - sources: c_headers, - identifier_prefix: 'GArrow', - symbol_prefix: 'garrow', - c_template: 'enums.c.template', - h_template: 'enums.h.template', - install_dir: join_paths(include_dir, 'arrow-glib'), - install_header: true) +enums = gnome.mkenums( + 'enums', + sources: c_headers, + identifier_prefix: 'GArrow', + symbol_prefix: 'garrow', + c_template: 'enums.c.template', + h_template: 'enums.h.template', + install_dir: join_paths(include_dir, 'arrow-glib'), + install_header: true, +) enums_source = enums[0] enums_header = enums[1] @@ -238,65 +221,67 @@ gobject_libdir = gobject.get_variable(pkgconfig: 'libdir') # confuses clang++ (/usr/bin/c++). gio = cxx.find_library('gio-2.0', dirs: [gobject_libdir], required: false) if not gio.found() - gio = dependency('gio-2.0') + gio = dependency('gio-2.0') endif -dependencies = [ - arrow, - arrow_acero, - gobject, - gio, -] -libarrow_glib = library('arrow-glib', - sources: sources + enums, - install: true, - dependencies: dependencies, - implicit_include_directories: false, - include_directories: base_include_directories, - cpp_args: ['-DGARROW_COMPILATION'], - c_args: ['-DGARROW_COMPILATION'], - soversion: so_version, - version: library_version) -arrow_glib = declare_dependency(link_with: libarrow_glib, - include_directories: base_include_directories, - dependencies: dependencies, - sources: enums_header) +dependencies = [arrow, arrow_acero, gobject, gio] +libarrow_glib = library( + 'arrow-glib', + sources: sources + enums, + install: true, + dependencies: dependencies, + implicit_include_directories: false, + include_directories: base_include_directories, + cpp_args: ['-DGARROW_COMPILATION'], + c_args: ['-DGARROW_COMPILATION'], + soversion: so_version, + version: library_version, +) +arrow_glib = declare_dependency( + link_with: libarrow_glib, + include_directories: base_include_directories, + dependencies: dependencies, + sources: enums_header, +) -pkgconfig.generate(libarrow_glib, - description: 'C API for Apache Arrow based on GLib', - filebase: meson.project_name(), - name: 'Apache Arrow GLib', - requires: ['gobject-2.0', 'arrow'], - variables: pkgconfig_variables, - version: version) +pkgconfig.generate( + libarrow_glib, + description: 'C API for Apache Arrow based on GLib', + filebase: meson.project_name(), + name: 'Apache Arrow GLib', + requires: ['gobject-2.0', 'arrow'], + variables: pkgconfig_variables, + version: version, +) if have_arrow_orc - pkgconfig.generate(filebase: 'arrow-orc-glib', - description: 'ORC modules for Apache Arrow GLib', - name: 'Apache Arrow GLib ORC', - requires: ['arrow-glib'], - version: version) + pkgconfig.generate( + filebase: 'arrow-orc-glib', + description: 'ORC modules for Apache Arrow GLib', + name: 'Apache Arrow GLib ORC', + requires: ['arrow-glib'], + version: version, + ) endif if have_gi - arrow_glib_gir = gnome.generate_gir(libarrow_glib, - export_packages: 'arrow-glib', - extra_args: [ - '--warn-all', - ], - header: 'arrow-glib/arrow-glib.h', - identifier_prefix: 'GArrow', - includes: [ - 'GObject-2.0', - 'Gio-2.0', - ], - namespace: 'Arrow', - sources: sources + c_headers + enums, - symbol_prefix: 'garrow', - kwargs: generate_gi_common_args) + arrow_glib_gir = gnome.generate_gir( + libarrow_glib, + export_packages: 'arrow-glib', + extra_args: ['--warn-all'], + header: 'arrow-glib/arrow-glib.h', + identifier_prefix: 'GArrow', + includes: ['GObject-2.0', 'Gio-2.0'], + namespace: 'Arrow', + sources: sources + c_headers + enums, + symbol_prefix: 'garrow', + kwargs: 
generate_gi_common_args, + ) - if generate_vapi - arrow_glib_vapi = gnome.generate_vapi('arrow-glib', - install: true, - packages: ['gio-2.0'], - sources: [arrow_glib_gir[0]]) - endif + if generate_vapi + arrow_glib_vapi = gnome.generate_vapi( + 'arrow-glib', + install: true, + packages: ['gio-2.0'], + sources: [arrow_glib_gir[0]], + ) + endif endif diff --git a/c_glib/arrow-glib/record-batch.cpp b/c_glib/arrow-glib/record-batch.cpp index 07e83c9f23ad0..2c94919d96609 100644 --- a/c_glib/arrow-glib/record-batch.cpp +++ b/c_glib/arrow-glib/record-batch.cpp @@ -516,6 +516,24 @@ garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error) return garrow::check(error, arrow_record_batch->Validate(), "[record-batch][validate]"); } +/** + * garrow_record_batch_validate_full + * @record_batch: A #GArrowRecordBatch + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 20.0.0 + */ +gboolean +garrow_record_batch_validate_full(GArrowRecordBatch *record_batch, GError **error) +{ + const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + return garrow::check(error, + arrow_record_batch->ValidateFull(), + "[record-batch][validate-full]"); +} + typedef struct GArrowRecordBatchIteratorPrivate_ { arrow::RecordBatchIterator iterator; diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h index 8d17a44be5883..5a51ad983bbee 100644 --- a/c_glib/arrow-glib/record-batch.h +++ b/c_glib/arrow-glib/record-batch.h @@ -113,6 +113,10 @@ GARROW_AVAILABLE_IN_20_0 gboolean garrow_record_batch_validate(GArrowRecordBatch *record_batch, GError **error); +GARROW_AVAILABLE_IN_20_0 +gboolean +garrow_record_batch_validate_full(GArrowRecordBatch *record_batch, GError **error); + #define GARROW_TYPE_RECORD_BATCH_ITERATOR (garrow_record_batch_iterator_get_type()) GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator, diff --git a/c_glib/arrow-glib/table.cpp b/c_glib/arrow-glib/table.cpp index f8569366685a2..4595ae7593998 100644 --- a/c_glib/arrow-glib/table.cpp +++ b/c_glib/arrow-glib/table.cpp @@ -339,20 +339,10 @@ garrow_table_new_values(GArrowSchema *schema, GList *values, GError **error) if (!arrow_chunked_arrays.empty()) { auto arrow_table = arrow::Table::Make(arrow_schema, std::move(arrow_chunked_arrays)); - auto status = arrow_table->Validate(); - if (garrow_error_check(error, status, context)) { - return garrow_table_new_raw(&arrow_table); - } else { - return NULL; - } + return garrow_table_new_raw(&arrow_table); } else if (!arrow_arrays.empty()) { auto arrow_table = arrow::Table::Make(arrow_schema, std::move(arrow_arrays)); - auto status = arrow_table->Validate(); - if (garrow_error_check(error, status, context)) { - return garrow_table_new_raw(&arrow_table); - } else { - return NULL; - } + return garrow_table_new_raw(&arrow_table); } else { auto maybe_table = arrow::Table::FromRecordBatches(arrow_schema, std::move(arrow_record_batches)); @@ -390,12 +380,7 @@ garrow_table_new_chunked_arrays(GArrowSchema *schema, } auto arrow_table = arrow::Table::Make(arrow_schema, arrow_chunked_arrays); - auto status = arrow_table->Validate(); - if (garrow_error_check(error, status, "[table][new][chunked-arrays]")) { - return garrow_table_new_raw(&arrow_table); - } else { - return NULL; - } + return garrow_table_new_raw(&arrow_table); } /** @@ -422,12 +407,7 @@ garrow_table_new_arrays(GArrowSchema *schema, } auto arrow_table = arrow::Table::Make(arrow_schema, arrow_arrays); - auto 
status = arrow_table->Validate(); - if (garrow_error_check(error, status, "[table][new][arrays]")) { - return garrow_table_new_raw(&arrow_table); - } else { - return NULL; - } + return garrow_table_new_raw(&arrow_table); } /** @@ -756,6 +736,42 @@ garrow_table_combine_chunks(GArrowTable *table, GError **error) } } +/** + * garrow_table_validate + * @table: A #GArrowTable + * @error: (nullable): Return location for a #GError or %NULL. + * + * Validate the given table. This is a cheap validation. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 20.0.0 + */ +gboolean +garrow_table_validate(GArrowTable *table, GError **error) +{ + const auto arrow_table = garrow_table_get_raw(table); + return garrow::check(error, arrow_table->Validate(), "[table][validate]"); +} + +/** + * garrow_table_validate_full + * @table: A #GArrowTable + * @error: (nullable): Return location for a #GError or %NULL. + * + * Validate the given table. This is an extensive validation. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 20.0.0 + */ +gboolean +garrow_table_validate_full(GArrowTable *table, GError **error) +{ + const auto arrow_table = garrow_table_get_raw(table); + return garrow::check(error, arrow_table->ValidateFull(), "[table][validate-full]"); +} + typedef struct GArrowFeatherWritePropertiesPrivate_ { arrow::ipc::feather::WriteProperties properties; diff --git a/c_glib/arrow-glib/table.h b/c_glib/arrow-glib/table.h index d790e413df5fc..a78ee47fc9c40 100644 --- a/c_glib/arrow-glib/table.h +++ b/c_glib/arrow-glib/table.h @@ -142,6 +142,14 @@ GARROW_AVAILABLE_IN_0_16 GArrowTable * garrow_table_combine_chunks(GArrowTable *table, GError **error); +GARROW_AVAILABLE_IN_20_0 +gboolean +garrow_table_validate(GArrowTable *table, GError **error); + +GARROW_AVAILABLE_IN_20_0 +gboolean +garrow_table_validate_full(GArrowTable *table, GError **error); + #define GARROW_TYPE_FEATHER_WRITE_PROPERTIES (garrow_feather_write_properties_get_type()) GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowFeatherWriteProperties, diff --git a/c_glib/doc/meson.build b/c_glib/doc/meson.build index 8d0ac4229b2a6..2cdefd48195db 100644 --- a/c_glib/doc/meson.build +++ b/c_glib/doc/meson.build @@ -25,15 +25,17 @@ gi_docgen_toml_conf.set('SOURCE_REFERENCE', source_reference) # We can't use "version.replace('-SNAPSHOT', '.dev')" here because # Ubuntu 20.04's Meson is < 0.58.0. if version_tag == '' - gi_docgen_version_tag = '' + gi_docgen_version_tag = '' else - # GI-DocGen doesn't like MAJOR.MINOR.PATCH-SNAPSHOT format. - gi_docgen_version_tag = '.dev' + # GI-DocGen doesn't like MAJOR.MINOR.PATCH-SNAPSHOT format. + gi_docgen_version_tag = '.dev' endif -gi_docgen_version = '@0@.@1@.@2@@3@'.format(version_major, - version_minor, - version_micro, - gi_docgen_version_tag) +gi_docgen_version = '@0@.@1@.@2@@3@'.format( + version_major, + version_minor, + version_micro, + gi_docgen_version_tag, +) gi_docgen_toml_conf.set('VERSION', gi_docgen_version) gir_top_build_dir = meson.current_build_dir() / '..' 
@@ -41,53 +43,57 @@ arrow_glib_gir_dir = gir_top_build_dir / 'arrow-glib' arrow_flight_glib_gir_dir = gir_top_build_dir / 'arrow-flight-glib' entries = [['arrow-glib', arrow_glib_gir[0]]] if arrow_cuda.found() - entries += [['arrow-cuda-glib', arrow_cuda_glib_gir[0]]] + entries += [['arrow-cuda-glib', arrow_cuda_glib_gir[0]]] endif if arrow_dataset.found() - entries += [['arrow-dataset-glib', arrow_dataset_glib_gir[0]]] + entries += [['arrow-dataset-glib', arrow_dataset_glib_gir[0]]] endif if arrow_flight.found() - entries += [['arrow-flight-glib', arrow_flight_glib_gir[0]]] + entries += [['arrow-flight-glib', arrow_flight_glib_gir[0]]] endif if arrow_flight_sql.found() - entries += [['arrow-flight-sql-glib', arrow_flight_sql_glib_gir[0]]] + entries += [['arrow-flight-sql-glib', arrow_flight_sql_glib_gir[0]]] endif if gandiva.found() - entries += [['gandiva-glib', gandiva_glib_gir[0]]] + entries += [['gandiva-glib', gandiva_glib_gir[0]]] endif if parquet.found() - entries += [['parquet-glib', parquet_glib_gir[0]]] + entries += [['parquet-glib', parquet_glib_gir[0]]] endif foreach entry : entries - module_name = entry[0] - gir = entry[1] - gi_docgen_toml = configure_file(input: '@0@.toml.in'.format(module_name), - output: '@0@.toml'.format(module_name), - configuration: gi_docgen_toml_conf) - gir_dir = gir_top_build_dir / module_name - current_source_dir = meson.current_source_dir() - command = [ - gi_docgen, - 'generate', - '--add-include-path=@0@'.format(arrow_flight_glib_gir_dir), - '--add-include-path=@0@'.format(arrow_glib_gir_dir), - '--add-include-path=@0@'.format(gir_dir), - '--config=@INPUT0@', - '--content-dir=@0@'.format(current_source_dir), - '--no-namespace-dir', - '--output-dir=@OUTPUT@', - '--quiet', - ] - if get_option('werror') - command += ['--fatal-warnings'] - endif - command += ['@INPUT1@'] - custom_target('@0@-doc'.format(module_name), - input: [gi_docgen_toml, gir], - depend_files: ['urlmap.js'], - output: module_name, - command: command, - build_by_default: true, - install: true, - install_dir: doc_dir) + module_name = entry[0] + gir = entry[1] + gi_docgen_toml = configure_file( + input: '@0@.toml.in'.format(module_name), + output: '@0@.toml'.format(module_name), + configuration: gi_docgen_toml_conf, + ) + gir_dir = gir_top_build_dir / module_name + current_source_dir = meson.current_source_dir() + command = [ + gi_docgen, + 'generate', + '--add-include-path=@0@'.format(arrow_flight_glib_gir_dir), + '--add-include-path=@0@'.format(arrow_glib_gir_dir), + '--add-include-path=@0@'.format(gir_dir), + '--config=@INPUT0@', + '--content-dir=@0@'.format(current_source_dir), + '--no-namespace-dir', + '--output-dir=@OUTPUT@', + '--quiet', + ] + if get_option('werror') + command += ['--fatal-warnings'] + endif + command += ['@INPUT1@'] + custom_target( + '@0@-doc'.format(module_name), + input: [gi_docgen_toml, gir], + depend_files: ['urlmap.js'], + output: module_name, + command: command, + build_by_default: true, + install: true, + install_dir: doc_dir, + ) endforeach diff --git a/c_glib/example/lua/meson.build b/c_glib/example/lua/meson.build index 4836001287579..8994da1517305 100644 --- a/c_glib/example/lua/meson.build +++ b/c_glib/example/lua/meson.build @@ -17,12 +17,11 @@ # specific language governing permissions and limitations # under the License. 
-install_data('README.md', - 'read-file.lua', - 'read-stream.lua', - 'write-file.lua', - 'write-stream.lua', - install_dir: join_paths(data_dir, - meson.project_name(), - 'example', - 'lua')) +install_data( + 'README.md', + 'read-file.lua', + 'read-stream.lua', + 'write-file.lua', + 'write-stream.lua', + install_dir: join_paths(data_dir, meson.project_name(), 'example', 'lua'), +) diff --git a/c_glib/example/meson.build b/c_glib/example/meson.build index e2d55d4788ab5..99b9b8ae22f11 100644 --- a/c_glib/example/meson.build +++ b/c_glib/example/meson.build @@ -17,33 +17,48 @@ # specific language governing permissions and limitations # under the License. -executable('build', 'build.c', - dependencies: [arrow_glib], - link_language: 'c') -executable('extension-type', 'extension-type.c', - dependencies: [arrow_glib], - link_language: 'c') -executable('read-file', 'read-file.c', - dependencies: [arrow_glib], - link_language: 'c') -executable('read-stream', 'read-stream.c', - dependencies: [arrow_glib], - link_language: 'c') -executable('receive-network', 'receive-network.c', - dependencies: [arrow_glib], - link_language: 'c') -executable('send-network', 'send-network.c', - dependencies: [arrow_glib], - link_language: 'c') +executable('build', 'build.c', dependencies: [arrow_glib], link_language: 'c') +executable( + 'extension-type', + 'extension-type.c', + dependencies: [arrow_glib], + link_language: 'c', +) +executable( + 'read-file', + 'read-file.c', + dependencies: [arrow_glib], + link_language: 'c', +) +executable( + 'read-stream', + 'read-stream.c', + dependencies: [arrow_glib], + link_language: 'c', +) +executable( + 'receive-network', + 'receive-network.c', + dependencies: [arrow_glib], + link_language: 'c', +) +executable( + 'send-network', + 'send-network.c', + dependencies: [arrow_glib], + link_language: 'c', +) -install_data('README.md', - 'build.c', - 'extension-type.c', - 'read-file.c', - 'read-stream.c', - 'receive-network.c', - 'send-network.c', - install_dir: join_paths(data_dir, meson.project_name(), 'example')) +install_data( + 'README.md', + 'build.c', + 'extension-type.c', + 'read-file.c', + 'read-stream.c', + 'receive-network.c', + 'send-network.c', + install_dir: join_paths(data_dir, meson.project_name(), 'example'), +) subdir('lua') subdir('vala') diff --git a/c_glib/example/vala/meson.build b/c_glib/example/vala/meson.build index b7eb86200ddd6..893b7a5198c17 100644 --- a/c_glib/example/vala/meson.build +++ b/c_glib/example/vala/meson.build @@ -18,41 +18,41 @@ # under the License. 
if generate_vapi - c_flags = [ - '-Wno-unused-but-set-variable', - ] - c_flags = meson.get_compiler('c').get_supported_arguments(c_flags) - vala_example_executable_kwargs = { - 'c_args': [ - '-I' + project_build_root, - '-I' + project_source_root, - ] + c_flags, - 'dependencies': [ - arrow_glib_vapi, - dependency('gio-2.0'), - ], - 'vala_args': [ - '--pkg', 'posix', - ], - } - executable('build', 'build.vala', - kwargs: vala_example_executable_kwargs) - executable('read-file', 'read-file.vala', - kwargs: vala_example_executable_kwargs) - executable('read-stream', 'read-stream.vala', - kwargs: vala_example_executable_kwargs) - executable('write-file', 'write-file.vala', - kwargs: vala_example_executable_kwargs) - executable('write-stream', 'write-stream.vala', - kwargs: vala_example_executable_kwargs) + c_flags = ['-Wno-unused-but-set-variable'] + c_flags = meson.get_compiler('c').get_supported_arguments(c_flags) + vala_example_executable_kwargs = { + 'c_args': ['-I' + project_build_root, '-I' + project_source_root] + c_flags, + 'dependencies': [arrow_glib_vapi, dependency('gio-2.0')], + 'vala_args': ['--pkg', 'posix'], + } + executable('build', 'build.vala', kwargs: vala_example_executable_kwargs) + executable( + 'read-file', + 'read-file.vala', + kwargs: vala_example_executable_kwargs, + ) + executable( + 'read-stream', + 'read-stream.vala', + kwargs: vala_example_executable_kwargs, + ) + executable( + 'write-file', + 'write-file.vala', + kwargs: vala_example_executable_kwargs, + ) + executable( + 'write-stream', + 'write-stream.vala', + kwargs: vala_example_executable_kwargs, + ) endif -install_data('README.md', - 'read-file.vala', - 'read-stream.vala', - 'write-file.vala', - 'write-stream.vala', - install_dir: join_paths(data_dir, - meson.project_name(), - 'example', - 'vala')) +install_data( + 'README.md', + 'read-file.vala', + 'read-stream.vala', + 'write-file.vala', + 'write-stream.vala', + install_dir: join_paths(data_dir, meson.project_name(), 'example', 'vala'), +) diff --git a/c_glib/gandiva-glib/meson.build b/c_glib/gandiva-glib/meson.build index 94b923388b7f2..267b01344f524 100644 --- a/c_glib/gandiva-glib/meson.build +++ b/c_glib/gandiva-glib/meson.build @@ -20,114 +20,128 @@ project_name = 'gandiva-glib' sources = files( - 'expression.cpp', - 'filter.cpp', - 'function-registry.cpp', - 'function-signature.cpp', - 'native-function.cpp', - 'node.cpp', - 'projector.cpp', - 'selection-vector.cpp', + 'expression.cpp', + 'filter.cpp', + 'function-registry.cpp', + 'function-signature.cpp', + 'native-function.cpp', + 'node.cpp', + 'projector.cpp', + 'selection-vector.cpp', ) c_headers = files( - 'expression.h', - 'filter.h', - 'function-registry.h', - 'function-signature.h', - 'gandiva-glib.h', - 'native-function.h', - 'node.h', - 'projector.h', - 'selection-vector.h', + 'expression.h', + 'filter.h', + 'function-registry.h', + 'function-signature.h', + 'gandiva-glib.h', + 'native-function.h', + 'node.h', + 'projector.h', + 'selection-vector.h', ) cpp_headers = files( - 'expression.hpp', - 'filter.hpp', - 'function-signature.hpp', - 'gandiva-glib.hpp', - 'native-function.hpp', - 'node.hpp', - 'projector.hpp', - 'selection-vector.hpp', + 'expression.hpp', + 'filter.hpp', + 'function-signature.hpp', + 'gandiva-glib.hpp', + 'native-function.hpp', + 'node.hpp', + 'projector.hpp', + 'selection-vector.hpp', ) version_h = configure_file( - input: 'version.h.in', - output: 'version.h', - command: [python3, generate_version_header_py, '--library', 'GGANDIVA', '--version', version, '--input', 
'@INPUT@', '--output', '@OUTPUT@'], + input: 'version.h.in', + output: 'version.h', + command: [ + python3, + generate_version_header_py, + '--library', + 'GGANDIVA', + '--version', + version, + '--input', + '@INPUT@', + '--output', + '@OUTPUT@', + ], ) c_headers += version_h -enums = gnome.mkenums('enums', - sources: c_headers, - identifier_prefix: 'GGandiva', - symbol_prefix: 'ggandiva', - c_template: 'enums.c.template', - h_template: 'enums.h.template', - install_dir: join_paths(include_dir, 'gandiva-glib'), - install_header: true) +enums = gnome.mkenums( + 'enums', + sources: c_headers, + identifier_prefix: 'GGandiva', + symbol_prefix: 'ggandiva', + c_template: 'enums.c.template', + h_template: 'enums.h.template', + install_dir: join_paths(include_dir, 'gandiva-glib'), + install_header: true, +) enums_source = enums[0] enums_header = enums[1] headers = c_headers + cpp_headers install_headers(headers, subdir: project_name) -dependencies = [ - gandiva, - arrow_glib, -] -libgandiva_glib = library('gandiva-glib', - sources: sources + enums, - install: true, - dependencies: dependencies, - implicit_include_directories: false, - include_directories: base_include_directories, - cpp_args: ['-DGGANDIVA_COMPILATION'], - c_args: ['-DGGANDIVA_COMPILATION'], - soversion: so_version, - version: library_version) -gandiva_glib = declare_dependency(link_with: libgandiva_glib, - include_directories: base_include_directories, - dependencies: dependencies, - sources: enums_header) +dependencies = [gandiva, arrow_glib] +libgandiva_glib = library( + 'gandiva-glib', + sources: sources + enums, + install: true, + dependencies: dependencies, + implicit_include_directories: false, + include_directories: base_include_directories, + cpp_args: ['-DGGANDIVA_COMPILATION'], + c_args: ['-DGGANDIVA_COMPILATION'], + soversion: so_version, + version: library_version, +) +gandiva_glib = declare_dependency( + link_with: libgandiva_glib, + include_directories: base_include_directories, + dependencies: dependencies, + sources: enums_header, +) -pkgconfig.generate(libgandiva_glib, - description: 'C API for Apache Arrow Gandiva based on GLib', - filebase: project_name, - name: 'Apache Arrow Gandiva GLib', - requires: ['gandiva', 'arrow-glib'], - variables: pkgconfig_variables, - version: version) +pkgconfig.generate( + libgandiva_glib, + description: 'C API for Apache Arrow Gandiva based on GLib', + filebase: project_name, + name: 'Apache Arrow Gandiva GLib', + requires: ['gandiva', 'arrow-glib'], + variables: pkgconfig_variables, + version: version, +) if have_gi - gandiva_glib_gir = \ - gnome.generate_gir(libgandiva_glib, - dependencies: declare_dependency(sources: arrow_glib_gir), - export_packages: 'gandiva-glib', - extra_args: [ - '--warn-all', - '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', - ], - header: 'gandiva-glib/gandiva-glib.h', - identifier_prefix: 'GGandiva', - includes: [ - 'Arrow-1.0' - ], - kwargs: generate_gi_common_args, - namespace: 'Gandiva', - sources: sources + c_headers + enums, - symbol_prefix: 'ggandiva') + gandiva_glib_gir = gnome.generate_gir( + libgandiva_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + export_packages: 'gandiva-glib', + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ], + header: 'gandiva-glib/gandiva-glib.h', + identifier_prefix: 'GGandiva', + includes: ['Arrow-1.0'], + kwargs: generate_gi_common_args, + namespace: 'Gandiva', + sources: sources + c_headers + enums, + symbol_prefix: 'ggandiva', + ) - if generate_vapi 
- gnome.generate_vapi('gandiva-glib', - install: true, - packages: [ - arrow_glib_vapi, - 'gio-2.0', - ], - sources: [gandiva_glib_gir[0]]) - endif + if generate_vapi + gnome.generate_vapi( + 'gandiva-glib', + install: true, + packages: [arrow_glib_vapi, 'gio-2.0'], + sources: [gandiva_glib_gir[0]], + ) + endif endif diff --git a/c_glib/meson.build b/c_glib/meson.build index 017765cd14626..11b6ba2f476f8 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -17,31 +17,33 @@ # specific language governing permissions and limitations # under the License. -project('arrow-glib', 'c', 'cpp', - default_options: [ - 'c_std=c99', - 'cpp_std=c++17', - ], - license: 'Apache-2.0', - # Debian: - # https://packages.debian.org/search?keywords=meson - # - # * bookworm: 1.0.0 - # - # Ubuntu: - # https://packages.ubuntu.com/search?keywords=meson - # - # * 20.04: 0.53.2 - # * 22.04: 0.61.2 - meson_version: '>=0.53.2') - -version = '20.0.0-SNAPSHOT' +project( + 'arrow-glib', + 'c', + 'cpp', + default_options: ['c_std=c99', 'cpp_std=c++17'], + license: 'Apache-2.0', + # Debian: + # https://packages.debian.org/search?keywords=meson + # + # * bookworm: 1.0.0 + # + # Ubuntu: + # https://packages.ubuntu.com/search?keywords=meson + # + # * 22.04: 0.61.2 + # * 24.04: 1.3.2 + meson_version: '>=0.61.2', + version: '20.0.0-SNAPSHOT', +) + +version = meson.project_version() if version.endswith('-SNAPSHOT') - version_numbers = version.split('-')[0].split('.') - version_tag = version.split('-')[1] + version_numbers = version.split('-')[0].split('.') + version_tag = version.split('-')[1] else - version_numbers = version.split('.') - version_tag = '' + version_numbers = version.split('.') + version_tag = '' endif version_major = version_numbers[0].to_int() version_minor = version_numbers[1].to_int() @@ -52,13 +54,8 @@ so_version = version_major * 100 + version_minor so_version_patch = version_micro library_version = '@0@.@1@.@2@'.format(so_version, so_version_patch, 0) -if meson.version().version_compare('>=0.56.0') - project_build_root = meson.project_build_root() - project_source_root = meson.project_source_root() -else - project_build_root = meson.build_root() - project_source_root = meson.source_root() -endif +project_build_root = meson.project_build_root() +project_source_root = meson.project_source_root() prefix = get_option('prefix') include_dir = join_paths(prefix, get_option('includedir')) @@ -71,64 +68,84 @@ gnome = import('gnome') pkgconfig = import('pkgconfig') pkgconfig_variables = [] -base_include_directories = [ - include_directories('.') -] +base_include_directories = [include_directories('.')] -generate_gi_common_args = { - 'install': true, - 'nsversion': api_version, -} -if get_option('werror') and meson.version().version_compare('>=0.55.0') - generate_gi_common_args += {'fatal_warnings': true} +generate_gi_common_args = {'install': true, 'nsversion': api_version} +if get_option('werror') + generate_gi_common_args += {'fatal_warnings': true} endif have_gi = dependency('gobject-introspection-1.0', required: false).found() if have_gi - pkgconfig_variables += ['girdir=@0@'.format(gir_dir)] + pkgconfig_variables += ['girdir=@0@'.format(gir_dir)] endif generate_vapi = have_gi and get_option('vapi') if generate_vapi - pkgconfig_variables += ['vapidir=@0@'.format(vapi_dir)] - add_languages('vala') + pkgconfig_variables += ['vapidir=@0@'.format(vapi_dir)] + add_languages('vala') endif arrow_cpp_build_dir = get_option('arrow_cpp_build_dir') arrow_cpp_build_type = get_option('arrow_cpp_build_type') if 
arrow_cpp_build_dir == '' - arrow_cpp_build_lib_dir = '' + arrow_cpp_build_lib_dir = '' else - arrow_cpp_build_lib_dir = join_paths(project_source_root, - arrow_cpp_build_dir, - arrow_cpp_build_type.to_lower()) + arrow_cpp_build_lib_dir = join_paths( + project_source_root, + arrow_cpp_build_dir, + arrow_cpp_build_type.to_lower(), + ) endif if arrow_cpp_build_lib_dir == '' - arrow = dependency('arrow', version: ['>=' + version]) - # They are just for checking required modules are enabled. They are built into - # libarrow.so. So we don't need additional build flags for them. - dependency('arrow-compute', version: ['>=' + version]) - dependency('arrow-csv', version: ['>=' + version]) - dependency('arrow-filesystem', version: ['>=' + version]) - dependency('arrow-json', version: ['>=' + version]) - - have_arrow_orc = dependency('arrow-orc', required: false, version: ['>=' + version]).found() - arrow_cuda = dependency('arrow-cuda', required: false, version: ['>=' + version]) - # we do not support compiling glib without acero engine - arrow_acero = dependency('arrow-acero', required: true, version: ['>=' + version]) - arrow_dataset = dependency('arrow-dataset', required: false, version: ['>=' + version]) - arrow_flight = dependency('arrow-flight', required: false, version: ['>=' + version]) - arrow_flight_sql = dependency('arrow-flight-sql', required: false, version: ['>=' + version]) - gandiva = dependency('gandiva', required: false, version: ['>=' + version]) - parquet = dependency('parquet', required: false, version: ['>=' + version]) + arrow = dependency('arrow', version: ['>=' + version]) + # They are just for checking required modules are enabled. They are built into + # libarrow.so. So we don't need additional build flags for them. + dependency('arrow-compute', version: ['>=' + version]) + dependency('arrow-csv', version: ['>=' + version]) + dependency('arrow-filesystem', version: ['>=' + version]) + dependency('arrow-json', version: ['>=' + version]) + + have_arrow_orc = dependency( + 'arrow-orc', + required: false, + version: ['>=' + version], + ).found() + arrow_cuda = dependency( + 'arrow-cuda', + required: false, + version: ['>=' + version], + ) + # we do not support compiling glib without acero engine + arrow_acero = dependency( + 'arrow-acero', + required: true, + version: ['>=' + version], + ) + arrow_dataset = dependency( + 'arrow-dataset', + required: false, + version: ['>=' + version], + ) + arrow_flight = dependency( + 'arrow-flight', + required: false, + version: ['>=' + version], + ) + arrow_flight_sql = dependency( + 'arrow-flight-sql', + required: false, + version: ['>=' + version], + ) + gandiva = dependency('gandiva', required: false, version: ['>=' + version]) + parquet = dependency('parquet', required: false, version: ['>=' + version]) else - base_include_directories += [ - include_directories(join_paths(arrow_cpp_build_dir, 'src')), - include_directories('../cpp/src'), - ] - cpp_compiler = meson.get_compiler('cpp') - arrow = cpp_compiler.find_library('arrow', - dirs: [arrow_cpp_build_lib_dir]) - arrow_orc_code = ''' + base_include_directories += [ + include_directories(join_paths(arrow_cpp_build_dir, 'src')), + include_directories('../cpp/src'), + ] + cpp_compiler = meson.get_compiler('cpp') + arrow = cpp_compiler.find_library('arrow', dirs: [arrow_cpp_build_lib_dir]) + arrow_orc_code = ''' #include int @@ -138,36 +155,52 @@ main(void) return 0; } ''' - have_arrow_orc = cpp_compiler.links(arrow_orc_code, - include_directories: base_include_directories, - dependencies: 
[arrow]) - arrow_cuda = cpp_compiler.find_library('arrow_cuda', - dirs: [arrow_cpp_build_lib_dir], - required: false) - arrow_acero = cpp_compiler.find_library('arrow_acero', - dirs: [arrow_cpp_build_lib_dir], - required: true) - arrow_dataset = cpp_compiler.find_library('arrow_dataset', - dirs: [arrow_cpp_build_lib_dir], - required: false) - arrow_flight = cpp_compiler.find_library('arrow_flight', - dirs: [arrow_cpp_build_lib_dir], - required: false) - arrow_flight_sql = cpp_compiler.find_library('arrow_flight_sql', - dirs: [arrow_cpp_build_lib_dir], - required: false) - gandiva = cpp_compiler.find_library('gandiva', - dirs: [arrow_cpp_build_lib_dir], - required: false) - parquet = cpp_compiler.find_library('parquet', - dirs: [arrow_cpp_build_lib_dir], - required: false) + have_arrow_orc = cpp_compiler.links( + arrow_orc_code, + include_directories: base_include_directories, + dependencies: [arrow], + ) + arrow_cuda = cpp_compiler.find_library( + 'arrow_cuda', + dirs: [arrow_cpp_build_lib_dir], + required: false, + ) + arrow_acero = cpp_compiler.find_library( + 'arrow_acero', + dirs: [arrow_cpp_build_lib_dir], + required: true, + ) + arrow_dataset = cpp_compiler.find_library( + 'arrow_dataset', + dirs: [arrow_cpp_build_lib_dir], + required: false, + ) + arrow_flight = cpp_compiler.find_library( + 'arrow_flight', + dirs: [arrow_cpp_build_lib_dir], + required: false, + ) + arrow_flight_sql = cpp_compiler.find_library( + 'arrow_flight_sql', + dirs: [arrow_cpp_build_lib_dir], + required: false, + ) + gandiva = cpp_compiler.find_library( + 'gandiva', + dirs: [arrow_cpp_build_lib_dir], + required: false, + ) + parquet = cpp_compiler.find_library( + 'parquet', + dirs: [arrow_cpp_build_lib_dir], + required: false, + ) endif cxx = meson.get_compiler('cpp') cxx_flags = [] if cxx.get_id() != 'msvc' - cxx_flags += ['-Wmissing-declarations'] + cxx_flags += ['-Wmissing-declarations'] endif add_project_arguments(cxx.get_supported_arguments(cxx_flags), language: 'cpp') @@ -177,42 +210,54 @@ generate_version_header_py = project_source_root / 'tool' / 'generate-version-he subdir('arrow-glib') if arrow_cuda.found() - subdir('arrow-cuda-glib') + subdir('arrow-cuda-glib') endif if arrow_dataset.found() - subdir('arrow-dataset-glib') + subdir('arrow-dataset-glib') endif if arrow_flight.found() - subdir('arrow-flight-glib') + subdir('arrow-flight-glib') endif if arrow_flight_sql.found() - subdir('arrow-flight-sql-glib') + subdir('arrow-flight-sql-glib') endif if gandiva.found() - subdir('gandiva-glib') + subdir('gandiva-glib') endif if parquet.found() - subdir('parquet-glib') + subdir('parquet-glib') endif subdir('example') if get_option('doc') - subdir('doc') + subdir('doc') endif -install_data('../LICENSE.txt', - 'README.md', - install_dir: data_dir / meson.project_name()) +install_data( + '../LICENSE.txt', + 'README.md', + install_dir: data_dir / meson.project_name(), +) run_test = find_program('test/run-test.sh') -test('unit test', - run_test, - env: [ - 'ARROW_CUDA_GLIB_TYPELIB_DIR=@0@/arrow-cuda-glib'.format(project_build_root), - 'ARROW_DATASET_GLIB_TYPELIB_DIR=@0@/arrow-dataset-glib'.format(project_build_root), - 'ARROW_FLIGHT_GLIB_TYPELIB_DIR=@0@/arrow-flight-glib'.format(project_build_root), - 'ARROW_FLIGHT_SQL_GLIB_TYPELIB_DIR=@0@/arrow-flight-sql-glib'.format(project_build_root), - 'ARROW_GLIB_TYPELIB_DIR=@0@/arrow-glib'.format(project_build_root), - 'GANDIVA_GLIB_TYPELIB_DIR=@0@/gandiva-glib'.format(project_build_root), - 'PARQUET_GLIB_TYPELIB_DIR=@0@/parquet-glib'.format(project_build_root), - ]) 
+test( + 'unit test', + run_test, + env: [ + 'ARROW_CUDA_GLIB_TYPELIB_DIR=@0@/arrow-cuda-glib'.format( + project_build_root, + ), + 'ARROW_DATASET_GLIB_TYPELIB_DIR=@0@/arrow-dataset-glib'.format( + project_build_root, + ), + 'ARROW_FLIGHT_GLIB_TYPELIB_DIR=@0@/arrow-flight-glib'.format( + project_build_root, + ), + 'ARROW_FLIGHT_SQL_GLIB_TYPELIB_DIR=@0@/arrow-flight-sql-glib'.format( + project_build_root, + ), + 'ARROW_GLIB_TYPELIB_DIR=@0@/arrow-glib'.format(project_build_root), + 'GANDIVA_GLIB_TYPELIB_DIR=@0@/gandiva-glib'.format(project_build_root), + 'PARQUET_GLIB_TYPELIB_DIR=@0@/parquet-glib'.format(project_build_root), + ], +) diff --git a/c_glib/parquet-glib/meson.build b/c_glib/parquet-glib/meson.build index a3de1d0933f7f..a85ba18f30f9c 100644 --- a/c_glib/parquet-glib/meson.build +++ b/c_glib/parquet-glib/meson.build @@ -20,32 +20,43 @@ project_name = 'parquet-glib' sources = files( - 'arrow-file-reader.cpp', - 'arrow-file-writer.cpp', - 'metadata.cpp', - 'statistics.cpp', + 'arrow-file-reader.cpp', + 'arrow-file-writer.cpp', + 'metadata.cpp', + 'statistics.cpp', ) c_headers = files( - 'arrow-file-reader.h', - 'arrow-file-writer.h', - 'metadata.h', - 'statistics.h', - 'parquet-glib.h', + 'arrow-file-reader.h', + 'arrow-file-writer.h', + 'metadata.h', + 'parquet-glib.h', + 'statistics.h', ) cpp_headers = files( - 'arrow-file-reader.hpp', - 'arrow-file-writer.hpp', - 'metadata.hpp', - 'statistics.hpp', - 'parquet-glib.hpp', + 'arrow-file-reader.hpp', + 'arrow-file-writer.hpp', + 'metadata.hpp', + 'parquet-glib.hpp', + 'statistics.hpp', ) version_h = configure_file( - input: 'version.h.in', - output: 'version.h', - command: [python3, generate_version_header_py, '--library', 'GPARQUET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], + input: 'version.h.in', + output: 'version.h', + command: [ + python3, + generate_version_header_py, + '--library', + 'GPARQUET', + '--version', + version, + '--input', + '@INPUT@', + '--output', + '@OUTPUT@', + ], ) c_headers += version_h @@ -53,58 +64,58 @@ c_headers += version_h headers = c_headers + cpp_headers install_headers(headers, subdir: project_name) -dependencies = [ - arrow, - parquet, - arrow_glib, -] -libparquet_glib = library('parquet-glib', - sources: sources, - install: true, - dependencies: dependencies, - implicit_include_directories: false, - include_directories: base_include_directories, - cpp_args: ['-DGPARQUET_COMPILATION'], - soversion: so_version, - version: library_version) -parquet_glib = declare_dependency(link_with: libparquet_glib, - include_directories: base_include_directories, - dependencies: dependencies) +dependencies = [arrow, parquet, arrow_glib] +libparquet_glib = library( + 'parquet-glib', + sources: sources, + install: true, + dependencies: dependencies, + implicit_include_directories: false, + include_directories: base_include_directories, + cpp_args: ['-DGPARQUET_COMPILATION'], + soversion: so_version, + version: library_version, +) +parquet_glib = declare_dependency( + link_with: libparquet_glib, + include_directories: base_include_directories, + dependencies: dependencies, +) -pkgconfig.generate(libparquet_glib, - description: 'C API for Apache Parquet based on GLib', - filebase: project_name, - name: 'Apache Parquet GLib', - requires: ['parquet', 'arrow-glib'], - variables: pkgconfig_variables, - version: version) +pkgconfig.generate( + libparquet_glib, + description: 'C API for Apache Parquet based on GLib', + filebase: project_name, + name: 'Apache Parquet GLib', + requires: ['parquet', 
'arrow-glib'], + variables: pkgconfig_variables, + version: version, +) if have_gi - parquet_glib_gir = \ - gnome.generate_gir(libparquet_glib, - dependencies: declare_dependency(sources: arrow_glib_gir), - export_packages: 'parquet-glib', - extra_args: [ - '--warn-all', - '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', - ], - header: 'parquet-glib/parquet-glib.h', - identifier_prefix: 'GParquet', - includes: [ - 'Arrow-1.0', - ], - kwargs: generate_gi_common_args, - namespace: 'Parquet', - sources: sources + c_headers, - symbol_prefix: 'gparquet') + parquet_glib_gir = gnome.generate_gir( + libparquet_glib, + dependencies: declare_dependency(sources: arrow_glib_gir), + export_packages: 'parquet-glib', + extra_args: [ + '--warn-all', + '--include-uninstalled=./arrow-glib/Arrow-1.0.gir', + ], + header: 'parquet-glib/parquet-glib.h', + identifier_prefix: 'GParquet', + includes: ['Arrow-1.0'], + kwargs: generate_gi_common_args, + namespace: 'Parquet', + sources: sources + c_headers, + symbol_prefix: 'gparquet', + ) - if generate_vapi - gnome.generate_vapi('parquet-glib', - install: true, - packages: [ - arrow_glib_vapi, - 'gio-2.0', - ], - sources: [parquet_glib_gir[0]]) - endif + if generate_vapi + gnome.generate_vapi( + 'parquet-glib', + install: true, + packages: [arrow_glib_vapi, 'gio-2.0'], + sources: [parquet_glib_gir[0]], + ) + endif endif diff --git a/c_glib/test/dataset/test-file-system-dataset.rb b/c_glib/test/dataset/test-file-system-dataset.rb index 96deedf6b4eb0..25c50ef9e4ff9 100644 --- a/c_glib/test/dataset/test-file-system-dataset.rb +++ b/c_glib/test/dataset/test-file-system-dataset.rb @@ -91,15 +91,15 @@ def create_dataset dataset = @factory.finish expected_table = build_table(count: [ - build_int32_array([1, 10]), - build_int32_array([2]), - build_int32_array([3]), - ], - label: [ - build_string_array(["a", "a"]), - build_string_array(["b"]), - build_string_array(["c"]), - ]) + build_int32_array([1, 10]), + build_int32_array([2]), + build_int32_array([3]), + ], + label: [ + build_string_array(["a", "a"]), + build_string_array(["b"]), + build_string_array(["c"]), + ]) return dataset, expected_table end diff --git a/c_glib/test/test-array-statistics.rb b/c_glib/test/test-array-statistics.rb new file mode 100644 index 0000000000000..bf470b4e72235 --- /dev/null +++ b/c_glib/test/test-array-statistics.rb @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestArrayStatistics < Test::Unit::TestCase + include Helper::Buildable + + def setup + omit("Parquet is required") unless defined?(::Parquet) + + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + array = build_int64_array([nil, -(2 ** 32), 2 ** 32]) + @table = build_table("int64" => array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = reader.read_table.get_column_data(0).get_chunk(0).statistics + yield + ensure + reader.unref + end + end + end + + test("#has_null_count?") do + assert do + @statistics.has_null_count? + end + end + + test("#null_count") do + assert_equal(1, @statistics.null_count) + end +end diff --git a/c_glib/test/test-array.rb b/c_glib/test/test-array.rb index cd62d917cf664..18a54a2963134 100644 --- a/c_glib/test/test-array.rb +++ b/c_glib/test/test-array.rb @@ -118,8 +118,9 @@ def test_to_s sub_test_case("#view") do def test_valid + int32_array = build_int32_array([0, 1069547520, -1071644672, nil]) assert_equal(build_float_array([0.0, 1.5, -2.5, nil]), - build_int32_array([0, 1069547520, -1071644672, nil]).view(Arrow::FloatDataType.new)) + int32_array.view(Arrow::FloatDataType.new)) end def test_invalid @@ -212,7 +213,7 @@ def test_valid end def test_invalid - message = "[array][validate_full]: Invalid: Invalid UTF8 sequence at string index 0" + message = "[array][validate-full]: Invalid: Invalid UTF8 sequence at string index 0" # U+3042 HIRAGANA LETTER A, U+3044 HIRAGANA LETTER I data = "\u3042\u3044".b[0..-2] diff --git a/c_glib/test/test-binary-view-array.rb b/c_glib/test/test-binary-view-array.rb new file mode 100644 index 0000000000000..e1c97ecdced30 --- /dev/null +++ b/c_glib/test/test-binary-view-array.rb @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestBinaryViewArray < Test::Unit::TestCase + def test_new + short_binary_data = "test" + short_view_buffer_space = 12 + short_view_buffer = [short_binary_data.size].pack("l") + short_view_buffer += short_binary_data.ljust(short_view_buffer_space, "\x00") + + arrow_view_buffer = Arrow::Buffer.new(short_view_buffer) + arrow_data_buffer = Arrow::Buffer.new(short_binary_data) + bitmap = Arrow::Buffer.new([0b1].pack("C*")) + + binary_view_array = Arrow::BinaryViewArray.new(1, + arrow_view_buffer, + [arrow_data_buffer], + bitmap, + 0, + 0) + assert do + binary_view_array.validate_full + end + assert_equal(short_binary_data, binary_view_array.get_value(0).to_s) + end +end diff --git a/c_glib/test/test-binary-view-data-type.rb b/c_glib/test/test-binary-view-data-type.rb index f143b62df4ebc..9c97982862fe3 100644 --- a/c_glib/test/test-binary-view-data-type.rb +++ b/c_glib/test/test-binary-view-data-type.rb @@ -30,4 +30,11 @@ def test_to_s data_type = Arrow::BinaryViewDataType.new assert_equal("binary_view", data_type.to_s) end + + def test_export + data_type = Arrow::BinaryViewDataType.new + c_abi_schema = data_type.export + assert_equal(data_type, + Arrow::DataType.import(c_abi_schema)) + end end diff --git a/c_glib/test/test-chunked-array-datum.rb b/c_glib/test/test-chunked-array-datum.rb index b82f3eed8a7af..99e35fc57b085 100644 --- a/c_glib/test/test-chunked-array-datum.rb +++ b/c_glib/test/test-chunked-array-datum.rb @@ -49,7 +49,14 @@ def test_false end def test_to_string - assert_equal("ChunkedArray([\n" + " [\n" + " true,\n" + " false\n" + " ]\n" + "])", @datum.to_s) + assert_equal(<<-DATUM.chomp, @datum.to_s) +ChunkedArray([ + [ + true, + false + ] +]) + DATUM end def test_value diff --git a/c_glib/test/test-large-list-array.rb b/c_glib/test/test-large-list-array.rb index 2f7efab5a074a..fa9c92ec87d0c 100644 --- a/c_glib/test/test-large-list-array.rb +++ b/c_glib/test/test-large-list-array.rb @@ -88,10 +88,10 @@ def test_value_length def test_value_offsets array = build_large_list_array(Arrow::Int8DataType.new, - [ - [-29, 29], - [-1, 0, 1], - ]) + [ + [-29, 29], + [-1, 0, 1], + ]) assert_equal([0, 2, 5], array.value_offsets) end diff --git a/c_glib/test/test-memory-pool.rb b/c_glib/test/test-memory-pool.rb index 515edb0161399..b471437208503 100644 --- a/c_glib/test/test-memory-pool.rb +++ b/c_glib/test/test-memory-pool.rb @@ -20,6 +20,8 @@ class TestMemoryPool < Test::Unit::TestCase def setup @memory_pool = Arrow::MemoryPool.default + # Our tests assume that some memory is allocated. 
+ @buffer = Arrow::ResizableBuffer.new(1) end def test_bytes_allocated diff --git a/c_glib/test/test-record-batch-datum.rb b/c_glib/test/test-record-batch-datum.rb index ec572e0f13023..c50e50f9029e8 100644 --- a/c_glib/test/test-record-batch-datum.rb +++ b/c_glib/test/test-record-batch-datum.rb @@ -49,7 +49,13 @@ def test_false end def test_to_string - assert_equal("RecordBatch(visible: [\n" + " true,\n" + " false\n" + " ]\n" + ")", @datum.to_s) + assert_equal(<<-DATUM.chomp, @datum.to_s) +RecordBatch(visible: [ + true, + false + ] +) + DATUM end def test_value diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb index ba4b15a67782a..86ae5b2e2d4aa 100644 --- a/c_glib/test/test-record-batch.rb +++ b/c_glib/test/test-record-batch.rb @@ -221,5 +221,46 @@ def test_invalid end end end + + sub_test_case("#validate_full") do + def setup + @id_field = Arrow::Field.new("uint8", Arrow::UInt8DataType.new) + @name_field = Arrow::Field.new("string", Arrow::StringDataType.new) + @schema = Arrow::Schema.new([@id_field, @name_field]) + + @uint8_value = build_uint_array([1]) + @valid_name_value = build_string_array(["abc"]) + @n_rows = @uint8_value.length + + # U+3042 HIRAGANA LETTER A, U+3044 HIRAGANA LETTER I + data = "\u3042\u3044".b[0..-2] + value_offsets = Arrow::Buffer.new([0, data.size].pack("l*")) + @invalid_name_value = Arrow::StringArray.new(1, + value_offsets, + Arrow::Buffer.new(data), + nil, + -1) + end + + def test_valid + columns = [@uint8_value, @valid_name_value] + record_batch = Arrow::RecordBatch.new(@schema, @n_rows, columns) + + assert do + record_batch.validate_full + end + end + + def test_invalid + message = "[record-batch][validate-full]: Invalid: " + + "In column 1: Invalid: Invalid UTF8 sequence at string index 0" + columns = [@uint8_value, @invalid_name_value] + record_batch = Arrow::RecordBatch.new(@schema, @n_rows, columns) + + assert_raise(Arrow::Error::Invalid.new(message)) do + record_batch.validate_full + end + end + end end end diff --git a/c_glib/test/test-string-view-data-type.rb b/c_glib/test/test-string-view-data-type.rb index f1f3655493c8e..bddc9eeec265b 100644 --- a/c_glib/test/test-string-view-data-type.rb +++ b/c_glib/test/test-string-view-data-type.rb @@ -30,4 +30,11 @@ def test_to_s data_type = Arrow::StringViewDataType.new assert_equal("string_view", data_type.to_s) end + + def test_export + data_type = Arrow::StringViewDataType.new + c_abi_schema = data_type.export + assert_equal(data_type, + Arrow::DataType.import(c_abi_schema)) + end end diff --git a/c_glib/test/test-struct-field-options.rb b/c_glib/test/test-struct-field-options.rb index 4a614de6df6e7..f9b492fc4da94 100644 --- a/c_glib/test/test-struct-field-options.rb +++ b/c_glib/test/test-struct-field-options.rb @@ -42,7 +42,8 @@ def test_set_dot_path end def test_set_invalid - message = "[struct-field-options][set-field-ref]: Invalid: Dot path '[foo]' contained an unterminated index" + message = "[struct-field-options][set-field-ref]: " + + "Invalid: Dot path '[foo]' contained an unterminated index" assert_raise(Arrow::Error::Invalid.new(message)) do @options.field_ref = "[foo]" end diff --git a/c_glib/test/test-table.rb b/c_glib/test/test-table.rb index 615a90c2f0baf..1c8c6fb3f796f 100644 --- a/c_glib/test/test-table.rb +++ b/c_glib/test/test-table.rb @@ -243,6 +243,77 @@ def test_combine_chunks all_values) end + sub_test_case("#validate") do + def setup + @id_field = Arrow::Field.new("id", Arrow::UInt8DataType.new) + @name_field = Arrow::Field.new("name", 
Arrow::StringDataType.new) + @schema = Arrow::Schema.new([@id_field, @name_field]) + + @id_array = build_uint_array([1]) + @name_array = build_string_array(["abc"]) + @arrays = [@id_array, @name_array] + end + + def test_valid + table = Arrow::Table.new(@schema, @arrays) + + assert do + table.validate + end + end + + def test_invalid + message = "[table][validate]: Invalid: " + + "Column 1 named name expected length 1 but got length 2" + + invalid_values = [@id_array, build_string_array(["abc", "def"])] + table = Arrow::Table.new(@schema, invalid_values) + assert_raise(Arrow::Error::Invalid.new(message)) do + table.validate + end + end + end + + sub_test_case("#validate_full") do + def setup + @id_field = Arrow::Field.new("uint8", Arrow::UInt8DataType.new) + @name_field = Arrow::Field.new("string", Arrow::StringDataType.new) + @schema = Arrow::Schema.new([@id_field, @name_field]) + + @id_values = build_uint_array([1]) + @valid_name_values = build_string_array(["abc"]) + + # U+3042 HIRAGANA LETTER A, U+3044 HIRAGANA LETTER I + data = "\u3042\u3044".b[0..-2] + value_offsets = Arrow::Buffer.new([0, data.size].pack("l*")) + @invalid_name_values = Arrow::StringArray.new(1, + value_offsets, + Arrow::Buffer.new(data), + nil, + -1) + end + + def test_valid + columns = [@id_values, @valid_name_values] + table = Arrow::Table.new(@schema, columns) + + assert do + table.validate_full + end + end + + def test_invalid + message = "[table][validate-full]: Invalid: " + + "Column 1: In chunk 0: Invalid: Invalid UTF8 sequence at string index 0" + columns = [@id_values, @invalid_name_values] + table = Arrow::Table.new(@schema, columns) + + assert_raise(Arrow::Error::Invalid.new(message)) do + table.validate_full + end + end + end + sub_test_case("#write_as_feather") do def setup super diff --git a/c_glib/test/test-take.rb b/c_glib/test/test-take.rb index f97c7ad730bc6..5b7af2d21f376 100644 --- a/c_glib/test/test-take.rb +++ b/c_glib/test/test-take.rb @@ -23,7 +23,7 @@ class TestTake < Test::Unit::TestCase def test_no_null indices = build_int16_array([1, 0, 2]) assert_equal(build_int16_array([0, 1, 2]), - build_int16_array([1, 0 ,2]).take(indices)) + build_int16_array([1, 0, 2]).take(indices)) end def test_null diff --git a/c_glib/test/test-uint-array-builder.rb b/c_glib/test/test-uint-array-builder.rb index 89621189b4571..3aa3a1c488d83 100644 --- a/c_glib/test/test-uint-array-builder.rb +++ b/c_glib/test/test-uint-array-builder.rb @@ -32,9 +32,9 @@ def test_uint16 values = [0, border_value] assert_equal(build_uint_array([*values, nil]), Arrow::UInt16Array.new(3, - Arrow::Buffer.new(values.pack("S*")), - Arrow::Buffer.new([0b011].pack("C*")), - -1)) + Arrow::Buffer.new(values.pack("S*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) end def test_uint32 @@ -42,9 +42,9 @@ def test_uint32 values = [0, border_value] assert_equal(build_uint_array([*values, nil]), Arrow::UInt32Array.new(3, - Arrow::Buffer.new(values.pack("L*")), - Arrow::Buffer.new([0b011].pack("C*")), - -1)) + Arrow::Buffer.new(values.pack("L*")), + Arrow::Buffer.new([0b011].pack("C*")), + -1)) end def test_uint64 diff --git a/ci/conan/all/conandata.yml b/ci/conan/all/conandata.yml index fb75f3995c62e..a13b31c2e82df 100644 --- a/ci/conan/all/conandata.yml +++ b/ci/conan/all/conandata.yml @@ -21,64 +21,47 @@ # SOFTWARE. 
sources: + "18.1.0": + url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-18.1.0/apache-arrow-18.1.0.tar.gz?action=download" + sha256: "2dc8da5f8796afe213ecc5e5aba85bb82d91520eff3cf315784a52d0fa61d7fc" + "18.0.0": + url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-18.0.0/apache-arrow-18.0.0.tar.gz?action=download" + sha256: "abcf1934cd0cdddd33664e9f2d9a251d6c55239d1122ad0ed223b13a583c82a9" + "17.0.0": + url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-17.0.0/apache-arrow-17.0.0.tar.gz?action=download" + sha256: "9d280d8042e7cf526f8c28d170d93bfab65e50f94569f6a790982a878d8d898d" + "16.1.0": + url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-16.1.0/apache-arrow-16.1.0.tar.gz?action=download" + sha256: "c9e60c7e87e59383d21b20dc874b17153729ee153264af6d21654b7dff2c60d7" "15.0.0": url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-15.0.0/apache-arrow-15.0.0.tar.gz?action=download" sha256: "01dd3f70e85d9b5b933ec92c0db8a4ef504a5105f78d2d8622e84279fb45c25d" "14.0.2": url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-14.0.2/apache-arrow-14.0.2.tar.gz?action=download" sha256: "1304dedb41896008b89fe0738c71a95d9b81752efc77fa70f264cb1da15d9bc2" - "14.0.1": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-14.0.1/apache-arrow-14.0.1.tar.gz?action=download" - sha256: "5c70eafb1011f9d124bafb328afe54f62cc5b9280b7080e1e3d668f78c0e407e" - "14.0.0": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-14.0.0/apache-arrow-14.0.0.tar.gz?action=download" - sha256: "4eb0da50ec071baf15fc163cb48058931e006f1c862c8def0e180fd07d531021" - "13.0.0": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-13.0.0/apache-arrow-13.0.0.tar.gz?action=download" - sha256: "35dfda191262a756be934eef8afee8d09762cad25021daa626eb249e251ac9e6" - "12.0.1": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-12.0.1/apache-arrow-12.0.1.tar.gz?action=download" - sha256: "3481c411393aa15c75e88d93cf8315faf7f43e180fe0790128d3840d417de858" - "12.0.0": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-12.0.0/apache-arrow-12.0.0.tar.gz?action=download" - sha256: "ddd8347882775e53af7d0965a1902b7d8fcd0a030fd14f783d4f85e821352d52" - "11.0.0": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-11.0.0/apache-arrow-11.0.0.tar.gz?action=download" - sha256: "2dd8f0ea0848a58785628ee3a57675548d509e17213a2f5d72b0d900b43f5430" - "10.0.1": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-10.0.1/apache-arrow-10.0.1.tar.gz?action=download" - sha256: "c814e0670112a22c1a6ec03ab420a52ae236a9a42e9e438c3cbd37f37e658fb3" - "10.0.0": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-10.0.0/apache-arrow-10.0.0.tar.gz?action=download" - sha256: "5b46fa4c54f53e5df0019fe0f9d421e93fc906b625ebe8e89eed010d561f1f12" - "8.0.1": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-8.0.1/apache-arrow-8.0.1.tar.gz?action=download" - sha256: "82d46929f7574715551da21700f100b39f99c3c4d6790f26cac86d869d64e94e" - "8.0.0": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-8.0.0/apache-arrow-8.0.0.tar.gz?action=download" - sha256: "ad9a05705117c989c116bae9ac70492fe015050e1b80fb0e38fde4b5d863aaa3" - "7.0.0": - url: "https://www.apache.org/dyn/closer.lua/arrow/arrow-7.0.0/apache-arrow-7.0.0.tar.gz?action=download" - sha256: "e8f49b149a15ecef4e40fcfab1b87c113c6b1ee186005c169e5cdf95d31a99de" patches: - "8.0.1": - - patch_file: "patches/8.0.0-0005-install-utils.patch" - patch_description: "enable utils installation" + "18.1.0": + - patch_file: "patches/18.0.0-0001-fix-cmake.patch" + 
patch_description: "use cci package" patch_type: "conan" - - patch_file: "patches/8.0.0-0006-fix-cmake.patch" + "18.0.0": + - patch_file: "patches/18.0.0-0001-fix-cmake.patch" patch_description: "use cci package" patch_type: "conan" - "8.0.0": - - patch_file: "patches/8.0.0-0005-install-utils.patch" - patch_description: "enable utils installation" + "17.0.0": + - patch_file: "patches/16.0.0-0001-fix-cmake.patch" + patch_description: "use cci package" patch_type: "conan" - - patch_file: "patches/8.0.0-0006-fix-cmake.patch" + "16.1.0": + - patch_file: "patches/16.0.0-0001-fix-cmake.patch" patch_description: "use cci package" patch_type: "conan" - "7.0.0": - - patch_file: "patches/7.0.0-0006-install-utils.patch" - patch_description: "enable utils installation" + "15.0.0": + - patch_file: "patches/11.0.0-0001-fix-cmake.patch" + patch_description: "use cci package" patch_type: "conan" - - patch_file: "patches/7.0.0-0007-fix-cmake.patch" + "14.0.2": + - patch_file: "patches/11.0.0-0001-fix-cmake.patch" patch_description: "use cci package" patch_type: "conan" + \ No newline at end of file diff --git a/ci/conan/all/conanfile.py b/ci/conan/all/conanfile.py index 178cd03da1555..5db9fe356726a 100644 --- a/ci/conan/all/conanfile.py +++ b/ci/conan/all/conanfile.py @@ -31,7 +31,7 @@ import os import glob -required_conan_version = ">=1.53.0" +required_conan_version = ">=2.1.0" class ArrowConan(ConanFile): name = "arrow" @@ -93,7 +93,7 @@ class ArrowConan(ConanFile): "shared": False, "fPIC": True, "gandiva": False, - "parquet": False, + "parquet": True, "skyhook": False, "substrait": False, "acero": False, @@ -108,7 +108,7 @@ class ArrowConan(ConanFile): "simd_level": "default", "runtime_simd_level": "max", "with_backtrace": False, - "with_boost": False, + "with_boost": True, "with_brotli": False, "with_bz2": False, "with_csv": False, @@ -122,7 +122,7 @@ class ArrowConan(ConanFile): "with_glog": False, "with_grpc": False, "with_json": False, - "with_thrift": False, + "with_thrift": True, "with_llvm": False, "with_openssl": False, "with_opentelemetry": False, @@ -133,7 +133,7 @@ class ArrowConan(ConanFile): "with_utf8proc": False, "with_lz4": False, "with_snappy": False, - "with_zlib": False, + "with_zlib": True, "with_zstd": False, } short_paths = True @@ -144,21 +144,6 @@ def _min_cppstd(self): # https://github.com/apache/arrow/pull/13991 return "11" if Version(self.version) < "10.0.0" else "17" - @property - def _compilers_minimum_version(self): - return { - "11": { - "clang": "3.9", - }, - "17": { - "gcc": "8", - "clang": "7", - "apple-clang": "10", - "Visual Studio": "15", - "msvc": "191", - }, - }.get(self._min_cppstd, {}) - def export_sources(self): export_conandata_patches(self) copy(self, "conan_cmake_project_include.cmake", self.recipe_folder, os.path.join(self.export_sources_folder, "src")) @@ -183,15 +168,15 @@ def _requires_rapidjson(self): def requirements(self): if self.options.with_thrift: - self.requires("thrift/0.17.0") + self.requires("thrift/0.20.0") if self.options.with_protobuf: - self.requires("protobuf/3.21.9") + self.requires("protobuf/3.21.12") if self.options.with_jemalloc: self.requires("jemalloc/5.3.0") if self.options.with_mimalloc: self.requires("mimalloc/1.7.6") if self.options.with_boost: - self.requires("boost/1.84.0") + self.requires("boost/1.85.0") if self.options.with_gflags: self.requires("gflags/2.2.2") if self.options.with_glog: @@ -223,18 +208,23 @@ def requirements(self): if self.options.with_snappy: self.requires("snappy/1.1.9") if 
self.options.get_safe("simd_level") != None or \ - self.options.get_safe("runtime_simd_level") != None: - self.requires("xsimd/9.0.1") + self.options.get_safe("runtime_simd_level") != None: + if Version(self.version) < 8: + self.requires("xsimd/9.0.1") + else: + self.requires("xsimd/13.0.0") if self.options.with_zlib: self.requires("zlib/[>=1.2.11 <2]") if self.options.with_zstd: - self.requires("zstd/1.5.5") + self.requires("zstd/[>=1.5 <1.6]") if self.options.with_re2: self.requires("re2/20230301") if self.options.with_utf8proc: self.requires("utf8proc/2.8.0") if self.options.with_backtrace: self.requires("libbacktrace/cci.20210118") + if self.options.with_orc: + self.requires("orc/2.0.0") def validate(self): # Do not allow options with 'auto' value @@ -247,27 +237,35 @@ def validate(self): # From https://github.com/conan-io/conan-center-index/pull/23163#issuecomment-2039808851 if self.options.gandiva: if not self.options.with_re2: - raise ConanException("'with_re2' option should be True when'gandiva=True'") + raise ConanException("'with_re2' option should be True when 'gandiva=True'") if not self.options.with_boost: - raise ConanException("'with_boost' option should be True when'gandiva=True'") + raise ConanException("'with_boost' option should be True when 'gandiva=True'") if not self.options.with_utf8proc: - raise ConanException("'with_utf8proc' option should be True when'gandiva=True'") + raise ConanException("'with_utf8proc' option should be True when 'gandiva=True'") + if self.options.with_thrift and not self.options.with_boost: + raise ConanException("'with_boost' option should be True when 'thrift=True'") + if self.options.parquet: + if not self.options.with_thrift: + raise ConanException("'with_thrift' option should be True when 'parquet=True'") + if self.options.with_flight_rpc and not self.options.with_protobuf: + raise ConanException("'with_protobuf' option should be True when 'with_flight_rpc=True'") if self.settings.compiler.get_safe("cppstd"): check_min_cppstd(self, self._min_cppstd) - minimum_version = self._compilers_minimum_version.get(str(self.settings.compiler), False) - if minimum_version and Version(self.settings.compiler.version) < minimum_version: + if ( + Version(self.version) < "10.0.0" + and self.settings.compiler == "clang" + and Version(self.settings.compiler.version) < "3.9" + ): raise ConanInvalidConfiguration( - f"{self.ref} requires C++{self._min_cppstd}, which your compiler does not support." 
+ f"{self.ref} requires C++11, which needs at least clang-3.9" ) if self.options.get_safe("skyhook", False): raise ConanInvalidConfiguration("CCI has no librados recipe (yet)") if self.options.with_cuda: raise ConanInvalidConfiguration("CCI has no cuda recipe (yet)") - if self.options.with_orc: - raise ConanInvalidConfiguration("CCI has no orc recipe (yet)") if self.options.with_s3 and not self.dependencies["aws-sdk-cpp"].options.config: raise ConanInvalidConfiguration("arrow:with_s3 requires aws-sdk-cpp:config is True.") @@ -275,6 +273,11 @@ def validate(self): if self.dependencies["jemalloc"].options.enable_cxx: raise ConanInvalidConfiguration("jemmalloc.enable_cxx of a static jemalloc must be disabled") + if self.options.with_thrift and not self.options.with_zlib: + raise ConanInvalidConfiguration("arrow:with_thrift requires arrow:with_zlib") + + if self.options.parquet and not self.options.with_thrift: + raise ConanInvalidConfiguration("arrow:parquet requires arrow:with_thrift") def build_requirements(self): if Version(self.version) >= "13.0.0": @@ -352,6 +355,7 @@ def generate(self): tc.variables["GLOG_SOURCE"] = "SYSTEM" tc.variables["ARROW_WITH_BACKTRACE"] = bool(self.options.with_backtrace) tc.variables["ARROW_WITH_BROTLI"] = bool(self.options.with_brotli) + tc.variables["ARROW_WITH_RE2"] = bool(self.options.with_re2) tc.variables["brotli_SOURCE"] = "SYSTEM" if self.options.with_brotli: tc.variables["ARROW_BROTLI_USE_SHARED"] = bool(self.dependencies["brotli"].options.shared) @@ -383,8 +387,10 @@ def generate(self): tc.variables["ARROW_ZSTD_USE_SHARED"] = bool(self.dependencies["zstd"].options.shared) tc.variables["ORC_SOURCE"] = "SYSTEM" tc.variables["ARROW_WITH_THRIFT"] = bool(self.options.with_thrift) + tc.variables["ARROW_THRIFT"] = bool(self.options.with_thrift) tc.variables["Thrift_SOURCE"] = "SYSTEM" if self.options.with_thrift: + tc.variables["ARROW_THRIFT"] = True tc.variables["THRIFT_VERSION"] = bool(self.dependencies["thrift"].ref.version) # a recent thrift does not require boost tc.variables["ARROW_THRIFT_USE_SHARED"] = bool(self.dependencies["thrift"].options.shared) tc.variables["ARROW_USE_OPENSSL"] = self.options.with_openssl @@ -444,28 +450,53 @@ def _patch_sources(self): def build(self): self._patch_sources() - cmake =CMake(self) + cmake = CMake(self) cmake.configure(build_script_folder=os.path.join(self.source_folder, "cpp")) cmake.build() def package(self): copy(self, pattern="LICENSE.txt", dst=os.path.join(self.package_folder, "licenses"), src=self.source_folder) copy(self, pattern="NOTICE.txt", dst=os.path.join(self.package_folder, "licenses"), src=self.source_folder) - cmake =CMake(self) + cmake = CMake(self) cmake.install() rmdir(self, os.path.join(self.package_folder, "lib", "cmake")) rmdir(self, os.path.join(self.package_folder, "lib", "pkgconfig")) rmdir(self, os.path.join(self.package_folder, "share")) + cmake_suffix = "shared" if self.options.shared else "static" + + alias_map = { f"Arrow::arrow_{cmake_suffix}": f"arrow::arrow_{cmake_suffix}" } + + if self.options.parquet: + alias_map[f"Parquet::parquet_{cmake_suffix}"] = f"arrow::parquet_{cmake_suffix}" + + if self.options.get_safe("substrait"): + alias_map[f"Arrow::arrow_substrait_{cmake_suffix}"] = f"arrow::arrow_substrait_{cmake_suffix}" + + if self.options.acero: + alias_map[f"Arrow::arrow_acero_{cmake_suffix}"] = f"arrow::arrow_acero_{cmake_suffix}" + + if self.options.gandiva: + alias_map[f"Gandiva::gandiva_{cmake_suffix}"] = f"arrow::gandiva_{cmake_suffix}" + + if self.options.with_flight_rpc: + 
alias_map[f"ArrowFlight::arrow_flight_sql_{cmake_suffix}"] = f"arrow::arrow_flight_sql_{cmake_suffix}" + + @property + def _module_subfolder(self): + return os.path.join("lib", "cmake") + def package_info(self): # FIXME: fix CMake targets of components self.cpp_info.set_property("cmake_file_name", "Arrow") suffix = "_static" if is_msvc(self) and not self.options.shared else "" + cmake_suffix = "shared" if self.options.shared else "static" self.cpp_info.components["libarrow"].set_property("pkg_config_name", "arrow") + self.cpp_info.components["libarrow"].set_property("cmake_target_name", f"Arrow::arrow_{cmake_suffix}") self.cpp_info.components["libarrow"].libs = [f"arrow{suffix}"] if not self.options.shared: self.cpp_info.components["libarrow"].defines = ["ARROW_STATIC"] @@ -474,6 +505,7 @@ def package_info(self): if self.options.parquet: self.cpp_info.components["libparquet"].set_property("pkg_config_name", "parquet") + self.cpp_info.components["libparquet"].set_property("cmake_target_name", f"Parquet::parquet_{cmake_suffix}") self.cpp_info.components["libparquet"].libs = [f"parquet{suffix}"] self.cpp_info.components["libparquet"].requires = ["libarrow"] if not self.options.shared: @@ -481,6 +513,7 @@ def package_info(self): if self.options.get_safe("substrait"): self.cpp_info.components["libarrow_substrait"].set_property("pkg_config_name", "arrow_substrait") + self.cpp_info.components["libarrow_substrait"].set_property("cmake_target_name", f"Arrow::arrow_substrait_{cmake_suffix}") self.cpp_info.components["libarrow_substrait"].libs = [f"arrow_substrait{suffix}"] self.cpp_info.components["libarrow_substrait"].requires = ["libparquet", "dataset"] @@ -488,6 +521,8 @@ def package_info(self): del self.options.plasma if self.options.acero: + self.cpp_info.components["libacero"].set_property("pkg_config_name", "acero") + self.cpp_info.components["libacero"].set_property("cmake_target_name", f"Acero::arrow_acero_{cmake_suffix}") self.cpp_info.components["libacero"].libs = [f"arrow_acero{suffix}"] self.cpp_info.components["libacero"].names["cmake_find_package"] = "acero" self.cpp_info.components["libacero"].names["cmake_find_package_multi"] = "acero" @@ -496,6 +531,7 @@ def package_info(self): if self.options.gandiva: self.cpp_info.components["libgandiva"].set_property("pkg_config_name", "gandiva") + self.cpp_info.components["libgandiva"].set_property("cmake_target_name", f"Gandiva::gandiva_{cmake_suffix}") self.cpp_info.components["libgandiva"].libs = [f"gandiva{suffix}"] self.cpp_info.components["libgandiva"].requires = ["libarrow"] if not self.options.shared: @@ -503,11 +539,16 @@ def package_info(self): if self.options.with_flight_rpc: self.cpp_info.components["libarrow_flight"].set_property("pkg_config_name", "flight_rpc") + self.cpp_info.components["libarrow_flight"].set_property("cmake_target_name", f"ArrowFlight::arrow_flight_{cmake_suffix}") self.cpp_info.components["libarrow_flight"].libs = [f"arrow_flight{suffix}"] self.cpp_info.components["libarrow_flight"].requires = ["libarrow"] + # https://github.com/apache/arrow/pull/43137#pullrequestreview-2267476893 + if Version(self.version) >= "18.0.0" and self.options.with_openssl: + self.cpp_info.components["libarrow_flight"].requires.append("openssl::openssl") if self.options.get_safe("with_flight_sql"): self.cpp_info.components["libarrow_flight_sql"].set_property("pkg_config_name", "flight_sql") + self.cpp_info.components["libarrow_flight_sql"].set_property("cmake_target_name", f"ArrowFlightSql::arrow_flight_sql_{cmake_suffix}") 
self.cpp_info.components["libarrow_flight_sql"].libs = [f"arrow_flight_sql{suffix}"] self.cpp_info.components["libarrow_flight_sql"].requires = ["libarrow", "libarrow_flight"] @@ -560,7 +601,8 @@ def package_info(self): if self._requires_rapidjson(): self.cpp_info.components["libarrow"].requires.append("rapidjson::rapidjson") if self.options.with_s3: - self.cpp_info.components["libarrow"].requires.append("aws-sdk-cpp::s3") + # https://github.com/apache/arrow/blob/6b268f62a8a172249ef35f093009c740c32e1f36/cpp/src/arrow/CMakeLists.txt#L98 + self.cpp_info.components["libarrow"].requires.extend([f"aws-sdk-cpp::{x}" for x in ["cognito-identity", "core", "identity-management", "s3", "sts"]]) if self.options.get_safe("with_gcs"): self.cpp_info.components["libarrow"].requires.append("google-cloud-cpp::storage") if self.options.with_orc: @@ -581,32 +623,7 @@ def package_info(self): self.cpp_info.components["libarrow"].requires.append("zlib::zlib") if self.options.with_zstd: self.cpp_info.components["libarrow"].requires.append("zstd::zstd") - if self.options.with_boost: - self.cpp_info.components["libarrow"].requires.append("boost::boost") if self.options.with_grpc: self.cpp_info.components["libarrow"].requires.append("grpc::grpc") if self.options.with_flight_rpc: self.cpp_info.components["libarrow_flight"].requires.append("protobuf::protobuf") - - # TODO: to remove in conan v2 - self.cpp_info.filenames["cmake_find_package"] = "Arrow" - self.cpp_info.filenames["cmake_find_package_multi"] = "Arrow" - self.cpp_info.components["libarrow"].names["cmake_find_package"] = "arrow" - self.cpp_info.components["libarrow"].names["cmake_find_package_multi"] = "arrow" - if self.options.parquet: - self.cpp_info.components["libparquet"].names["cmake_find_package"] = "parquet" - self.cpp_info.components["libparquet"].names["cmake_find_package_multi"] = "parquet" - if self.options.get_safe("substrait"): - self.cpp_info.components["libarrow_substrait"].names["cmake_find_package"] = "arrow_substrait" - self.cpp_info.components["libarrow_substrait"].names["cmake_find_package_multi"] = "arrow_substrait" - if self.options.gandiva: - self.cpp_info.components["libgandiva"].names["cmake_find_package"] = "gandiva" - self.cpp_info.components["libgandiva"].names["cmake_find_package_multi"] = "gandiva" - if self.options.with_flight_rpc: - self.cpp_info.components["libarrow_flight"].names["cmake_find_package"] = "flight_rpc" - self.cpp_info.components["libarrow_flight"].names["cmake_find_package_multi"] = "flight_rpc" - if self.options.get_safe("with_flight_sql"): - self.cpp_info.components["libarrow_flight_sql"].names["cmake_find_package"] = "flight_sql" - self.cpp_info.components["libarrow_flight_sql"].names["cmake_find_package_multi"] = "flight_sql" - if self.options.cli and (self.options.with_cuda or self.options.with_flight_rpc or self.options.parquet): - self.env_info.PATH.append(os.path.join(self.package_folder, "bin")) diff --git a/ci/conan/all/patches/1.0.0-0003-fix-shared-msvc.patch b/ci/conan/all/patches/1.0.0-0003-fix-shared-msvc.patch deleted file mode 100644 index 45210d1b8cc51..0000000000000 --- a/ci/conan/all/patches/1.0.0-0003-fix-shared-msvc.patch +++ /dev/null @@ -1,35 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - ---- cpp/src/arrow/CMakeLists.txt -+++ cpp/src/arrow/CMakeLists.txt -@@ -490,6 +490,10 @@ - target_compile_definitions(arrow_static PUBLIC ARROW_STATIC) - endif() - -+if(ARROW_BUILD_SHARED AND WIN32) -+target_compile_definitions(arrow_shared PRIVATE ARROW_EXPORTING) -+endif() -+ - if(ARROW_WITH_BACKTRACE) - find_package(Backtrace) - diff --git a/ci/conan/all/patches/1.0.0-0005-fix-make12-namespace.patch b/ci/conan/all/patches/1.0.0-0005-fix-make12-namespace.patch deleted file mode 100644 index 199804bff00ab..0000000000000 --- a/ci/conan/all/patches/1.0.0-0005-fix-make12-namespace.patch +++ /dev/null @@ -1,44 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h -index 02a4909..2b168d2 100644 ---- a/cpp/src/arrow/vendored/datetime/date.h -+++ b/cpp/src/arrow/vendored/datetime/date.h -@@ -5152,7 +5152,7 @@ to_stream(std::basic_ostream& os, const CharT* fmt, - if (modified == CharT{}) - #endif - { -- auto h = *fmt == CharT{'I'} ? make12(hms.hours()) : hms.hours(); -+ auto h = *fmt == CharT{'I'} ? 
arrow_vendored::date::make12(hms.hours()) : hms.hours(); - if (h < hours{10}) - os << CharT{'0'}; - os << h.count(); -@@ -5366,7 +5366,7 @@ to_stream(std::basic_ostream& os, const CharT* fmt, - save_ostream _(os); - os.fill('0'); - os.width(2); -- os << make12(tod.hours()).count() << CharT{':'}; -+ os << arrow_vendored::date::make12(tod.hours()).count() << CharT{':'}; - os.width(2); - os << tod.minutes().count() << CharT{':'}; - os.width(2); diff --git a/ci/conan/all/patches/1.0.0-0006-fix-cmake.patch b/ci/conan/all/patches/1.0.0-0006-fix-cmake.patch deleted file mode 100644 index 3ecd0bf9f3968..0000000000000 --- a/ci/conan/all/patches/1.0.0-0006-fix-cmake.patch +++ /dev/null @@ -1,355 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- -diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt -index 300f043..0127a7a 100644 ---- a/cpp/CMakeLists.txt -+++ b/cpp/CMakeLists.txt -@@ -654,7 +654,7 @@ endif() - - if(ARROW_WITH_BROTLI) - # Order is important for static linking -- set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) -+ set(ARROW_BROTLI_LIBS brotli::brotlienc brotli::brotlidec brotli::brotlicommon) - list(APPEND ARROW_LINK_LIBS ${ARROW_BROTLI_LIBS}) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS}) - endif() -@@ -664,7 +664,7 @@ if(ARROW_WITH_BZ2) - endif() - - if(ARROW_WITH_LZ4) -- list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) -+ list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4_static) - endif() - - if(ARROW_WITH_SNAPPY) -@@ -800,8 +800,11 @@ endif() - - if(ARROW_MIMALLOC) - add_definitions(-DARROW_MIMALLOC) -- list(APPEND ARROW_LINK_LIBS mimalloc::mimalloc) -- list(APPEND ARROW_STATIC_LINK_LIBS mimalloc::mimalloc) -+ if (TARGET mimalloc-static) -+ list(APPEND ARROW_LINK_LIBS mimalloc-static) -+ else() -+ list(APPEND ARROW_STATIC_LINK_LIBS mimalloc) -+ endif() - endif() - - # ---------------------------------------------------------------------- -diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake -index eb10ebe..9c81017 100644 ---- a/cpp/cmake_modules/BuildUtils.cmake -+++ b/cpp/cmake_modules/BuildUtils.cmake -@@ -165,10 +165,10 @@ function(create_merged_static_lib output_target) - set(ar_script_path ${CMAKE_BINARY_DIR}/${ARG_NAME}.ar) - - file(WRITE ${ar_script_path}.in "CREATE ${output_lib_path}\n") -- file(APPEND ${ar_script_path}.in "ADDLIB $\n") -+ file(APPEND ${ar_script_path}.in "ADDLIB $\n") - - foreach(lib ${ARG_TO_MERGE}) -- file(APPEND ${ar_script_path}.in "ADDLIB $\n") -+ file(APPEND ${ar_script_path}.in "ADDLIB $\n") - endforeach() - - file(APPEND ${ar_script_path}.in "SAVE\nEND\n") -diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake -index 807e2b9..016c8db 100644 ---- a/cpp/cmake_modules/ThirdpartyToolchain.cmake -+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake -@@ -154,16 +154,7 @@ macro(build_dependency DEPENDENCY_NAME) - endmacro() - - macro(resolve_dependency DEPENDENCY_NAME) -- if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") -- find_package(${DEPENDENCY_NAME} MODULE) -- if(NOT ${${DEPENDENCY_NAME}_FOUND}) -- build_dependency(${DEPENDENCY_NAME}) -- endif() -- elseif(${DEPENDENCY_NAME}_SOURCE STREQUAL "BUNDLED") -- build_dependency(${DEPENDENCY_NAME}) -- elseif(${DEPENDENCY_NAME}_SOURCE STREQUAL "SYSTEM") -- find_package(${DEPENDENCY_NAME} REQUIRED) -- endif() -+ find_package(${DEPENDENCY_NAME} REQUIRED) - endmacro() - - macro(resolve_dependency_with_version DEPENDENCY_NAME REQUIRED_VERSION) -@@ -765,6 +756,7 @@ endif() - # - Tests need Boost at runtime. - # - S3FS and Flight benchmarks need Boost at runtime. 
- if(ARROW_BUILD_INTEGRATION -+ OR ARROW_BOOST_REQUIRED - OR ARROW_BUILD_TESTS - OR ARROW_GANDIVA - OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS) -@@ -785,7 +777,7 @@ if(ARROW_BOOST_REQUIRED) - elseif(BOOST_SOURCE STREQUAL "BUNDLED") - build_boost() - elseif(BOOST_SOURCE STREQUAL "SYSTEM") -- find_package(BoostAlt ${ARROW_BOOST_REQUIRED_VERSION} REQUIRED) -+ find_package(Boost ${ARROW_BOOST_REQUIRED_VERSION} REQUIRED) - endif() - - if(TARGET Boost::system) -@@ -936,11 +928,11 @@ macro(build_brotli) - endmacro() - - if(ARROW_WITH_BROTLI) -- resolve_dependency(Brotli) -+ resolve_dependency(brotli) - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon -+ get_target_property(BROTLI_INCLUDE_DIR brotli::brotlicommon - INTERFACE_INCLUDE_DIRECTORIES) -- include_directories(SYSTEM ${BROTLI_INCLUDE_DIR}) -+ include_directories(SYSTEM ${brotli_INCLUDE_DIR}) - endif() - - if(PARQUET_REQUIRE_ENCRYPTION AND NOT ARROW_PARQUET) -@@ -1146,9 +1138,10 @@ if(ARROW_NEED_GFLAGS) - endif() - endif() - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) -+ include_directories(SYSTEM ${gflags_INCLUDE_DIR}) -+ set(GFLAGS_LIBRARIES ${gflags_LIBRARIES}) - -- if(NOT TARGET ${GFLAGS_LIBRARIES}) -+ if(0) - if(TARGET gflags-shared) - set(GFLAGS_LIBRARIES gflags-shared) - elseif(TARGET gflags_shared) -@@ -1237,12 +1230,13 @@ endmacro() - if(ARROW_WITH_THRIFT) - # We already may have looked for Thrift earlier, when considering whether - # to build Boost, so don't look again if already found. -- if(NOT Thrift_FOUND AND NOT THRIFT_FOUND) -+ if(0) - # Thrift c++ code generated by 0.13 requires 0.11 or greater - resolve_dependency_with_version(Thrift 0.11.0) - endif() -+ find_package(Thrift CONFIG REQUIRED) - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${THRIFT_INCLUDE_DIR}) -+ include_directories(SYSTEM ${Thrift_INCLUDE_DIR}) - endif() - - # ---------------------------------------------------------------------- -@@ -1407,6 +1401,7 @@ endif() - # jemalloc - Unix-only high-performance allocator - - if(ARROW_JEMALLOC) -+if(0) - message(STATUS "Building (vendored) jemalloc from source") - # We only use a vendored jemalloc as we want to control its version. - # Also our build of jemalloc is specially prefixed so that it will not -@@ -1465,12 +1460,18 @@ if(ARROW_JEMALLOC) - add_dependencies(jemalloc::jemalloc jemalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc) -+else() -+ find_package(jemalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${jemalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${jemalloc_LIBRARIES_TARGETS} ) -+endif() - endif() - - # ---------------------------------------------------------------------- - # mimalloc - Cross-platform high-performance allocator, from Microsoft - - if(ARROW_MIMALLOC) -+if(0) - message(STATUS "Building (vendored) mimalloc from source") - # We only use a vendored mimalloc as we want to control its build options. 
- -@@ -1518,6 +1519,11 @@ if(ARROW_MIMALLOC) - add_dependencies(toolchain mimalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) -+else() -+ find_package(mimalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${mimalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${mimalloc_LIBRARIES_TARGETS} ) -+endif() - endif() - - # ---------------------------------------------------------------------- -@@ -1918,11 +1924,16 @@ macro(build_lz4) - endmacro() - - if(ARROW_WITH_LZ4) -- resolve_dependency(Lz4) -+ resolve_dependency(lz4) - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) -- include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) -+ if(TARGET LZ4::lz4_static) -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_static INTERFACE_INCLUDE_DIRECTORIES) -+ else() -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_shared INTERFACE_INCLUDE_DIRECTORIES) -+ endif() -+ include_directories(SYSTEM ${lz4_INCLUDE_DIR}) -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${lz4_LIBRARIES_TARGETS} ) - endif() - - macro(build_zstd) -@@ -2037,10 +2048,10 @@ macro(build_re2) - endmacro() - - if(ARROW_GANDIVA) -- resolve_dependency(RE2) -+ resolve_dependency(re2) - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(RE2_INCLUDE_DIR RE2::re2 INTERFACE_INCLUDE_DIRECTORIES) -+ get_target_property(RE2_INCLUDE_DIR re2::re2 INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${RE2_INCLUDE_DIR}) - endif() - -@@ -2480,17 +2491,24 @@ if(ARROW_WITH_GRPC) - endif() - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(GRPC_INCLUDE_DIR gRPC::grpc INTERFACE_INCLUDE_DIRECTORIES) -+ # get_target_property(GRPC_INCLUDE_DIR gRPC::grpc INTERFACE_INCLUDE_DIRECTORIES) -+ if(grpc_INCLUDE_DIRS_RELEASE) -+ set(GRPC_INCLUDE_DIR ${grpc_INCLUDE_DIRS_RELEASE}) -+ elseif(grpc_INCLUDE_DIRS_DEBUG) -+ set(GRPC_INCLUDE_DIR ${grpc_INCLUDE_DIRS_DEBUG}) -+ endif() - include_directories(SYSTEM ${GRPC_INCLUDE_DIR}) -+ include_directories(SYSTEM ${absl_INCLUDE_DIR}) -+ include_directories(SYSTEM ${protobuf_INCLUDE_DIR}) - - if(GRPC_VENDORED) - set(GRPCPP_PP_INCLUDE TRUE) - else() - # grpc++ headers may reside in ${GRPC_INCLUDE_DIR}/grpc++ or ${GRPC_INCLUDE_DIR}/grpcpp - # depending on the gRPC version. 
-- if(EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") -+ if(EXISTS ${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE TRUE) -- elseif(EXISTS "${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h") -+ elseif(EXISTS ${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE FALSE) - else() - message(FATAL_ERROR "Cannot find grpc++ headers in ${GRPC_INCLUDE_DIR}") -diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt -index 5797a78..da6bd4d 100644 ---- a/cpp/src/arrow/CMakeLists.txt -+++ b/cpp/src/arrow/CMakeLists.txt -@@ -292,10 +292,15 @@ set(ARROW_TESTING_SRCS - - set(_allocator_dependencies "") # Empty list - if(ARROW_JEMALLOC) -- list(APPEND _allocator_dependencies jemalloc_ep) -+ list(APPEND _allocator_dependencies jemalloc::jemalloc) - endif() -+ - if(ARROW_MIMALLOC) -- list(APPEND _allocator_dependencies mimalloc_ep) -+ if (TARGET mimalloc-static) -+ list(APPEND _allocator_dependencies mimalloc-static) -+ else() -+ list(APPEND _allocator_dependencies mimalloc) -+ endif() - endif() - - if(_allocator_dependencies) -diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc -index 784bf7b..8f005a5 100644 ---- a/cpp/src/arrow/memory_pool.cc -+++ b/cpp/src/arrow/memory_pool.cc -@@ -31,7 +31,7 @@ - // Needed to support jemalloc 3 and 4 - #define JEMALLOC_MANGLE - // Explicitly link to our version of jemalloc --#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h" -+#include "jemalloc/jemalloc.h" - #endif - - #ifdef ARROW_MIMALLOC -diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt -index 85e8db6..cd70c63 100644 ---- a/cpp/src/gandiva/CMakeLists.txt -+++ b/cpp/src/gandiva/CMakeLists.txt -@@ -25,7 +25,7 @@ add_custom_target(gandiva-benchmarks) - - add_dependencies(gandiva-all gandiva gandiva-tests gandiva-benchmarks) - --find_package(LLVMAlt REQUIRED) -+find_package(LLVM REQUIRED) - - if(LLVM_VERSION_MAJOR LESS "10") - set(GANDIVA_CXX_STANDARD ${CMAKE_CXX_STANDARD}) -@@ -88,9 +88,16 @@ set(SRC_FILES - random_generator_holder.cc - ${GANDIVA_PRECOMPILED_CC_PATH}) - --set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared LLVM::LLVM_INTERFACE RE2::re2) - --set(GANDIVA_STATIC_LINK_LIBS arrow_static LLVM::LLVM_INTERFACE RE2::re2) -+ function(get_all_targets var) -+ set(targets) -+ get_all_targets_recursive(targets ${CMAKE_CURRENT_SOURCE_DIR}) -+ set(${var} ${targets} PARENT_SCOPE) -+endfunction() -+ -+set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared llvm-core::llvm-core re2::re2) -+ -+set(GANDIVA_STATIC_LINK_LIBS arrow_static llvm-core::llvm-core re2::re2) - - if(ARROW_GANDIVA_STATIC_LIBSTDCPP - AND (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)) -@@ -131,7 +138,7 @@ add_arrow_lib(gandiva - arrow_dependencies - precompiled - EXTRA_INCLUDES -- $ -+ $ - SHARED_LINK_FLAGS - ${GANDIVA_SHARED_LINK_FLAGS} - SHARED_LINK_LIBS -@@ -203,7 +210,7 @@ endfunction() - - set(GANDIVA_INTERNALS_TEST_ARGUMENTS) - if(WIN32) -- list(APPEND GANDIVA_INTERNALS_TEST_ARGUMENTS EXTRA_LINK_LIBS LLVM::LLVM_INTERFACE) -+ list(APPEND GANDIVA_INTERNALS_TEST_ARGUMENTS EXTRA_LINK_LIBS llvm-core::llvm-core) - endif() - add_gandiva_test(internals-test - SOURCES -@@ -225,9 +232,9 @@ add_gandiva_test(internals-test - decimal_type_util_test.cc - random_generator_holder_test.cc - EXTRA_DEPENDENCIES -- LLVM::LLVM_INTERFACE -+ llvm-core::llvm-core - EXTRA_INCLUDES -- $ -+ $ - ${GANDIVA_INTERNALS_TEST_ARGUMENTS}) - - if(ARROW_GANDIVA_JAVA) diff --git 
a/ci/conan/all/patches/11.0.0-0001-fix-cmake.patch b/ci/conan/all/patches/11.0.0-0001-fix-cmake.patch new file mode 100644 index 0000000000000..37f36f99a0c33 --- /dev/null +++ b/ci/conan/all/patches/11.0.0-0001-fix-cmake.patch @@ -0,0 +1,64 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +diff --git a/cpp/cmake_modules/FindThriftAlt.cmake b/cpp/cmake_modules/FindThriftAlt.cmake +index f3e49021d..95177c2a6 100644 +--- a/cpp/cmake_modules/FindThriftAlt.cmake ++++ b/cpp/cmake_modules/FindThriftAlt.cmake +@@ -45,22 +45,21 @@ endif() + # * https://github.com/apache/thrift/pull/2725 + # * https://github.com/apache/thrift/pull/2726 + # * https://github.com/conda-forge/thrift-cpp-feedstock/issues/68 +-if(NOT WIN32) +- set(find_package_args "") +- if(ThriftAlt_FIND_VERSION) +- list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) +- endif() +- if(ThriftAlt_FIND_QUIETLY) +- list(APPEND find_package_args QUIET) +- endif() +- find_package(Thrift ${find_package_args}) +- if(Thrift_FOUND) +- set(ThriftAlt_FOUND TRUE) +- add_executable(thrift::compiler IMPORTED) +- set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION +- "${THRIFT_COMPILER}") +- return() +- endif() ++ ++set(find_package_args "") ++if(ThriftAlt_FIND_VERSION) ++ list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) ++endif() ++if(ThriftAlt_FIND_QUIETLY) ++ list(APPEND find_package_args QUIET) ++endif() ++find_package(Thrift ${find_package_args}) ++if(Thrift_FOUND) ++ set(ThriftAlt_FOUND TRUE) ++ add_executable(thrift::compiler IMPORTED) ++ set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION ++ "${THRIFT_COMPILER}") ++ return() + endif() + + function(extract_thrift_version) diff --git a/ci/conan/all/patches/16.0.0-0001-fix-cmake.patch b/ci/conan/all/patches/16.0.0-0001-fix-cmake.patch new file mode 100644 index 0000000000000..6077237139d49 --- /dev/null +++ b/ci/conan/all/patches/16.0.0-0001-fix-cmake.patch @@ -0,0 +1,84 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +diff --git a/cpp/cmake_modules/FindThriftAlt.cmake b/cpp/cmake_modules/FindThriftAlt.cmake +index f3e49021d..3e63f1edf 100644 +--- a/cpp/cmake_modules/FindThriftAlt.cmake ++++ b/cpp/cmake_modules/FindThriftAlt.cmake +@@ -45,23 +45,23 @@ endif() + # * https://github.com/apache/thrift/pull/2725 + # * https://github.com/apache/thrift/pull/2726 + # * https://github.com/conda-forge/thrift-cpp-feedstock/issues/68 +-if(NOT WIN32) +- set(find_package_args "") +- if(ThriftAlt_FIND_VERSION) +- list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) +- endif() +- if(ThriftAlt_FIND_QUIETLY) +- list(APPEND find_package_args QUIET) +- endif() +- find_package(Thrift ${find_package_args}) +- if(Thrift_FOUND) +- set(ThriftAlt_FOUND TRUE) +- add_executable(thrift::compiler IMPORTED) +- set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION +- "${THRIFT_COMPILER}") +- return() +- endif() ++ ++set(find_package_args "") ++if(ThriftAlt_FIND_VERSION) ++ list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) ++endif() ++if(ThriftAlt_FIND_QUIETLY) ++ list(APPEND find_package_args QUIET) + endif() ++find_package(Thrift ${find_package_args}) ++if(Thrift_FOUND) ++ set(ThriftAlt_FOUND TRUE) ++ add_executable(thrift::compiler IMPORTED) ++ set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION ++ "${THRIFT_COMPILER}") ++ return() ++endif() ++ + + function(extract_thrift_version) + if(ThriftAlt_INCLUDE_DIR) +diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt +index 93f2e72d8..e00f73f7d 100644 +--- a/cpp/src/parquet/CMakeLists.txt ++++ b/cpp/src/parquet/CMakeLists.txt +@@ -262,11 +262,11 @@ if(NOT PARQUET_MINIMAL_DEPENDENCY) + + # These are libraries that we will link privately with parquet_shared (as they + # do not need to be linked transitively by other linkers) +- list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS thrift::thrift) ++ list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS Boost::headers thrift::thrift) + + # Link publicly with parquet_static (because internal users need to + # transitively link all dependencies) +- list(APPEND PARQUET_STATIC_LINK_LIBS thrift::thrift) ++ list(APPEND PARQUET_STATIC_LINK_LIBS Boost::headers thrift::thrift) + if(NOT THRIFT_VENDORED) + list(APPEND PARQUET_STATIC_INSTALL_INTERFACE_LIBS thrift::thrift) + endif() diff --git a/ci/conan/all/patches/18.0.0-0001-fix-cmake.patch b/ci/conan/all/patches/18.0.0-0001-fix-cmake.patch new file mode 100644 index 0000000000000..9abff332e4b6d --- /dev/null +++ b/ci/conan/all/patches/18.0.0-0001-fix-cmake.patch @@ -0,0 +1,81 @@ +MIT License + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following 
conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +diff --git a/cpp/cmake_modules/FindThriftAlt.cmake b/cpp/cmake_modules/FindThriftAlt.cmake +index 98a706d..edf195e 100644 +--- a/cpp/cmake_modules/FindThriftAlt.cmake ++++ b/cpp/cmake_modules/FindThriftAlt.cmake +@@ -45,22 +45,20 @@ endif() + # * https://github.com/apache/thrift/pull/2725 + # * https://github.com/apache/thrift/pull/2726 + # * https://github.com/conda-forge/thrift-cpp-feedstock/issues/68 +-if(NOT WIN32) +- set(find_package_args "") +- if(ThriftAlt_FIND_VERSION) +- list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) +- endif() +- if(ThriftAlt_FIND_QUIETLY) +- list(APPEND find_package_args QUIET) +- endif() +- find_package(Thrift ${find_package_args}) +- if(Thrift_FOUND) +- set(ThriftAlt_FOUND TRUE) +- add_executable(thrift::compiler IMPORTED) +- set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION +- "${THRIFT_COMPILER}") +- return() +- endif() ++set(find_package_args "") ++if(ThriftAlt_FIND_VERSION) ++ list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) ++endif() ++if(ThriftAlt_FIND_QUIETLY) ++ list(APPEND find_package_args QUIET) ++endif() ++find_package(Thrift ${find_package_args}) ++if(Thrift_FOUND) ++ set(ThriftAlt_FOUND TRUE) ++ add_executable(thrift::compiler IMPORTED) ++ set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION ++ "${THRIFT_COMPILER}") ++ return() + endif() + + function(extract_thrift_version) +diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt +index b984ef7..429fc6d 100644 +--- a/cpp/src/parquet/CMakeLists.txt ++++ b/cpp/src/parquet/CMakeLists.txt +@@ -263,11 +263,11 @@ if(NOT PARQUET_MINIMAL_DEPENDENCY) + + # These are libraries that we will link privately with parquet_shared (as they + # do not need to be linked transitively by other linkers) +- list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS thrift::thrift) ++ list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS Boost::headers thrift::thrift) + + # Link publicly with parquet_static (because internal users need to + # transitively link all dependencies) +- list(APPEND PARQUET_STATIC_LINK_LIBS thrift::thrift) ++ list(APPEND PARQUET_STATIC_LINK_LIBS Boost::headers thrift::thrift) + if(NOT THRIFT_VENDORED) + list(APPEND PARQUET_STATIC_INSTALL_INTERFACE_LIBS thrift::thrift) + endif() diff --git a/ci/conan/all/patches/2.0.0-0003-fix-shared-msvc.patch b/ci/conan/all/patches/2.0.0-0003-fix-shared-msvc.patch deleted file mode 100644 index 3583e5c221707..0000000000000 --- a/ci/conan/all/patches/2.0.0-0003-fix-shared-msvc.patch +++ /dev/null @@ -1,35 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons 
to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - ---- cpp/src/arrow/CMakeLists.txt -+++ cpp/src/arrow/CMakeLists.txt -@@ -504,6 +504,10 @@ - target_compile_definitions(arrow_static PUBLIC ARROW_STATIC) - endif() - -+if(ARROW_BUILD_SHARED AND WIN32) -+target_compile_definitions(arrow_shared PRIVATE ARROW_EXPORTING) -+endif() -+ - if(ARROW_WITH_BACKTRACE) - find_package(Backtrace) - diff --git a/ci/conan/all/patches/2.0.0-0005-gandiva-engine.patch b/ci/conan/all/patches/2.0.0-0005-gandiva-engine.patch deleted file mode 100644 index 6dc0c7947a5e0..0000000000000 --- a/ci/conan/all/patches/2.0.0-0005-gandiva-engine.patch +++ /dev/null @@ -1,35 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - ---- cpp/src/gandiva/engine.cc -+++ cpp/src/gandiva/engine.cc -@@ -64,6 +64,10 @@ - #include - #include - -+#if GANDIVA_LLVM_VERSION >= 11 -+#include -+#endif -+ - #if defined(_MSC_VER) - #pragma warning(pop) - #endif diff --git a/ci/conan/all/patches/2.0.0-0008-fix-cmake.patch b/ci/conan/all/patches/2.0.0-0008-fix-cmake.patch deleted file mode 100644 index abdcf7a0fa36a..0000000000000 --- a/ci/conan/all/patches/2.0.0-0008-fix-cmake.patch +++ /dev/null @@ -1,295 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt -index 515e6af..7488161 100644 ---- a/cpp/CMakeLists.txt -+++ b/cpp/CMakeLists.txt -@@ -109,7 +109,7 @@ set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") - set(ARROW_CMAKE_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") - set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}") - --set(ARROW_LLVM_VERSIONS "10" "9" "8" "7") -+set(ARROW_LLVM_VERSIONS "13" "12" "11" "10" "9" "8" "7") - list(GET ARROW_LLVM_VERSIONS 0 ARROW_LLVM_VERSION_PRIMARY) - string(REGEX - REPLACE "^([0-9]+)(\\..+)?" "\\1" ARROW_LLVM_VERSION_PRIMARY_MAJOR -@@ -667,7 +667,7 @@ endif() - - if(ARROW_WITH_BROTLI) - # Order is important for static linking -- set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) -+ set(ARROW_BROTLI_LIBS brotli::brotlienc brotli::brotlidec brotli::brotlicommon) - list(APPEND ARROW_LINK_LIBS ${ARROW_BROTLI_LIBS}) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS}) - if(Brotli_SOURCE STREQUAL "SYSTEM") -@@ -683,9 +683,9 @@ if(ARROW_WITH_BZ2) - endif() - - if(ARROW_WITH_LZ4) -- list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) -+ list(APPEND ARROW_STATIC_LINK_LIBS lz4::lz4) - if(Lz4_SOURCE STREQUAL "SYSTEM") -- list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) -+ list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS lz4::lz4) - endif() - endif() - -@@ -842,8 +842,14 @@ endif() - - if(ARROW_MIMALLOC) - add_definitions(-DARROW_MIMALLOC) -- list(APPEND ARROW_LINK_LIBS mimalloc::mimalloc) -- list(APPEND ARROW_STATIC_LINK_LIBS mimalloc::mimalloc) -+ if (TARGET mimalloc-static) -+ list(APPEND ARROW_LINK_LIBS mimalloc-static) -+ list(APPEND ARROW_STATIC_LINK_LIBS mimalloc-static) -+ else() -+ list(APPEND ARROW_LINK_LIBS mimalloc) -+ list(APPEND ARROW_STATIC_LINK_LIBS mimalloc) -+ endif() -+ - endif() - - # ---------------------------------------------------------------------- -diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake -index cc37a3c..8fe6db9 100644 ---- a/cpp/cmake_modules/ThirdpartyToolchain.cmake -+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake -@@ -171,6 +171,7 @@ macro(provide_find_module DEPENDENCY_NAME) - endmacro() - - macro(resolve_dependency DEPENDENCY_NAME) -+if(0) - set(options) - set(one_value_args REQUIRED_VERSION) - cmake_parse_arguments(ARG -@@ -207,6 +208,14 @@ macro(resolve_dependency DEPENDENCY_NAME) - provide_find_module(${DEPENDENCY_NAME}) - list(APPEND ARROW_SYSTEM_DEPENDENCIES ${DEPENDENCY_NAME}) - endif() -+else() -+ if(ARG_REQUIRED_VERSION) -+ find_package(${DEPENDENCY_NAME} ${ARG_REQUIRED_VERSION} REQUIRED) -+ else() -+ find_package(${DEPENDENCY_NAME} REQUIRED) -+ endif() -+ list(APPEND ARROW_SYSTEM_DEPENDENCIES ${DEPENDENCY_NAME}) -+endif() - endmacro() - - # ---------------------------------------------------------------------- -@@ -826,6 +835,7 @@ endif() - # - Tests need Boost at runtime. - # - S3FS and Flight benchmarks need Boost at runtime. 
- if(ARROW_BUILD_INTEGRATION -+ OR ARROW_BOOST_REQUIRED - OR ARROW_BUILD_TESTS - OR ARROW_GANDIVA - OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS) -@@ -846,7 +856,7 @@ if(ARROW_BOOST_REQUIRED) - elseif(BOOST_SOURCE STREQUAL "BUNDLED") - build_boost() - elseif(BOOST_SOURCE STREQUAL "SYSTEM") -- find_package(BoostAlt ${ARROW_BOOST_REQUIRED_VERSION} REQUIRED) -+ find_package(Boost ${ARROW_BOOST_REQUIRED_VERSION} REQUIRED) - endif() - - if(TARGET Boost::system) -@@ -973,11 +983,11 @@ macro(build_brotli) - endmacro() - - if(ARROW_WITH_BROTLI) -- resolve_dependency(Brotli) -+ resolve_dependency(brotli) - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon -+ get_target_property(BROTLI_INCLUDE_DIR brotli::brotlicommon - INTERFACE_INCLUDE_DIRECTORIES) -- include_directories(SYSTEM ${BROTLI_INCLUDE_DIR}) -+ include_directories(SYSTEM ${brotli_INCLUDE_DIR}) - endif() - - if(PARQUET_REQUIRE_ENCRYPTION AND NOT ARROW_PARQUET) -@@ -1200,9 +1210,10 @@ if(ARROW_NEED_GFLAGS) - endif() - endif() - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) -+ include_directories(SYSTEM ${gflags_INCLUDE_DIR}) -+ set(GFLAGS_LIBRARIES ${gflags_LIBRARIES}) - -- if(NOT TARGET ${GFLAGS_LIBRARIES}) -+ if(0) - if(TARGET gflags-shared) - set(GFLAGS_LIBRARIES gflags-shared) - elseif(TARGET gflags_shared) -@@ -1291,12 +1302,13 @@ endmacro() - if(ARROW_WITH_THRIFT) - # We already may have looked for Thrift earlier, when considering whether - # to build Boost, so don't look again if already found. -- if(NOT Thrift_FOUND AND NOT THRIFT_FOUND) -+ if(0) - # Thrift c++ code generated by 0.13 requires 0.11 or greater - resolve_dependency(Thrift REQUIRED_VERSION 0.11.0) - endif() -+ find_package(Thrift CONFIG REQUIRED) - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${THRIFT_INCLUDE_DIR}) -+ include_directories(SYSTEM ${Thrift_INCLUDE_DIR}) - endif() - - # ---------------------------------------------------------------------- -@@ -1461,6 +1473,7 @@ endif() - # jemalloc - Unix-only high-performance allocator - - if(ARROW_JEMALLOC) -+if(0) - message(STATUS "Building (vendored) jemalloc from source") - # We only use a vendored jemalloc as we want to control its version. - # Also our build of jemalloc is specially prefixed so that it will not -@@ -1519,12 +1532,18 @@ if(ARROW_JEMALLOC) - add_dependencies(jemalloc::jemalloc jemalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc) -+else() -+ find_package(jemalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${jemalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${jemalloc_LIBRARIES_TARGETS} ) -+endif() - endif() - - # ---------------------------------------------------------------------- - # mimalloc - Cross-platform high-performance allocator, from Microsoft - - if(ARROW_MIMALLOC) -+if(0) - message(STATUS "Building (vendored) mimalloc from source") - # We only use a vendored mimalloc as we want to control its build options. 
- -@@ -1572,6 +1591,11 @@ if(ARROW_MIMALLOC) - add_dependencies(toolchain mimalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) -+else() -+ find_package(mimalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${mimalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${mimalloc_LIBRARIES_TARGETS} ) -+endif() - endif() - - # ---------------------------------------------------------------------- -@@ -1971,11 +1995,16 @@ macro(build_lz4) - endmacro() - - if(ARROW_WITH_LZ4) -- resolve_dependency(Lz4) -+ resolve_dependency(lz4) - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) -- include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) -+ if(TARGET LZ4::lz4_static) -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_static INTERFACE_INCLUDE_DIRECTORIES) -+ else() -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_shared INTERFACE_INCLUDE_DIRECTORIES) -+ endif() -+ include_directories(SYSTEM ${lz4_INCLUDE_DIR}) -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${lz4_LIBRARIES_TARGETS} ) - endif() - - macro(build_zstd) -@@ -2090,10 +2119,10 @@ macro(build_re2) - endmacro() - - if(ARROW_GANDIVA) -- resolve_dependency(RE2) -+ resolve_dependency(re2) - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(RE2_INCLUDE_DIR RE2::re2 INTERFACE_INCLUDE_DIRECTORIES) -+ get_target_property(RE2_INCLUDE_DIR re2::re2 INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${RE2_INCLUDE_DIR}) - endif() - -@@ -2541,17 +2570,24 @@ if(ARROW_WITH_GRPC) - endif() - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(GRPC_INCLUDE_DIR gRPC::grpc INTERFACE_INCLUDE_DIRECTORIES) -+ if(grpc_INCLUDE_DIRS_RELEASE) -+ set(GRPC_INCLUDE_DIR ${grpc_INCLUDE_DIRS_RELEASE}) -+ elseif(grpc_INCLUDE_DIRS_DEBUG) -+ set(GRPC_INCLUDE_DIR ${grpc_INCLUDE_DIRS_DEBUG}) -+ endif() -+ - include_directories(SYSTEM ${GRPC_INCLUDE_DIR}) -+ include_directories(SYSTEM ${absl_INCLUDE_DIR}) -+ include_directories(SYSTEM ${protobuf_INCLUDE_DIR}) - - if(GRPC_VENDORED) - set(GRPCPP_PP_INCLUDE TRUE) - else() - # grpc++ headers may reside in ${GRPC_INCLUDE_DIR}/grpc++ or ${GRPC_INCLUDE_DIR}/grpcpp - # depending on the gRPC version. 
-- if(EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") -+ if(EXISTS ${gRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE TRUE) -- elseif(EXISTS "${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h") -+ elseif(EXISTS ${gRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE FALSE) - else() - message(FATAL_ERROR "Cannot find grpc++ headers in ${GRPC_INCLUDE_DIR}") -diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt -index 2751254..842fc9e 100644 ---- a/cpp/src/arrow/CMakeLists.txt -+++ b/cpp/src/arrow/CMakeLists.txt -@@ -307,10 +307,14 @@ set(ARROW_TESTING_SRCS - - set(_allocator_dependencies "") # Empty list - if(ARROW_JEMALLOC) -- list(APPEND _allocator_dependencies jemalloc_ep) -+ list(APPEND _allocator_dependencies jemalloc::jemalloc) - endif() - if(ARROW_MIMALLOC) -- list(APPEND _allocator_dependencies mimalloc_ep) -+ if (TARGET mimalloc-static) -+ list(APPEND _allocator_dependencies mimalloc-static) -+ else() -+ list(APPEND _allocator_dependencies mimalloc) -+ endif() - endif() - - if(_allocator_dependencies) -diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc -index 784bf7b..8f005a5 100644 ---- a/cpp/src/arrow/memory_pool.cc -+++ b/cpp/src/arrow/memory_pool.cc -@@ -31,7 +31,7 @@ - // Needed to support jemalloc 3 and 4 - #define JEMALLOC_MANGLE - // Explicitly link to our version of jemalloc --#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h" -+#include "jemalloc/jemalloc.h" - #endif - - #ifdef ARROW_MIMALLOC diff --git a/ci/conan/all/patches/7.0.0-0006-install-utils.patch b/ci/conan/all/patches/7.0.0-0006-install-utils.patch deleted file mode 100644 index 7674174c8e254..0000000000000 --- a/ci/conan/all/patches/7.0.0-0006-install-utils.patch +++ /dev/null @@ -1,39 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- -diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt -index 495018e..f6cee6f 100644 ---- a/cpp/src/arrow/ipc/CMakeLists.txt -+++ b/cpp/src/arrow/ipc/CMakeLists.txt -@@ -61,8 +61,12 @@ endif() - if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) - add_executable(arrow-file-to-stream file_to_stream.cc) - target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) -+ install(TARGETS arrow-file-to-stream ${INSTALL_IS_OPTIONAL} -+ DESTINATION ${CMAKE_INSTALL_BINDIR}) - add_executable(arrow-stream-to-file stream_to_file.cc) - target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) -+ install(TARGETS arrow-stream-to-file ${INSTALL_IS_OPTIONAL} -+ DESTINATION ${CMAKE_INSTALL_BINDIR}) - - if(ARROW_BUILD_INTEGRATION) - add_dependencies(arrow-integration arrow-file-to-stream) diff --git a/ci/conan/all/patches/7.0.0-0007-fix-cmake.patch b/ci/conan/all/patches/7.0.0-0007-fix-cmake.patch deleted file mode 100644 index eb2acb1523fc3..0000000000000 --- a/ci/conan/all/patches/7.0.0-0007-fix-cmake.patch +++ /dev/null @@ -1,369 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
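The install-utils patches above wire the arrow-file-to-stream and arrow-stream-to-file helpers into the install step so that packaged builds can ship them. As a rough sketch of the idiom outside the patch context (assuming, as the hunks do, that ${INSTALL_IS_OPTIONAL} expands to the OPTIONAL keyword or to nothing, so a utility that was not built does not fail the install):

# Sketch only: mirrors the install rule added in the hunks above.
# INSTALL_IS_OPTIONAL and ARROW_UTIL_LIB are assumed to be set by the
# surrounding Arrow build; they are not defined authoritatively here.
include(GNUInstallDirs)  # provides CMAKE_INSTALL_BINDIR
add_executable(arrow-file-to-stream file_to_stream.cc)
target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB})
install(TARGETS arrow-file-to-stream ${INSTALL_IS_OPTIONAL}
        DESTINATION ${CMAKE_INSTALL_BINDIR})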
- -diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt -index 2d7baf1..dff5b1a 100644 ---- a/cpp/CMakeLists.txt -+++ b/cpp/CMakeLists.txt -@@ -699,7 +699,7 @@ endif() - - if(ARROW_WITH_BROTLI) - # Order is important for static linking -- set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) -+ set(ARROW_BROTLI_LIBS brotli::brotlienc brotli::brotlidec brotli::brotlicommon) - list(APPEND ARROW_LINK_LIBS ${ARROW_BROTLI_LIBS}) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS}) - if(Brotli_SOURCE STREQUAL "SYSTEM") -@@ -715,10 +715,17 @@ if(ARROW_WITH_BZ2) - endif() - - if(ARROW_WITH_LZ4) -- list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) -- if(Lz4_SOURCE STREQUAL "SYSTEM") -- list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) -- endif() -+ if (TARGET LZ4::lz4_static) -+ list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4_static) -+ if(Lz4_SOURCE STREQUAL "SYSTEM") -+ list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4_static) -+ endif() -+ else() -+ list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4_shared) -+ if(Lz4_SOURCE STREQUAL "SYSTEM") -+ list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4_shared) -+ endif() -+endif() - endif() - - if(ARROW_WITH_SNAPPY) -@@ -907,8 +914,13 @@ endif() - - if(ARROW_MIMALLOC) - add_definitions(-DARROW_MIMALLOC) -- list(APPEND ARROW_LINK_LIBS mimalloc::mimalloc) -- list(APPEND ARROW_STATIC_LINK_LIBS mimalloc::mimalloc) -+ if (TARGET mimalloc-static) -+ list(APPEND ARROW_LINK_LIBS mimalloc-static) -+ list(APPEND ARROW_STATIC_LINK_LIBS mimalloc-static) -+ else() -+ list(APPEND ARROW_LINK_LIBS mimalloc) -+ list(APPEND ARROW_STATIC_LINK_LIBS mimalloc) -+ endif() - endif() - - # ---------------------------------------------------------------------- -diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake -index bc38952..62bf314 100644 ---- a/cpp/cmake_modules/ThirdpartyToolchain.cmake -+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake -@@ -954,7 +954,7 @@ endif() - - if(ARROW_BOOST_REQUIRED) - resolve_dependency(Boost -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_BOOST_REQUIRED_VERSION} -@@ -965,7 +965,7 @@ if(ARROW_BOOST_REQUIRED) - if(TARGET Boost::system) - set(BOOST_SYSTEM_LIBRARY Boost::system) - set(BOOST_FILESYSTEM_LIBRARY Boost::filesystem) -- elseif(BoostAlt_FOUND) -+ elseif(Boost_FOUND) - set(BOOST_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY}) - set(BOOST_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY}) - else() -@@ -1108,9 +1108,9 @@ macro(build_brotli) - endmacro() - - if(ARROW_WITH_BROTLI) -- resolve_dependency(Brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc) -+ resolve_dependency(brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc) - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon -+ get_target_property(BROTLI_INCLUDE_DIR brotli::brotlicommon - INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${BROTLI_INCLUDE_DIR}) - endif() -@@ -1302,22 +1302,17 @@ endmacro() - if(ARROW_NEED_GFLAGS) - set(ARROW_GFLAGS_REQUIRED_VERSION "2.1.0") - resolve_dependency(gflags -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_GFLAGS_REQUIRED_VERSION} - IS_RUNTIME_DEPENDENCY - FALSE) - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) -+ include_directories(SYSTEM ${gflags_INCLUDE_DIR}) - -- if(NOT TARGET ${GFLAGS_LIBRARIES}) -- if(TARGET gflags-shared) -- set(GFLAGS_LIBRARIES gflags-shared) -- 
elseif(TARGET gflags_shared) -- set(GFLAGS_LIBRARIES gflags_shared) -- endif() -- endif() -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${gflags_LIBRARIES_TARGETS}) -+ set(GFLAGS_LIBRARIES gflags::gflags) - endif() - - # ---------------------------------------------------------------------- -@@ -1411,9 +1406,9 @@ if(ARROW_WITH_THRIFT) - thrift) - endif() - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${THRIFT_INCLUDE_DIR}) -+ include_directories(SYSTEM ${Thrift_INCLUDE_DIR}) - -- string(REPLACE "." ";" VERSION_LIST ${THRIFT_VERSION}) -+ string(REPLACE "." ";" VERSION_LIST ${Thrift_VERSION}) - list(GET VERSION_LIST 0 THRIFT_VERSION_MAJOR) - list(GET VERSION_LIST 1 THRIFT_VERSION_MINOR) - list(GET VERSION_LIST 2 THRIFT_VERSION_PATCH) -@@ -1528,6 +1523,7 @@ if(ARROW_WITH_PROTOBUF) - set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") - endif() - resolve_dependency(Protobuf -+ USE_CONFIG - REQUIRED_VERSION - ${ARROW_PROTOBUF_REQUIRED_VERSION} - PC_PACKAGE_NAMES -@@ -1538,7 +1534,7 @@ if(ARROW_WITH_PROTOBUF) - endif() - - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR}) -+ include_directories(SYSTEM ${protobuf_INCLUDE_DIR}) - - if(TARGET arrow::protobuf::libprotobuf) - set(ARROW_PROTOBUF_LIBPROTOBUF arrow::protobuf::libprotobuf) -@@ -1547,9 +1543,9 @@ if(ARROW_WITH_PROTOBUF) - if(NOT TARGET protobuf::libprotobuf) - add_library(protobuf::libprotobuf UNKNOWN IMPORTED) - set_target_properties(protobuf::libprotobuf -- PROPERTIES IMPORTED_LOCATION "${PROTOBUF_LIBRARY}" -+ PROPERTIES IMPORTED_LOCATION "${Protobuf_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES -- "${PROTOBUF_INCLUDE_DIR}") -+ "${Protobuf_INCLUDE_DIR}") - endif() - set(ARROW_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) - endif() -@@ -1569,7 +1565,7 @@ if(ARROW_WITH_PROTOBUF) - set_target_properties(protobuf::libprotoc - PROPERTIES IMPORTED_LOCATION "${Protobuf_PROTOC_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES -- "${PROTOBUF_INCLUDE_DIR}") -+ "${Protobuf_INCLUDE_DIR}") - endif() - set(ARROW_PROTOBUF_LIBPROTOC protobuf::libprotoc) - endif() -@@ -1600,6 +1596,7 @@ endif() - # jemalloc - Unix-only high-performance allocator - - if(ARROW_JEMALLOC) -+if(0) - message(STATUS "Building (vendored) jemalloc from source") - # We only use a vendored jemalloc as we want to control its version. - # Also our build of jemalloc is specially prefixed so that it will not -@@ -1665,12 +1662,18 @@ if(ARROW_JEMALLOC) - add_dependencies(jemalloc::jemalloc jemalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc) -+else() -+ find_package(jemalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${jemalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${jemalloc_LIBRARIES_TARGETS}) -+endif() - endif() - - # ---------------------------------------------------------------------- - # mimalloc - Cross-platform high-performance allocator, from Microsoft - - if(ARROW_MIMALLOC) -+if(0) - message(STATUS "Building (vendored) mimalloc from source") - # We only use a vendored mimalloc as we want to control its build options. 
- -@@ -1716,6 +1719,11 @@ if(ARROW_MIMALLOC) - add_dependencies(toolchain mimalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) -+else() -+ find_package(mimalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${mimalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${mimalloc_LIBRARIES_TARGETS} ) -+endif() - endif() - - # ---------------------------------------------------------------------- -@@ -2001,7 +2009,7 @@ endmacro() - if(ARROW_WITH_RAPIDJSON) - set(ARROW_RAPIDJSON_REQUIRED_VERSION "1.1.0") - resolve_dependency(RapidJSON -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_RAPIDJSON_REQUIRED_VERSION} -@@ -2038,10 +2046,9 @@ endmacro() - - if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE") OR (NOT ARROW_RUNTIME_SIMD_LEVEL STREQUAL "NONE" - )) -- set(xsimd_SOURCE "BUNDLED") - resolve_dependency(xsimd) - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${XSIMD_INCLUDE_DIR}) -+ include_directories(SYSTEM ${xsimd_INCLUDE_DIR}) - endif() - - macro(build_zlib) -@@ -2140,10 +2147,14 @@ macro(build_lz4) - endmacro() - - if(ARROW_WITH_LZ4) -- resolve_dependency(Lz4 PC_PACKAGE_NAMES liblz4) -+ resolve_dependency(lz4) - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) -+ if (TARGET LZ4::lz4_static) -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_static INTERFACE_INCLUDE_DIRECTORIES) -+ else() -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_shared INTERFACE_INCLUDE_DIRECTORIES) -+ endif() - include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) - endif() - -@@ -2274,7 +2285,7 @@ if(ARROW_WITH_RE2) - # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may - # include -std=c++11. It's not compatible with C source and C++ - # source not uses C++ 11. -- resolve_dependency(re2 HAVE_ALT TRUE) -+ resolve_dependency(re2 USE_CONFIG TRUE) - if(${re2_SOURCE} STREQUAL "SYSTEM") - get_target_property(RE2_LIB re2::re2 IMPORTED_LOCATION) - string(APPEND ARROW_PC_LIBS_PRIVATE " ${RE2_LIB}") -@@ -2337,7 +2348,7 @@ endmacro() - if(ARROW_WITH_BZ2) - resolve_dependency(BZip2) - if(${BZip2_SOURCE} STREQUAL "SYSTEM") -- string(APPEND ARROW_PC_LIBS_PRIVATE " ${BZIP2_LIBRARIES}") -+ string(APPEND ARROW_PC_LIBS_PRIVATE " ${BZip2_LIBRARIES}") - endif() - - if(NOT TARGET BZip2::BZip2) -@@ -2346,7 +2357,7 @@ if(ARROW_WITH_BZ2) - PROPERTIES IMPORTED_LOCATION "${BZIP2_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") - endif() -- include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") -+ include_directories(SYSTEM "${BZip2_INCLUDE_DIR}") - endif() - - macro(build_utf8proc) -@@ -3555,7 +3566,7 @@ if(ARROW_WITH_GRPC) - set(gRPC_SOURCE "${Protobuf_SOURCE}") - endif() - resolve_dependency(gRPC -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_GRPC_REQUIRED_VERSION} -@@ -3573,9 +3584,9 @@ if(ARROW_WITH_GRPC) - else() - # grpc++ headers may reside in ${GRPC_INCLUDE_DIR}/grpc++ or ${GRPC_INCLUDE_DIR}/grpcpp - # depending on the gRPC version. 
-- if(EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") -+ if(EXISTS ${gRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE TRUE) -- elseif(EXISTS "${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h") -+ elseif(EXISTS ${gPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE FALSE) - else() - message(FATAL_ERROR "Cannot find grpc++ headers in ${GRPC_INCLUDE_DIR}") -@@ -4097,9 +4108,9 @@ macro(build_opentelemetry) - endmacro() - - if(ARROW_WITH_OPENTELEMETRY) -- set(opentelemetry-cpp_SOURCE "AUTO") -+ set(opentelemetry-cpp_SOURCE "SYSTEM") - resolve_dependency(opentelemetry-cpp) -- get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api -+ get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::opentelemetry_common - INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${OPENTELEMETRY_INCLUDE_DIR}) - message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") -diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt -index b984bc1..2c78cd9 100644 ---- a/cpp/src/arrow/CMakeLists.txt -+++ b/cpp/src/arrow/CMakeLists.txt -@@ -323,10 +323,14 @@ set(ARROW_TESTING_SRCS - - set(_allocator_dependencies "") # Empty list - if(ARROW_JEMALLOC) -- list(APPEND _allocator_dependencies jemalloc_ep) -+ list(APPEND _allocator_dependencies jemalloc::jemalloc) - endif() - if(ARROW_MIMALLOC) -- list(APPEND _allocator_dependencies mimalloc_ep) -+ if (TARGET mimalloc-static) -+ list(APPEND _allocator_dependencies mimalloc-static) -+ else() -+ list(APPEND _allocator_dependencies mimalloc) -+ endif() - endif() - - if(_allocator_dependencies) -diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt -index 2cf8c99..90ebb9a 100644 ---- a/cpp/src/arrow/flight/CMakeLists.txt -+++ b/cpp/src/arrow/flight/CMakeLists.txt -@@ -17,6 +17,9 @@ - - add_custom_target(arrow_flight) - -+# TODO: This is a temporary workaround. absl should be LINKED as TARGET. -+include_directories(SYSTEM ${absl_INCLUDE_DIR}) -+ - arrow_install_all_headers("arrow/flight") - - set(ARROW_FLIGHT_LINK_LIBS gRPC::grpc++ ${ARROW_PROTOBUF_LIBPROTOBUF}) -diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc -index 2dcfb01..0394c01 100644 ---- a/cpp/src/arrow/memory_pool.cc -+++ b/cpp/src/arrow/memory_pool.cc -@@ -48,7 +48,7 @@ - // Needed to support jemalloc 3 and 4 - #define JEMALLOC_MANGLE - // Explicitly link to our version of jemalloc --#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h" -+#include "jemalloc/jemalloc.h" - #endif - - #ifdef ARROW_MIMALLOC diff --git a/ci/conan/all/patches/8.0.0-0005-install-utils.patch b/ci/conan/all/patches/8.0.0-0005-install-utils.patch deleted file mode 100644 index 98075913ed109..0000000000000 --- a/ci/conan/all/patches/8.0.0-0005-install-utils.patch +++ /dev/null @@ -1,65 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt -index aba18c8..bb463d0 100644 ---- a/cpp/CMakeLists.txt -+++ b/cpp/CMakeLists.txt -@@ -721,7 +721,7 @@ if(ARROW_WITH_BZ2) - endif() - - if(ARROW_WITH_LZ4) -- list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4) -+ list(APPEND ARROW_STATIC_LINK_LIBS lz4::lz4) - if(Lz4_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) - endif() -@@ -907,8 +907,8 @@ endif() - if(ARROW_JEMALLOC) - add_definitions(-DARROW_JEMALLOC) - add_definitions(-DARROW_JEMALLOC_INCLUDE_DIR=${JEMALLOC_INCLUDE_DIR}) -- list(APPEND ARROW_LINK_LIBS jemalloc::jemalloc) -- list(APPEND ARROW_STATIC_LINK_LIBS jemalloc::jemalloc) -+ list(APPEND ARROW_LINK_LIBS jemalloc) -+ list(APPEND ARROW_STATIC_LINK_LIBS jemalloc) - endif() - - if(ARROW_MIMALLOC) -diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt -index 495018e..3dcb35d 100644 ---- a/cpp/src/arrow/ipc/CMakeLists.txt -+++ b/cpp/src/arrow/ipc/CMakeLists.txt -@@ -61,9 +61,13 @@ endif() - if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) - add_executable(arrow-file-to-stream file_to_stream.cc) - target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) -+ install(TARGETS arrow-file-to-stream ${INSTALL_IS_OPTIONAL} -+ DESTINATION ${CMAKE_INSTALL_BINDIR}) - add_executable(arrow-stream-to-file stream_to_file.cc) - target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) -- -+ install(TARGETS arrow-stream-to-file ${INSTALL_IS_OPTIONAL} -+ DESTINATION ${CMAKE_INSTALL_BINDIR}) -+ - if(ARROW_BUILD_INTEGRATION) - add_dependencies(arrow-integration arrow-file-to-stream) - add_dependencies(arrow-integration arrow-stream-to-file) diff --git a/ci/conan/all/patches/8.0.0-0006-fix-cmake.patch b/ci/conan/all/patches/8.0.0-0006-fix-cmake.patch deleted file mode 100644 index 7be516e1b4855..0000000000000 --- a/ci/conan/all/patches/8.0.0-0006-fix-cmake.patch +++ /dev/null @@ -1,447 +0,0 @@ -MIT License - -Copyright (c) 2019 Conan.io - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
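A pattern that recurs throughout these fix-cmake patches is that the Conan-generated package defines either a static or a shared imported target (LZ4::lz4_static vs LZ4::lz4_shared, mimalloc-static vs mimalloc), so the build links whichever one actually exists. A condensed sketch of that selection, using the target names shown in the hunks; they follow Conan CMakeDeps conventions and may differ with other toolchains:

# Sketch: link against whichever LZ4 imported target the resolved package
# defines. ARROW_LZ4_TARGET is a local helper variable for this sketch only.
find_package(lz4 REQUIRED CONFIG)
if(TARGET LZ4::lz4_static)
  set(ARROW_LZ4_TARGET LZ4::lz4_static)
else()
  set(ARROW_LZ4_TARGET LZ4::lz4_shared)
endif()
list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_LZ4_TARGET})
get_target_property(LZ4_INCLUDE_DIR ${ARROW_LZ4_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM ${LZ4_INCLUDE_DIR})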
- -diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt -index bb463d0..ce2d1df 100644 ---- a/cpp/CMakeLists.txt -+++ b/cpp/CMakeLists.txt -@@ -705,7 +705,7 @@ endif() - - if(ARROW_WITH_BROTLI) - # Order is important for static linking -- set(ARROW_BROTLI_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon) -+ set(ARROW_BROTLI_LIBS brotli::brotlienc brotli::brotlidec brotli::brotlicommon) - list(APPEND ARROW_LINK_LIBS ${ARROW_BROTLI_LIBS}) - list(APPEND ARROW_STATIC_LINK_LIBS ${ARROW_BROTLI_LIBS}) - if(Brotli_SOURCE STREQUAL "SYSTEM") -@@ -721,11 +721,18 @@ if(ARROW_WITH_BZ2) - endif() - - if(ARROW_WITH_LZ4) -- list(APPEND ARROW_STATIC_LINK_LIBS lz4::lz4) -- if(Lz4_SOURCE STREQUAL "SYSTEM") -- list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4) -+ if (TARGET LZ4::lz4_static) -+ list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4_static) -+ if(Lz4_SOURCE STREQUAL "SYSTEM") -+ list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4_static) -+ endif() -+ else() -+ list(APPEND ARROW_STATIC_LINK_LIBS LZ4::lz4_shared) -+ if(Lz4_SOURCE STREQUAL "SYSTEM") -+ list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS LZ4::lz4_shared) - endif() - endif() -+endif() - - if(ARROW_WITH_SNAPPY) - list(APPEND ARROW_STATIC_LINK_LIBS Snappy::snappy) -@@ -913,8 +920,13 @@ endif() - - if(ARROW_MIMALLOC) - add_definitions(-DARROW_MIMALLOC) -- list(APPEND ARROW_LINK_LIBS mimalloc::mimalloc) -- list(APPEND ARROW_STATIC_LINK_LIBS mimalloc::mimalloc) -+ if (TARGET mimalloc-static) -+ list(APPEND ARROW_LINK_LIBS mimalloc-static) -+ list(APPEND ARROW_STATIC_LINK_LIBS mimalloc-static) -+ else() -+ list(APPEND ARROW_LINK_LIBS mimalloc) -+ list(APPEND ARROW_STATIC_LINK_LIBS mimalloc) -+ endif() - endif() - - # ---------------------------------------------------------------------- -diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake -index f070323..16faf73 100644 ---- a/cpp/cmake_modules/ThirdpartyToolchain.cmake -+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake -@@ -959,6 +959,7 @@ endif() - # - Tests need Boost at runtime. - # - S3FS and Flight benchmarks need Boost at runtime. 
- if(ARROW_BUILD_INTEGRATION -+ OR ARROW_BOOST_REQUIRED - OR ARROW_BUILD_TESTS - OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS) - OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS)) -@@ -975,7 +976,7 @@ endif() - - if(ARROW_BOOST_REQUIRED) - resolve_dependency(Boost -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_BOOST_REQUIRED_VERSION} -@@ -986,7 +987,7 @@ if(ARROW_BOOST_REQUIRED) - if(TARGET Boost::system) - set(BOOST_SYSTEM_LIBRARY Boost::system) - set(BOOST_FILESYSTEM_LIBRARY Boost::filesystem) -- elseif(BoostAlt_FOUND) -+ elseif(Boost_FOUND) - set(BOOST_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY}) - set(BOOST_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY}) - else() -@@ -1129,9 +1130,9 @@ macro(build_brotli) - endmacro() - - if(ARROW_WITH_BROTLI) -- resolve_dependency(Brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc) -+ resolve_dependency(brotli PC_PACKAGE_NAMES libbrotlidec libbrotlienc) - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(BROTLI_INCLUDE_DIR Brotli::brotlicommon -+ get_target_property(BROTLI_INCLUDE_DIR brotli::brotlicommon - INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${BROTLI_INCLUDE_DIR}) - endif() -@@ -1323,22 +1324,16 @@ endmacro() - if(ARROW_NEED_GFLAGS) - set(ARROW_GFLAGS_REQUIRED_VERSION "2.1.0") - resolve_dependency(gflags -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_GFLAGS_REQUIRED_VERSION} - IS_RUNTIME_DEPENDENCY - FALSE) - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) -- -- if(NOT TARGET ${GFLAGS_LIBRARIES}) -- if(TARGET gflags-shared) -- set(GFLAGS_LIBRARIES gflags-shared) -- elseif(TARGET gflags_shared) -- set(GFLAGS_LIBRARIES gflags_shared) -- endif() -- endif() -+ include_directories(SYSTEM ${gflags_INCLUDE_DIR}) -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${gflags_LIBRARIES_TARGETS}) -+ set(GFLAGS_LIBRARIES gflags::gflags) - endif() - - # ---------------------------------------------------------------------- -@@ -1432,9 +1427,9 @@ if(ARROW_WITH_THRIFT) - thrift) - endif() - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${THRIFT_INCLUDE_DIR}) -+ include_directories(SYSTEM ${Thrift_INCLUDE_DIR}) - -- string(REPLACE "." ";" VERSION_LIST ${THRIFT_VERSION}) -+ string(REPLACE "." 
";" VERSION_LIST ${Thrift_VERSION}) - list(GET VERSION_LIST 0 THRIFT_VERSION_MAJOR) - list(GET VERSION_LIST 1 THRIFT_VERSION_MINOR) - list(GET VERSION_LIST 2 THRIFT_VERSION_PATCH) -@@ -1557,6 +1552,7 @@ if(ARROW_WITH_PROTOBUF) - set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") - endif() - resolve_dependency(Protobuf -+ USE_CONFIG - REQUIRED_VERSION - ${ARROW_PROTOBUF_REQUIRED_VERSION} - PC_PACKAGE_NAMES -@@ -1567,7 +1563,7 @@ if(ARROW_WITH_PROTOBUF) - endif() - - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR}) -+ include_directories(SYSTEM ${protobuf_INCLUDE_DIR}) - - if(TARGET arrow::protobuf::libprotobuf) - set(ARROW_PROTOBUF_LIBPROTOBUF arrow::protobuf::libprotobuf) -@@ -1576,9 +1572,9 @@ if(ARROW_WITH_PROTOBUF) - if(NOT TARGET protobuf::libprotobuf) - add_library(protobuf::libprotobuf UNKNOWN IMPORTED) - set_target_properties(protobuf::libprotobuf -- PROPERTIES IMPORTED_LOCATION "${PROTOBUF_LIBRARY}" -+ PROPERTIES IMPORTED_LOCATION "${Protobuf_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES -- "${PROTOBUF_INCLUDE_DIR}") -+ "${Protobuf_INCLUDE_DIR}") - endif() - set(ARROW_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) - endif() -@@ -1598,7 +1594,7 @@ if(ARROW_WITH_PROTOBUF) - set_target_properties(protobuf::libprotoc - PROPERTIES IMPORTED_LOCATION "${Protobuf_PROTOC_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES -- "${PROTOBUF_INCLUDE_DIR}") -+ "${Protobuf_INCLUDE_DIR}") - endif() - set(ARROW_PROTOBUF_LIBPROTOC protobuf::libprotoc) - endif() -@@ -1690,11 +1686,12 @@ macro(build_substrait) - - add_custom_target(substrait_gen ALL DEPENDS ${SUBSTRAIT_PROTO_GEN_ALL}) - -- set(SUBSTRAIT_INCLUDES ${SUBSTRAIT_CPP_DIR} ${PROTOBUF_INCLUDE_DIR}) -+ set(SUBSTRAIT_INCLUDES ${SUBSTRAIT_CPP_DIR} ${protobuf_INCLUDE_DIR}) - - add_library(substrait STATIC ${SUBSTRAIT_SOURCES}) - set_target_properties(substrait PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_include_directories(substrait PUBLIC ${SUBSTRAIT_INCLUDES}) -+ target_include_directories(substrait PUBLIC ${PROTOBUF_INCLUDE_DIR}) - target_link_libraries(substrait INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) - add_dependencies(substrait substrait_gen) - -@@ -1711,6 +1708,7 @@ endif() - # jemalloc - Unix-only high-performance allocator - - if(ARROW_JEMALLOC) -+if(0) - message(STATUS "Building (vendored) jemalloc from source") - # We only use a vendored jemalloc as we want to control its version. - # Also our build of jemalloc is specially prefixed so that it will not -@@ -1780,12 +1778,18 @@ if(ARROW_JEMALLOC) - add_dependencies(jemalloc::jemalloc jemalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS jemalloc::jemalloc) -+else() -+ find_package(jemalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${jemalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${jemalloc_LIBRARIES_TARGETS}) -+endif() - endif() - - # ---------------------------------------------------------------------- - # mimalloc - Cross-platform high-performance allocator, from Microsoft - - if(ARROW_MIMALLOC) -+if(0) - message(STATUS "Building (vendored) mimalloc from source") - # We only use a vendored mimalloc as we want to control its build options. 
- -@@ -1836,6 +1840,11 @@ if(ARROW_MIMALLOC) - add_dependencies(toolchain mimalloc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) -+else() -+ find_package(mimalloc REQUIRED CONFIG) -+ include_directories(SYSTEM "${mimalloc_INCLUDE_DIR}") -+ list(APPEND ARROW_BUNDLED_STATIC_LIBS ${mimalloc_LIBRARIES_TARGETS} ) -+endif() - endif() - - # ---------------------------------------------------------------------- -@@ -2121,7 +2130,7 @@ endmacro() - if(ARROW_WITH_RAPIDJSON) - set(ARROW_RAPIDJSON_REQUIRED_VERSION "1.1.0") - resolve_dependency(RapidJSON -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_RAPIDJSON_REQUIRED_VERSION} -@@ -2158,10 +2167,10 @@ endmacro() - - if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE") OR (NOT ARROW_RUNTIME_SIMD_LEVEL STREQUAL "NONE" - )) -- set(xsimd_SOURCE "BUNDLED") -+ set(xsimd_SOURCE "SYSTEM") - resolve_dependency(xsimd) - # TODO: Don't use global includes but rather target_include_directories -- include_directories(SYSTEM ${XSIMD_INCLUDE_DIR}) -+ include_directories(SYSTEM ${xsimd_INCLUDE_DIR}) - endif() - - macro(build_zlib) -@@ -2260,10 +2269,14 @@ macro(build_lz4) - endmacro() - - if(ARROW_WITH_LZ4) -- resolve_dependency(Lz4 PC_PACKAGE_NAMES liblz4) -+ resolve_dependency(Lz4) - - # TODO: Don't use global includes but rather target_include_directories -- get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) -+ if (TARGET LZ4::lz4_static) -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_static INTERFACE_INCLUDE_DIRECTORIES) -+ else() -+ get_target_property(LZ4_INCLUDE_DIR LZ4::lz4_shared INTERFACE_INCLUDE_DIRECTORIES) -+ endif() - include_directories(SYSTEM ${LZ4_INCLUDE_DIR}) - endif() - -@@ -2394,7 +2407,7 @@ if(ARROW_WITH_RE2) - # Don't specify "PC_PACKAGE_NAMES re2" here because re2.pc may - # include -std=c++11. It's not compatible with C source and C++ - # source not uses C++ 11. -- resolve_dependency(re2 HAVE_ALT TRUE) -+ resolve_dependency(re2 USE_CONFIG TRUE) - if(${re2_SOURCE} STREQUAL "SYSTEM") - get_target_property(RE2_LIB re2::re2 IMPORTED_LOCATION_${UPPERCASE_BUILD_TYPE}) - if(NOT RE2_LIB) -@@ -2464,7 +2477,7 @@ endmacro() - if(ARROW_WITH_BZ2) - resolve_dependency(BZip2) - if(${BZip2_SOURCE} STREQUAL "SYSTEM") -- string(APPEND ARROW_PC_LIBS_PRIVATE " ${BZIP2_LIBRARIES}") -+ string(APPEND ARROW_PC_LIBS_PRIVATE " ${BZip2_LIBRARIES}") - endif() - - if(NOT TARGET BZip2::BZip2) -@@ -2473,7 +2486,7 @@ if(ARROW_WITH_BZ2) - PROPERTIES IMPORTED_LOCATION "${BZIP2_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${BZIP2_INCLUDE_DIR}") - endif() -- include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") -+ include_directories(SYSTEM "${BZip2_INCLUDE_DIR}") - endif() - - macro(build_utf8proc) -@@ -3709,7 +3722,7 @@ if(ARROW_WITH_GRPC) - set(gRPC_SOURCE "${Protobuf_SOURCE}") - endif() - resolve_dependency(gRPC -- HAVE_ALT -+ USE_CONFIG - TRUE - REQUIRED_VERSION - ${ARROW_GRPC_REQUIRED_VERSION} -@@ -3727,9 +3740,9 @@ if(ARROW_WITH_GRPC) - else() - # grpc++ headers may reside in ${GRPC_INCLUDE_DIR}/grpc++ or ${GRPC_INCLUDE_DIR}/grpcpp - # depending on the gRPC version. 
-- if(EXISTS "${GRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h") -+ if(EXISTS ${gRPC_INCLUDE_DIR}/grpcpp/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE TRUE) -- elseif(EXISTS "${GRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h") -+ elseif(EXISTS ${gRPC_INCLUDE_DIR}/grpc++/impl/codegen/config_protobuf.h) - set(GRPCPP_PP_INCLUDE FALSE) - else() - message(FATAL_ERROR "Cannot find grpc++ headers in ${GRPC_INCLUDE_DIR}") -@@ -3937,7 +3950,7 @@ macro(build_google_cloud_cpp_storage) - endmacro() - - if(ARROW_WITH_GOOGLE_CLOUD_CPP) -- resolve_dependency(google_cloud_cpp_storage) -+ resolve_dependency(google_cloud_cpp) - get_target_property(google_cloud_cpp_storage_INCLUDE_DIR google-cloud-cpp::storage - INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${google_cloud_cpp_storage_INCLUDE_DIR}) -@@ -4264,9 +4277,9 @@ if(ARROW_WITH_OPENTELEMETRY) - # cURL is required whether we build from source or use an existing installation - # (OTel's cmake files do not call find_curl for you) - find_curl() -- set(opentelemetry-cpp_SOURCE "AUTO") -+ set(opentelemetry-cpp_SOURCE "SYSTEM") - resolve_dependency(opentelemetry-cpp) -- get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api -+ get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::opentelemetry_common - INTERFACE_INCLUDE_DIRECTORIES) - include_directories(SYSTEM ${OPENTELEMETRY_INCLUDE_DIR}) - message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") -diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt -index 690c51a..752f3b9 100644 ---- a/cpp/src/arrow/CMakeLists.txt -+++ b/cpp/src/arrow/CMakeLists.txt -@@ -326,10 +326,14 @@ set(ARROW_TESTING_SRCS - - set(_allocator_dependencies "") # Empty list - if(ARROW_JEMALLOC) -- list(APPEND _allocator_dependencies jemalloc_ep) -+ list(APPEND _allocator_dependencies jemalloc::jemalloc) - endif() - if(ARROW_MIMALLOC) -- list(APPEND _allocator_dependencies mimalloc_ep) -+ if (TARGET mimalloc-static) -+ list(APPEND _allocator_dependencies mimalloc-static) -+ else() -+ list(APPEND _allocator_dependencies mimalloc) -+ endif() - endif() - - if(_allocator_dependencies) -diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt -index f9d1356..c9bcf79 100644 ---- a/cpp/src/arrow/flight/CMakeLists.txt -+++ b/cpp/src/arrow/flight/CMakeLists.txt -@@ -17,6 +17,9 @@ - - add_custom_target(arrow_flight) - -+# TODO: This is a temporary workaround. absl should be LINKED as TARGET. 
-+include_directories(SYSTEM ${absl_INCLUDE_DIR}) -+ - arrow_install_all_headers("arrow/flight") - - set(ARROW_FLIGHT_LINK_LIBS gRPC::grpc++ ${ARROW_PROTOBUF_LIBPROTOBUF}) -diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc -index ed1c2d8..37a89da 100644 ---- a/cpp/src/arrow/memory_pool.cc -+++ b/cpp/src/arrow/memory_pool.cc -@@ -52,7 +52,7 @@ - // Needed to support jemalloc 3 and 4 - #define JEMALLOC_MANGLE - // Explicitly link to our version of jemalloc --#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h" -+#include "jemalloc/jemalloc.h" - #endif - - #ifdef ARROW_MIMALLOC -diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt -index 71faf9a..3aabea1 100644 ---- a/cpp/src/gandiva/CMakeLists.txt -+++ b/cpp/src/gandiva/CMakeLists.txt -@@ -25,7 +25,7 @@ add_custom_target(gandiva-benchmarks) - - add_dependencies(gandiva-all gandiva gandiva-tests gandiva-benchmarks) - --find_package(LLVMAlt REQUIRED) -+find_package(LLVM REQUIRED) - - if(LLVM_VERSION_MAJOR LESS "10") - set(GANDIVA_CXX_STANDARD ${CMAKE_CXX_STANDARD}) -@@ -40,7 +40,7 @@ endif() - - add_definitions(-DGANDIVA_LLVM_VERSION=${LLVM_VERSION_MAJOR}) - --find_package(OpenSSLAlt REQUIRED) -+find_package(OpenSSL REQUIRED) - - # Set the path where the bitcode file generated, see precompiled/CMakeLists.txt - set(GANDIVA_PRECOMPILED_BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/irhelpers.bc") -@@ -98,10 +98,11 @@ set(SRC_FILES - random_generator_holder.cc - ${GANDIVA_PRECOMPILED_CC_PATH}) - --set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared LLVM::LLVM_INTERFACE -- ${GANDIVA_OPENSSL_LIBS}) -+set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared llvm-core::llvm-core NTERFACE -+ ${GANDIVA_OPENSSL_LIBS}) -+ -+set(GANDIVA_STATIC_LINK_LIBS arrow_static llvm-core::llvm-core ${GANDIVA_OPENSSL_LIBS}) - --set(GANDIVA_STATIC_LINK_LIBS arrow_static LLVM::LLVM_INTERFACE ${GANDIVA_OPENSSL_LIBS}) - - if(ARROW_GANDIVA_STATIC_LIBSTDCPP AND (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX - )) -@@ -139,7 +140,7 @@ add_arrow_lib(gandiva - arrow_dependencies - precompiled - EXTRA_INCLUDES -- $ -+ $ - ${GANDIVA_OPENSSL_INCLUDE_DIR} - ${UTF8PROC_INCLUDE_DIR} - SHARED_LINK_FLAGS diff --git a/ci/conan/all/test_package/CMakeLists.txt b/ci/conan/all/test_package/CMakeLists.txt index 18761d0f52c21..b25c8e889cb84 100644 --- a/ci/conan/all/test_package/CMakeLists.txt +++ b/ci/conan/all/test_package/CMakeLists.txt @@ -26,7 +26,13 @@ project(test_package LANGUAGES CXX) find_package(Arrow REQUIRED CONFIG) add_executable(${PROJECT_NAME} test_package.cpp) -target_link_libraries(${PROJECT_NAME} PRIVATE arrow::arrow) + +if (TARGET Arrow::arrow_shared) + target_link_libraries(${PROJECT_NAME} PRIVATE Arrow::arrow_shared) +else() + target_link_libraries(${PROJECT_NAME} PRIVATE Arrow::arrow_static) +endif() + if (${Arrow_VERSION} VERSION_LESS "10.0.0") target_compile_features(${PROJECT_NAME} PRIVATE cxx_std_11) else() diff --git a/ci/conan/all/test_v1_package/CMakeLists.txt b/ci/conan/all/test_v1_package/CMakeLists.txt deleted file mode 100644 index faf547dec70c2..0000000000000 --- a/ci/conan/all/test_v1_package/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -# MIT License -# -# Copyright (c) 2019 Conan.io -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the 
Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -cmake_minimum_required(VERSION 3.1) - -project(test_package) - -include(${CMAKE_BINARY_DIR}/conanbuildinfo.cmake) -conan_basic_setup(TARGETS) - -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../test_package/ - ${CMAKE_CURRENT_BINARY_DIR}/test_package/) diff --git a/ci/conan/all/test_v1_package/conanfile.py b/ci/conan/all/test_v1_package/conanfile.py deleted file mode 100644 index 4f5cc2b61011b..0000000000000 --- a/ci/conan/all/test_v1_package/conanfile.py +++ /dev/null @@ -1,40 +0,0 @@ -# MIT License -# -# Copyright (c) 2019 Conan.io -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -from conans import ConanFile, CMake -from conan.tools.build import cross_building -import os - - -class TestPackageV1Conan(ConanFile): - settings = "os", "arch", "compiler", "build_type" - generators = "cmake", "cmake_find_package_multi" - - def build(self): - cmake = CMake(self) - cmake.configure() - cmake.build() - - def test(self): - if not cross_building(self): - bin_path = os.path.join("bin", "test_package") - self.run(bin_path, run_environment=True) diff --git a/ci/conan/config.yml b/ci/conan/config.yml index 3fa90be6f669a..cbb2fce054738 100644 --- a/ci/conan/config.yml +++ b/ci/conan/config.yml @@ -21,29 +21,15 @@ # SOFTWARE. 
versions: - "15.0.0": - folder: all - "14.0.2": - folder: all - "14.0.1": - folder: all - "14.0.0": - folder: all - "13.0.0": + "18.1.0": folder: all - "12.0.1": + "18.0.0": folder: all - "12.0.0": + "17.0.0": folder: all - "11.0.0": + "16.1.0": folder: all - "10.0.1": - folder: all - "10.0.0": - folder: all - "8.0.1": - folder: all - "8.0.0": - folder: all - "7.0.0": + "15.0.0": folder: all + "14.0.2": + folder: all \ No newline at end of file diff --git a/ci/conan/merge_status.sh b/ci/conan/merge_status.sh index bd99c22def1c9..600385c0e1770 100644 --- a/ci/conan/merge_status.sh +++ b/ci/conan/merge_status.sh @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. -UPSTREAM_REVISION=681a40adca5f83c80581814fe92316d6298ed96f +UPSTREAM_REVISION=a9b270f9d2052e193ce3c0a6c4e2fda0b0ac5ade diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index f28a24cac8d2d..731b49fa462d4 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -37,6 +37,7 @@ libprotobuf libutf8proc lz4-c make +meson ninja nodejs orc diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt index 751df9b2f3c01..840577fdd97a4 100644 --- a/ci/conda_env_sphinx.txt +++ b/ci/conda_env_sphinx.txt @@ -20,6 +20,9 @@ breathe doxygen ipython linkify-it-py +# We can't install linuxdoc by conda. We install linuxdoc by pip in +# ci/dockerfiles/conda-python-pandas.dockerfile. +# linuxdoc myst-parser numpydoc pydata-sphinx-theme=0.14 diff --git a/ci/docker/centos-7-cpp.dockerfile b/ci/docker/centos-7-cpp.dockerfile index 1f30eed694e4e..b012a5abed2e0 100644 --- a/ci/docker/centos-7-cpp.dockerfile +++ b/ci/docker/centos-7-cpp.dockerfile @@ -37,7 +37,6 @@ RUN \ -e 's/mirror\.centos\.org/vault.centos.org/' \ /etc/yum.repos.d/CentOS-SCLo-scl*.repo && \ yum install -y \ - cmake3 \ curl \ devtoolset-8 \ diffutils \ @@ -49,9 +48,13 @@ RUN \ wget \ which +ARG cmake +COPY ci/scripts/install_cmake.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local/ + COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN bash /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin ENV \ ARROW_R_DEV=TRUE \ - CMAKE=/usr/bin/cmake3 + CMAKE=/usr/local/bin/cmake diff --git a/ci/docker/conda-python-pandas.dockerfile b/ci/docker/conda-python-pandas.dockerfile index 9ee62cd282d36..4a52ffa8e12bc 100644 --- a/ci/docker/conda-python-pandas.dockerfile +++ b/ci/docker/conda-python-pandas.dockerfile @@ -27,6 +27,8 @@ ARG numpy=latest # so ensure to install doc requirements COPY ci/conda_env_sphinx.txt /arrow/ci/ RUN mamba install -q -y --file arrow/ci/conda_env_sphinx.txt && \ + # We can't install linuxdoc by mamba. We install linuxdoc by pip here. + pip install linuxdoc && \ mamba clean --all COPY ci/scripts/install_pandas.sh /arrow/ci/scripts/ diff --git a/ci/docker/conda-python-substrait.dockerfile b/ci/docker/conda-python-substrait.dockerfile deleted file mode 100644 index 36dd64e51e7ad..0000000000000 --- a/ci/docker/conda-python-substrait.dockerfile +++ /dev/null @@ -1,59 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG repo -ARG arch -ARG python=3.9 - -FROM ${repo}:${arch}-conda-python-${python} - -COPY ci/conda_env_python.txt \ - ci/conda_env_sphinx.txt \ - /arrow/ci/ - -# Note: openjdk is pinned to 17 because the -# substrait repo currently pins to jdk 17. -# Newer jdk versions are currently failing -# due to the recent upgrade to Gradle 8 via -# install_substrait_consumer.sh. -# https://github.com/substrait-io/substrait-java/issues/274 -RUN mamba install -q -y \ - --file arrow/ci/conda_env_python.txt \ - --file arrow/ci/conda_env_sphinx.txt \ - $([ "$python" == "3.9" ] && echo "pickle5") \ - python=${python} \ - openjdk=17 \ - nomkl && \ - mamba clean --all - - -ARG substrait=latest -COPY ci/scripts/install_substrait_consumer.sh /arrow/ci/scripts/ - -RUN /arrow/ci/scripts/install_substrait_consumer.sh - -ENV ARROW_ACERO=ON \ - ARROW_COMPUTE=ON \ - ARROW_CSV=ON \ - ARROW_DATASET=ON \ - ARROW_FILESYSTEM=ON \ - ARROW_FLIGHT=OFF \ - ARROW_FLIGHT_SQL=OFF \ - ARROW_GANDIVA=OFF \ - ARROW_JSON=ON \ - ARROW_SUBSTRAIT=ON \ - ARROW_TESTING=OFF diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index fe3976248cc86..44c845bb17eff 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -30,13 +30,14 @@ RUN apt-get update -y -q && \ lsb-release \ wget && \ if [ ${llvm} -ge 17 ]; then \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | \ - gpg \ - --import - \ - --keyring /usr/share/keyrings/llvm-snapshot.gpg \ - --no-default-keyring && \ - echo "deb[keyring=/usr/share/keyrings/llvm-snapshot.gpg] https://apt.llvm.org/$(lsb_release --codename --short)/ llvm-toolchain-$(lsb_release --codename --short)-${available_llvm} main" > \ - /etc/apt/sources.list.d/llvm.list; \ + wget -O /usr/share/keyrings/llvm-snapshot.asc \ + https://apt.llvm.org/llvm-snapshot.gpg.key && \ + (echo "Types: deb"; \ + echo "URIs: https://apt.llvm.org/$(lsb_release --codename --short)/"; \ + echo "Suites: llvm-toolchain-$(lsb_release --codename --short)-${llvm}"; \ + echo "Components: main"; \ + echo "Signed-By: /usr/share/keyrings/llvm-snapshot.asc") | \ + tee /etc/apt/sources.list.d/llvm.sources; \ fi && \ apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile deleted file mode 100644 index 479f4aa598b18..0000000000000 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ /dev/null @@ -1,55 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base -FROM ${base} - -# Install the libraries required by the Gandiva to run -# Use enable llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva -RUN vcpkg install \ - --clean-after-build \ - --x-install-root=${VCPKG_ROOT}/installed \ - --x-manifest-root=/arrow/ci/vcpkg \ - --x-feature=dev \ - --x-feature=flight \ - --x-feature=gcs \ - --x-feature=json \ - --x-feature=parquet \ - --x-feature=gandiva \ - --x-feature=s3 - -# Install Java -ARG java=11 -ARG maven=3.9.3 -RUN yum install -y java-$java-openjdk-devel && \ - yum clean all && \ - curl \ - --fail \ - --location \ - "https://www.apache.org/dyn/closer.lua?action=download&filename=maven/maven-3/${maven}/binaries/apache-maven-${maven}-bin.tar.gz" | \ - tar xfz - -C /usr/local && \ - ln -s /usr/local/apache-maven-${maven}/bin/mvn /usr/local/bin - -# Install the gcs testbench -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ -RUN PYTHON=python /arrow/ci/scripts/install_gcs_testbench.sh default - -# For ci/scripts/{cpp,java}_*.sh -ENV ARROW_HOME=/tmp/local \ - ARROW_JAVA_CDATA=ON \ - ARROW_JAVA_JNI=ON \ - ARROW_USE_CCACHE=ON diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 0804f3543c283..31435d4989129 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -19,7 +19,6 @@ ARG base FROM ${base} ARG r=4.4 -ARG jdk=11 ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium @@ -67,7 +66,6 @@ RUN apt-get update -y && \ nodejs \ npm \ nvidia-cuda-toolkit \ - openjdk-${jdk}-jdk-headless \ pandoc \ r-recommended=${r}* \ r-base=${r}* \ @@ -80,15 +78,6 @@ RUN apt-get update -y && \ PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \ npm install -g yarn @mermaid-js/mermaid-cli -ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64 - -ARG maven=3.8.7 -COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/util_download_apache.sh \ - "maven/maven-3/${maven}/binaries/apache-maven-${maven}-bin.tar.gz" /opt -ENV PATH=/opt/apache-maven-${maven}/bin:$PATH -RUN mvn -version - COPY c_glib/Gemfile /arrow/c_glib/ RUN gem install --no-document bundler && \ bundle install --gemfile /arrow/c_glib/Gemfile @@ -133,4 +122,5 @@ ENV ARROW_ACERO=ON \ ARROW_S3=ON \ ARROW_USE_GLOG=OFF \ CMAKE_UNITY_BUILD=ON \ + CUDAToolkit_ROOT=/usr \ RETICULATE_PYTHON_ENV=${ARROW_PYTHON_VENV} diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile index 9ec80440a3c21..b73cc585ea74e 100644 --- a/ci/docker/linux-apt-lint.dockerfile +++ b/ci/docker/linux-apt-lint.dockerfile @@ -58,6 +58,7 @@ RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site # We don't need arrow's dependencies, only lintr (and its dependencies) RUN R -e "install.packages('lintr')" +RUN R -e "install.packages('cyclocomp')" # Docker linter COPY --from=hadolint /bin/hadolint /usr/bin/hadolint diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index 7b7e989adc0d1..da378eac43028 100644 --- a/ci/docker/linux-r.dockerfile +++ 
b/ci/docker/linux-r.dockerfile @@ -51,6 +51,10 @@ RUN /arrow/ci/scripts/r_docker_configure.sh COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin +ARG cmake +COPY ci/scripts/install_cmake.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local/ + COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow diff --git a/ci/docker/python-free-threaded-wheel-windows-test-vs2019.dockerfile b/ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile similarity index 100% rename from ci/docker/python-free-threaded-wheel-windows-test-vs2019.dockerfile rename to ci/docker/python-free-threaded-wheel-windows-test-vs2022.dockerfile diff --git a/ci/docker/python-free-threaded-wheel-windows-vs2019.dockerfile b/ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile similarity index 100% rename from ci/docker/python-free-threaded-wheel-windows-vs2019.dockerfile rename to ci/docker/python-free-threaded-wheel-windows-vs2022.dockerfile diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index 0b5645285b6e1..ffcaa8c0a0741 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -53,7 +53,7 @@ ENV PATH=/opt/python/${CPYTHON_VERSION}-${CPYTHON_VERSION}/bin:${PATH} # Install CMake ARG cmake=3.29.2 COPY ci/scripts/install_cmake.sh arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_cmake.sh ${arch} linux ${cmake} /usr/local +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local # Install Ninja ARG ninja=1.10.2 diff --git a/ci/docker/python-wheel-windows-test-vs2019-base.dockerfile b/ci/docker/python-wheel-windows-test-vs2019-base.dockerfile deleted file mode 100644 index 73a78da30b907..0000000000000 --- a/ci/docker/python-wheel-windows-test-vs2019-base.dockerfile +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: You must update PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION in .env -# when you update this file. - -# based on mcr.microsoft.com/windows/servercore:ltsc2019 -# contains choco and vs2019 preinstalled -FROM abrarov/msvc-2019:2.11.0 - -# hadolint shell=cmd.exe - -# Add unix tools to path -RUN setx path "%path%;C:\Program Files\Git\usr\bin" - -# 1. Remove previous installations of Python from the base image -# NOTE: a more recent base image (tried with 2.12.1) comes with Python 3.9.7 -# and the MSI installers are failing to remove pip and tcl/tk "products" making -# the subsequent choco python installation step failing for installing Python -# version 3.9.* due to existing python version -# 2. Install Minio for S3 testing. 
-RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ - rm -rf Python* && \ - curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z \ - --output "C:\Windows\Minio.exe" - -# Install archiver to extract xz archives (for timezone database). -# Install the GCS testbench using a well-known Python version. -# NOTE: cannot use pipx's `--fetch-missing-python` because of -# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves. -RUN choco install --no-progress -r -y archiver && \ - choco install -r -y --pre --no-progress python --version=3.11.9 -ENV PIPX_BIN_DIR=C:\\Windows\\ -ENV PIPX_PYTHON="C:\Python311\python.exe" -COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/ -RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \ - storage-testbench -h diff --git a/ci/docker/python-wheel-windows-test-vs2022-base.dockerfile b/ci/docker/python-wheel-windows-test-vs2022-base.dockerfile new file mode 100644 index 0000000000000..1d1602c03a235 --- /dev/null +++ b/ci/docker/python-wheel-windows-test-vs2022-base.dockerfile @@ -0,0 +1,65 @@ +# escape=` + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: You must update PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION in .env +# when you update this file. + +FROM mcr.microsoft.com/windows/servercore:ltsc2022 + +# Ensure we in a command shell and not Powershell +SHELL ["cmd", "/S", "/C"] + +# Install MSVC BuildTools +# +# The set of components below (lines starting with --add) is the most minimal +# set we could find that would still compile Arrow C++. 
+RUN ` + curl -SL --output vs_buildtools.exe https://aka.ms/vs/17/release/vs_buildtools.exe ` + && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache ` + --installPath "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools" ` + --add Microsoft.VisualStudio.Component.VC.CoreBuildTools ` + --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ` + --add Microsoft.VisualStudio.Component.Windows10SDK.20348 ` + --add Microsoft.VisualStudio.Component.VC.CMake.Project ` + || IF "%ERRORLEVEL%"=="3010" EXIT 0) ` + && del /q vs_buildtools.exe + +# Install choco CLI +# +# We switch into Powershell just for this command and switch back to cmd +# See https://chocolatey.org/install#completely-offline-install +SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] +RUN ` + Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) +SHELL ["cmd", "/S", "/C"] + +# Install git, wget, minio +RUN choco install --no-progress -r -y git wget +RUN curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z ` + --output "C:\Windows\Minio.exe" + +# Install the GCS testbench using a well-known Python version. +# NOTE: cannot use pipx's `--fetch-missing-python` because of +# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves. +RUN choco install -r -y --pre --no-progress python --version=3.11.9 +ENV PIPX_BIN_DIR=C:\\Windows\\ +ENV PIPX_PYTHON="C:\Python311\python.exe" +COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/ +RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && ` + storage-testbench -h diff --git a/ci/docker/python-wheel-windows-test-vs2019.dockerfile b/ci/docker/python-wheel-windows-test-vs2022.dockerfile similarity index 100% rename from ci/docker/python-wheel-windows-test-vs2019.dockerfile rename to ci/docker/python-wheel-windows-test-vs2022.dockerfile diff --git a/ci/docker/python-wheel-windows-vs2019-base.dockerfile b/ci/docker/python-wheel-windows-vs2019-base.dockerfile deleted file mode 100644 index bd91f01bf9b6d..0000000000000 --- a/ci/docker/python-wheel-windows-vs2019-base.dockerfile +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: You must update PYTHON_WHEEL_WINDOWS_IMAGE_REVISION in .env -# when you update this file. 
- -# based on mcr.microsoft.com/windows/servercore:ltsc2019 -# contains choco and vs2019 preinstalled -FROM abrarov/msvc-2019:2.11.0 - -# Install CMake and Ninja -ARG cmake=3.31.2 -RUN choco install --no-progress -r -y cmake --version=%cmake% --installargs 'ADD_CMAKE_TO_PATH=System' && \ - choco install --no-progress -r -y gzip wget ninja - -# Add unix tools to path -RUN setx path "%path%;C:\Program Files\Git\usr\bin" - -# Install vcpkg -# -# Compiling vcpkg itself from a git tag doesn't work anymore since vcpkg has -# started to ship precompiled binaries for the vcpkg-tool. -ARG vcpkg -COPY ci/vcpkg/*.patch \ - ci/vcpkg/*windows*.cmake \ - arrow/ci/vcpkg/ -COPY ci/scripts/install_vcpkg.sh arrow/ci/scripts/ -ENV VCPKG_ROOT=C:\\vcpkg -RUN bash arrow/ci/scripts/install_vcpkg.sh /c/vcpkg %vcpkg% && \ - setx PATH "%PATH%;%VCPKG_ROOT%" - -# Configure vcpkg and install dependencies -# NOTE: use windows batch environment notation for build arguments in RUN -# statements but bash notation in ENV statements -# VCPKG_FORCE_SYSTEM_BINARIES=1 spare around ~750MB of image size if the system -# cmake's and ninja's versions are recent enough -ARG build_type=release -ENV CMAKE_BUILD_TYPE=${build_type} \ - VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \ - VCPKG_DEFAULT_TRIPLET=amd64-windows-static-md-${build_type} \ - VCPKG_FEATURE_FLAGS="manifests" -COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/ -# cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains -# ssl related fixes as well as we can patch the vcpkg portfile to support -# arm machines it hits ARROW-15141 where we would need to fall back to 1.8.186 -# but we cannot patch those portfiles since vcpkg-tool handles the checkout of -# previous versions => use bundled S3 build -RUN vcpkg install \ - --clean-after-build \ - --x-install-root=%VCPKG_ROOT%\installed \ - --x-manifest-root=arrow/ci/vcpkg \ - --x-feature=flight \ - --x-feature=gcs \ - --x-feature=json \ - --x-feature=orc \ - --x-feature=parquet \ - --x-feature=s3 - -# Remove previous installations of Python from the base image -# NOTE: a more recent base image (tried with 2.12.1) comes with Python 3.9.7 -# and the MSI installers are failing to remove pip and tcl/tk "products" making -# the subsequent choco python installation step failing for installing Python -# version 3.9.* due to existing Python version -RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ - rm -rf Python* diff --git a/ci/docker/python-wheel-windows-vs2022-base.dockerfile b/ci/docker/python-wheel-windows-vs2022-base.dockerfile new file mode 100644 index 0000000000000..7f683487a8c01 --- /dev/null +++ b/ci/docker/python-wheel-windows-vs2022-base.dockerfile @@ -0,0 +1,132 @@ +# escape=` + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +# NOTE: To build this Dockerfile, you probably need to do the following two +# things: +# +# 1. Increase your container image size to a higher value. +# +# e.g., +# +# Set a custom 'storage-opts' value in your Windows Docker config and restart +# Docker: +# +# "storage-opts": [ +# "size=50GB" +# ] +# +# See +# +# https://learn.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/container-storage#example +# +# for details on this step and +# +# https://learn.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2022#troubleshoot-build-tools-containers +# +# for more information. +# +# 2. Increase the memory limit for the build container to at least 4GB. +# +# e.g., +# +# docker build -t sometag -m 4GB --file ` +# .\ci\docker\python-wheel-windows-vs2022-base.dockerfile . + +# NOTE: You must update PYTHON_WHEEL_WINDOWS_IMAGE_REVISION in .env +# when you update this file. + +FROM mcr.microsoft.com/windows/servercore:ltsc2022 + +# Ensure we are in a command shell and not PowerShell +SHELL ["cmd", "/S", "/C"] + +# Install MSVC BuildTools +# +# The set of components below (lines starting with --add) is the minimal +# set we could find that would still compile Arrow C++. +RUN ` + curl -SL --output vs_buildtools.exe https://aka.ms/vs/17/release/vs_buildtools.exe ` + && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache ` + --installPath "%ProgramFiles(x86)%\Microsoft Visual Studio\2022\BuildTools" ` + --add Microsoft.VisualStudio.Component.VC.CoreBuildTools ` + --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ` + --add Microsoft.VisualStudio.Component.Windows10SDK.20348 ` + --add Microsoft.VisualStudio.Component.VC.CMake.Project ` + || IF "%ERRORLEVEL%"=="3010" EXIT 0) ` + && del /q vs_buildtools.exe + +# Install choco CLI +# +# Switch into PowerShell just for this command because choco only provides a +# PowerShell installation script. Afterwards, we switch back to cmd. +# +# See https://chocolatey.org/install#completely-offline-install +SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] +RUN ` + Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) +SHELL ["cmd", "/S", "/C"] + +# Install CMake and other tools +ARG cmake=3.31.2 +RUN choco install --no-progress -r -y cmake --version=%cmake% --installargs 'ADD_CMAKE_TO_PATH=System' +RUN choco install --no-progress -r -y git gzip ninja wget + +# Add UNIX tools to PATH +RUN setx path "%path%;C:\Program Files\Git\usr\bin" + +# Install vcpkg +# +# Compiling vcpkg itself from a git tag doesn't work anymore since vcpkg has +# started to ship precompiled binaries for the vcpkg-tool.
+ARG vcpkg +COPY ci/vcpkg/*.patch ` + ci/vcpkg/*windows*.cmake ` + arrow/ci/vcpkg/ +COPY ci/scripts/install_vcpkg.sh arrow/ci/scripts/ +ENV VCPKG_ROOT=C:\\vcpkg +RUN bash arrow/ci/scripts/install_vcpkg.sh /c/vcpkg %vcpkg% && ` + setx PATH "%PATH%;%VCPKG_ROOT%" + +# Configure vcpkg and install dependencies +# NOTE: use windows batch environment notation for build arguments in RUN +# statements but bash notation in ENV statements +# VCPKG_FORCE_SYSTEM_BINARIES=1 spares around ~750MB of image size if the system +# CMake and Ninja versions are recent enough +ARG build_type=release +ENV CMAKE_BUILD_TYPE=${build_type} ` + VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg ` + VCPKG_DEFAULT_TRIPLET=amd64-windows-static-md-${build_type} ` + VCPKG_FEATURE_FLAGS="manifests" +COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/ +# cannot use the S3 feature here: while aws-sdk-cpp=1.9.160 contains the +# needed SSL-related fixes and we can patch the vcpkg portfile to support +# ARM machines, it hits ARROW-15141, which would require falling back to 1.8.186, +# but we cannot patch those portfiles since vcpkg-tool handles the checkout of +# previous versions => use bundled S3 build +RUN vcpkg install ` + --clean-after-build ` + --x-install-root=%VCPKG_ROOT%\installed ` + --x-manifest-root=arrow/ci/vcpkg ` + --x-feature=flight ` + --x-feature=gcs ` + --x-feature=json ` + --x-feature=orc ` + --x-feature=parquet ` + --x-feature=s3 diff --git a/ci/docker/python-wheel-windows-vs2019.dockerfile b/ci/docker/python-wheel-windows-vs2022.dockerfile similarity index 100% rename from ci/docker/python-wheel-windows-vs2019.dockerfile rename to ci/docker/python-wheel-windows-vs2022.dockerfile diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile deleted file mode 100644 index 1b342df596c9d..0000000000000 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base=amd64/ubuntu:20.04 -FROM ${base} - -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -RUN echo "debconf debconf/frontend select Noninteractive" | \ - debconf-set-selections - -RUN apt-get update -y -q && \ - apt-get install -y -q \ - build-essential \ - ccache \ - cmake \ - curl \ - gdb \ - git \ - libssl-dev \ - libcurl4-openssl-dev \ - python3-pip \ - python3-venv \ - tzdata \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -# Installs LLVM toolchain, for Gandiva and testing other compilers -# -# Note that this is installed before the base packages to improve iteration -# while debugging package list with docker build.
-ARG llvm -RUN latest_system_llvm=10 && \ - if [ ${llvm} -gt ${latest_system_llvm} ]; then \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - gnupg \ - lsb-release \ - wget && \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - code_name=$(lsb_release --codename --short) && \ - if [ ${llvm} -gt 10 ]; then \ - echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list; \ - fi; \ - fi && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - clang-${llvm} \ - llvm-${llvm}-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local - -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - -COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin - -ENV ARROW_ACERO=ON \ - ARROW_AZURE=OFF \ - ARROW_BUILD_TESTS=ON \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=ON \ - ARROW_GANDIVA=ON \ - ARROW_GCS=ON \ - ARROW_HDFS=ON \ - ARROW_HOME=/usr/local \ - ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_S3=ON \ - ARROW_USE_CCACHE=ON \ - ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_OPENTELEMETRY=OFF \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - CMAKE_GENERATOR="Unix Makefiles" \ - PARQUET_BUILD_EXAMPLES=ON \ - PARQUET_BUILD_EXECUTABLES=ON \ - PATH=/usr/lib/ccache/:$PATH \ - PYTHON=python3 diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile deleted file mode 100644 index 259c5fb77fa41..0000000000000 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ /dev/null @@ -1,194 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base=amd64/ubuntu:20.04 -FROM ${base} - -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -RUN echo "debconf debconf/frontend select Noninteractive" | \ - debconf-set-selections - -# Installs LLVM toolchain, for Gandiva and testing other compilers -# -# Note that this is installed before the base packages to improve iteration -# while debugging package list with docker build. 
-ARG clang_tools -ARG llvm -RUN latest_system_llvm=10 && \ - if [ ${llvm} -gt ${latest_system_llvm} -o \ - ${clang_tools} -gt ${latest_system_llvm} ]; then \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - gnupg \ - lsb-release \ - wget && \ - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - code_name=$(lsb_release --codename --short) && \ - if [ ${llvm} -gt 10 ]; then \ - echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ - /etc/apt/sources.list.d/llvm.list; \ - fi && \ - if [ ${clang_tools} -ne ${llvm} -a \ - ${clang_tools} -gt ${latest_system_llvm} ]; then \ - echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${clang_tools} main" > \ - /etc/apt/sources.list.d/clang-tools.list; \ - fi; \ - fi && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - clang-${clang_tools} \ - clang-${llvm} \ - clang-format-${clang_tools} \ - clang-tidy-${clang_tools} \ - llvm-${llvm}-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -# Installs C++ toolchain and dependencies -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - autoconf \ - ca-certificates \ - ccache \ - cmake \ - curl \ - g++ \ - gcc \ - gdb \ - git \ - libbenchmark-dev \ - libboost-filesystem-dev \ - libboost-system-dev \ - libbrotli-dev \ - libbz2-dev \ - libc-ares-dev \ - libcurl4-openssl-dev \ - libgflags-dev \ - libgoogle-glog-dev \ - libidn2-dev \ - libkrb5-dev \ - libldap-dev \ - liblz4-dev \ - libnghttp2-dev \ - libprotobuf-dev \ - libprotoc-dev \ - libpsl-dev \ - libradospp-dev \ - libre2-dev \ - librtmp-dev \ - libsnappy-dev \ - libssh-dev \ - libssh2-1-dev \ - libssl-dev \ - libthrift-dev \ - libutf8proc-dev \ - libxml2-dev \ - libzstd-dev \ - lld \ - make \ - ninja-build \ - nlohmann-json3-dev \ - npm \ - patch \ - pkg-config \ - protobuf-compiler \ - python3-dev \ - python3-pip \ - python3-rados \ - python3-venv \ - rados-objclass-dev \ - rapidjson-dev \ - rsync \ - tzdata \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists* - -COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_minio.sh latest /usr/local - -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - -COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_azurite.sh - -COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_ceph.sh - -COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin - -# Prioritize system packages and local installation. -# -# The following dependencies will be downloaded due to missing/invalid packages -# provided by the distribution: -# - Abseil is not packaged -# - libc-ares-dev does not install CMake config files -# - flatbuffer is not packaged -# - libgtest-dev only provide sources -# - libprotobuf-dev only provide sources -# - opentelemetry-cpp-dev is not packaged -# -# ARROW-17051: this build uses static Protobuf, so we must also use -# static Arrow to run Flight/Flight SQL tests. 
-ENV absl_SOURCE=BUNDLED \ - ARROW_ACERO=ON \ - ARROW_AZURE=OFF \ - ARROW_BUILD_STATIC=ON \ - ARROW_BUILD_TESTS=ON \ - ARROW_DEPENDENCY_SOURCE=SYSTEM \ - ARROW_DATASET=ON \ - ARROW_FLIGHT=OFF \ - ARROW_GANDIVA=ON \ - ARROW_GCS=ON \ - ARROW_HDFS=ON \ - ARROW_HOME=/usr/local \ - ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_JEMALLOC=ON \ - ARROW_ORC=ON \ - ARROW_PARQUET=ON \ - ARROW_S3=ON \ - ARROW_SUBSTRAIT=ON \ - ARROW_USE_ASAN=OFF \ - ARROW_USE_CCACHE=ON \ - ARROW_USE_LLD=ON \ - ARROW_USE_UBSAN=OFF \ - ARROW_WITH_BROTLI=ON \ - ARROW_WITH_BZ2=ON \ - ARROW_WITH_LZ4=ON \ - ARROW_WITH_OPENTELEMETRY=ON \ - ARROW_WITH_SNAPPY=ON \ - ARROW_WITH_ZLIB=ON \ - ARROW_WITH_ZSTD=ON \ - ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \ - AWSSDK_SOURCE=BUNDLED \ - Azure_SOURCE=BUNDLED \ - google_cloud_cpp_storage_SOURCE=BUNDLED \ - gRPC_SOURCE=BUNDLED \ - GTest_SOURCE=BUNDLED \ - opentelemetry_cpp_SOURCE=BUNDLED \ - ORC_SOURCE=BUNDLED \ - PARQUET_BUILD_EXAMPLES=ON \ - PARQUET_BUILD_EXECUTABLES=ON \ - Protobuf_SOURCE=BUNDLED \ - PATH=/usr/lib/ccache/:$PATH \ - PYTHON=python3 \ - xsimd_SOURCE=BUNDLED diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index ce31c457e909e..2a90a5637d4df 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -68,6 +68,10 @@ RUN latest_system_llvm=14 && \ apt-get clean && \ rm -rf /var/lib/apt/lists* +ARG cmake +COPY ci/scripts/install_cmake.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local/ + COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 721b37dcae842..8235e72c4ef15 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -41,7 +41,7 @@ RUN latest_system_llvm=14 && \ wget && \ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ code_name=$(lsb_release --codename --short) && \ - if [ ${llvm} -gt 10 ]; then \ + if [ ${llvm} -gt ${latest_system_llvm} ]; then \ echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ /etc/apt/sources.list.d/llvm.list; \ fi && \ @@ -68,10 +68,6 @@ RUN apt-get update -y -q && \ bzip2 \ ca-certificates \ ccache \ - ceph \ - ceph-fuse \ - ceph-mds \ - cmake \ curl \ gdb \ git \ @@ -168,6 +164,10 @@ RUN if [ "${gcc}" = "" ]; then \ # make sure zlib is cached in the EMSDK folder RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib +ARG cmake +COPY ci/scripts/install_cmake.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local/ + COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local @@ -177,6 +177,9 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_azurite.sh +COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_ceph.sh + COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin diff --git a/ci/docker/ubuntu-22.04-verify-rc.dockerfile b/ci/docker/ubuntu-22.04-verify-rc.dockerfile index 8bc6f39b67a09..b9f130d24ea94 100644 --- a/ci/docker/ubuntu-22.04-verify-rc.dockerfile +++ b/ci/docker/ubuntu-22.04-verify-rc.dockerfile @@ -24,3 +24,7 @@ RUN /setup-ubuntu.sh && \ rm 
/setup-ubuntu.sh && \ apt-get clean && \ rm -rf /var/lib/apt/lists* + +ARG cmake +COPY ci/scripts/install_cmake.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_cmake.sh ${cmake} /usr/local/ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 592a9a6a232e5..0347d452d7bfc 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -68,9 +68,6 @@ RUN apt-get update -y -q && \ autoconf \ ca-certificates \ ccache \ - ceph \ - ceph-fuse \ - ceph-mds \ cmake \ curl \ gdb \ @@ -165,6 +162,9 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_azurite.sh +COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_ceph.sh + COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin diff --git a/ci/rtools/README.md b/ci/rtools/README.md new file mode 100644 index 0000000000000..08b5ea7f5136e --- /dev/null +++ b/ci/rtools/README.md @@ -0,0 +1,35 @@ + + +# rtools40 patches for AWS SDK and related libs + +The patches in this directory exist solely for building Arrow C++ +under [Rtools40](https://cran.r-project.org/bin/windows/Rtools/rtools40.html) +and are not used elsewhere. Once we've dropped support for Rtools40, we can +consider removing these patches. + +These patches are needed because Rtools provides its own +packages, and its versions of the AWS libraries aren't compatible with CMake +3.25. Our solution was to bundle the AWS libs instead, and these patches were +required to get them building under the Rtools40 environment. + +The patches were added while upgrading the minimum required CMake version to +3.25 in [GH-44950](https://github.com/apache/arrow/issues/44950). Please see the +associated PR, [GH-44989](https://github.com/apache/arrow/pull/44989), for more +context. diff --git a/ci/scripts/install_substrait_consumer.sh b/ci/rtools/aws_c_common_ep.patch old mode 100755 new mode 100644 similarity index 56% rename from ci/scripts/install_substrait_consumer.sh rename to ci/rtools/aws_c_common_ep.patch index 2e6d299f68bf2..94c84d0fe1b5a --- a/ci/scripts/install_substrait_consumer.sh +++ b/ci/rtools/aws_c_common_ep.patch @@ -1,5 +1,3 @@ -#!/usr/bin/env bash -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,20 +15,25 @@ # specific language governing permissions and limitations # under the License.
-set -e - -echo "Install Substrait Consumer Test Suite"; +diff --git a/include/aws/common/byte_order.inl b/include/aws/common/byte_order.inl +index 1204be0..0abd9cb 100644 +--- a/include/aws/common/byte_order.inl ++++ b/include/aws/common/byte_order.inl +@@ -13,7 +13,7 @@ + # include + #else + # include +-#endif /* _MSC_VER */ ++#endif /* _WIN32 */ -git clone https://github.com/substrait-io/consumer-testing.git -cd consumer-testing -# avoid installing pyarrow -grep -v 'pyarrow\|arrow-nightlies' requirements.txt > requirements-no-arrow.txt -pip install -r requirements-no-arrow.txt + AWS_EXTERN_C_BEGIN -pip install -r requirements-build.txt -# setup substrait-java -git submodule init -git submodule update --init -./build-and-copy-isthmus-shadow-jar.sh -# install substrait_consumer library -python setup.py install +@@ -39,7 +39,7 @@ AWS_STATIC_IMPL uint64_t aws_hton64(uint64_t x) { + uint64_t v; + __asm__("bswap %q0" : "=r"(v) : "0"(x)); + return v; +-#elif defined(_MSC_VER) ++#elif defined(_WIN32) + return _byteswap_uint64(x); + #else + uint32_t low = x & UINT32_MAX; diff --git a/ci/rtools/aws_c_io_ep.patch b/ci/rtools/aws_c_io_ep.patch new file mode 100644 index 0000000000000..a15d706ba1238 --- /dev/null +++ b/ci/rtools/aws_c_io_ep.patch @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +diff --git a/source/windows/secure_channel_tls_handler.c b/source/windows/secure_channel_tls_handler.c +index 50caf02..29fe850 100644 +--- a/source/windows/secure_channel_tls_handler.c ++++ b/source/windows/secure_channel_tls_handler.c +@@ -35,6 +36,25 @@ + # pragma warning(disable : 4306) /* Identifier is type cast to a larger pointer. 
*/ + #endif + ++#ifndef SP_PROT_TLS1_0_SERVER ++#define SP_PROT_TLS1_0_SERVER SP_PROT_TLS1_SERVER ++#endif ++#ifndef SP_PROT_TLS1_0_CLIENT ++#define SP_PROT_TLS1_0_CLIENT SP_PROT_TLS1_CLIENT ++#endif ++#ifndef SP_PROT_TLS1_1_SERVER ++#define SP_PROT_TLS1_1_SERVER 0x00000100 ++#endif ++#ifndef SP_PROT_TLS1_1_CLIENT ++#define SP_PROT_TLS1_1_CLIENT 0x00000200 ++#endif ++#ifndef SCH_USE_STRONG_CRYPTO ++#define SCH_USE_STRONG_CRYPTO 0x00400000 ++#endif ++#ifndef SECBUFFER_ALERT ++#define SECBUFFER_ALERT 0x11 ++#endif ++ + #define KB_1 1024 + #define READ_OUT_SIZE (16 * KB_1) + #define READ_IN_SIZE READ_OUT_SIZE +@@ -456,7 +476,7 @@ static int s_fillin_alpn_data( + + *extension_length += sizeof(uint32_t) + sizeof(uint16_t); + +- *extension_name = SecApplicationProtocolNegotiationExt_ALPN; ++ *extension_name = 2; + /*now add the protocols*/ + for (size_t i = 0; i < protocols_count; ++i) { + struct aws_byte_cursor *protocol_ptr = NULL; diff --git a/ci/rtools/awssdk_ep.patch b/ci/rtools/awssdk_ep.patch new file mode 100644 index 0000000000000..bd26f85329090 --- /dev/null +++ b/ci/rtools/awssdk_ep.patch @@ -0,0 +1,181 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +diff --git a/aws-cpp-sdk-core/include/aws/core/utils/Array.h b/aws-cpp-sdk-core/include/aws/core/utils/Array.h +index 2b5bbc566..7cb93bdf0 100644 +--- a/aws-cpp-sdk-core/include/aws/core/utils/Array.h ++++ b/aws-cpp-sdk-core/include/aws/core/utils/Array.h +@@ -54,7 +54,7 @@ namespace Aws + { + m_data.reset(Aws::NewArray(m_size, ARRAY_ALLOCATION_TAG)); + +-#ifdef _WIN32 ++#ifdef _MSC_VER + std::copy(arrayToCopy, arrayToCopy + arraySize, stdext::checked_array_iterator< T * >(m_data.get(), m_size)); + #else + std::copy(arrayToCopy, arrayToCopy + arraySize, m_data.get()); +@@ -82,7 +82,7 @@ namespace Aws + if(arr->m_size > 0 && arr->m_data) + { + size_t arraySize = arr->m_size; +-#ifdef _WIN32 ++#ifdef _MSC_VER + std::copy(arr->m_data.get(), arr->m_data.get() + arraySize, stdext::checked_array_iterator< T * >(m_data.get() + location, m_size)); + #else + std::copy(arr->m_data.get(), arr->m_data.get() + arraySize, m_data.get() + location); +@@ -101,7 +101,7 @@ namespace Aws + { + m_data.reset(Aws::NewArray(m_size, ARRAY_ALLOCATION_TAG)); + +-#ifdef _WIN32 ++#ifdef _MSC_VER + std::copy(other.m_data.get(), other.m_data.get() + other.m_size, stdext::checked_array_iterator< T * >(m_data.get(), m_size)); + #else + std::copy(other.m_data.get(), other.m_data.get() + other.m_size, m_data.get()); +@@ -134,7 +134,7 @@ namespace Aws + { + m_data.reset(Aws::NewArray(m_size, ARRAY_ALLOCATION_TAG)); + +-#ifdef _WIN32 ++#ifdef _MSC_VER + std::copy(other.m_data.get(), other.m_data.get() + other.m_size, stdext::checked_array_iterator< T * >(m_data.get(), m_size)); + #else + std::copy(other.m_data.get(), other.m_data.get() + other.m_size, m_data.get()); +diff --git a/aws-cpp-sdk-core/source/http/windows/WinHttpSyncHttpClient.cpp b/aws-cpp-sdk-core/source/http/windows/WinHttpSyncHttpClient.cpp +index 4dade6489..a0456cf8e 100644 +--- a/aws-cpp-sdk-core/source/http/windows/WinHttpSyncHttpClient.cpp ++++ b/aws-cpp-sdk-core/source/http/windows/WinHttpSyncHttpClient.cpp +@@ -22,6 +22,16 @@ + #include + #include + ++#ifndef WINHTTP_OPTION_WEB_SOCKET_KEEPALIVE_INTERVAL ++#define WINHTTP_OPTION_WEB_SOCKET_KEEPALIVE_INTERVAL 116 ++#endif ++#ifndef WINHTTP_FLAG_SECURE_PROTOCOL_TLS1_1 ++#define WINHTTP_FLAG_SECURE_PROTOCOL_TLS1_1 0x00000200 ++#endif ++#ifndef WINHTTP_FLAG_SECURE_PROTOCOL_TLS1_2 ++#define WINHTTP_FLAG_SECURE_PROTOCOL_TLS1_2 0x00000800 ++#endif ++ + using namespace Aws::Client; + using namespace Aws::Http; + using namespace Aws::Http::Standard; +@@ -272,7 +282,7 @@ bool WinHttpSyncHttpClient::DoQueryHeaders(void* hHttpRequest, std::shared_ptr(dwSize / sizeof(wchar_t))); + + WinHttpQueryHeaders(hHttpRequest, WINHTTP_QUERY_CONTENT_TYPE, nullptr, &contentTypeStr, &dwSize, 0); +- if (contentTypeStr[0] != NULL) ++ if (contentTypeStr[0]) + { + Aws::String contentStr = StringUtils::FromWString(contentTypeStr); + response->SetContentType(contentStr); +@@ -303,7 +313,7 @@ bool WinHttpSyncHttpClient::DoQueryHeaders(void* hHttpRequest, std::shared_ptrSetContentType(contentTypeStr); + AWS_LOGSTREAM_DEBUG(GetLogTag(), "Received content type " << contentTypeStr); +diff --git a/aws-cpp-sdk-core/source/http/windows/WinSyncHttpClient.cpp b/aws-cpp-sdk-core/source/http/windows/WinSyncHttpClient.cpp +index d7513cc3c..e390a8d4e 100644 +--- a/aws-cpp-sdk-core/source/http/windows/WinSyncHttpClient.cpp ++++ b/aws-cpp-sdk-core/source/http/windows/WinSyncHttpClient.cpp +@@ -349,7 +349,7 @@ std::shared_ptr WinSyncHttpClient::MakeRequest(const std::shared_p + } + } + +- if (!success && !IsRequestProcessingEnabled() || 
!ContinueRequest(*request)) ++ if ((!success && !IsRequestProcessingEnabled()) || !ContinueRequest(*request)) + { + response->SetClientErrorType(CoreErrors::USER_CANCELLED); + response->SetClientErrorMessage("Request processing disabled or continuation cancelled by user's continuation handler."); +diff --git a/aws-cpp-sdk-core/source/platform/windows/FileSystem.cpp b/aws-cpp-sdk-core/source/platform/windows/FileSystem.cpp +index 2ea82de6f..bc423441e 100644 +--- a/aws-cpp-sdk-core/source/platform/windows/FileSystem.cpp ++++ b/aws-cpp-sdk-core/source/platform/windows/FileSystem.cpp +@@ -11,7 +11,9 @@ + #include + #include + ++#ifdef _MSC_VER + #pragma warning( disable : 4996) ++#endif + + using namespace Aws::Utils; + namespace Aws +@@ -304,6 +306,9 @@ Aws::String CreateTempFilePath() + { + #ifdef _MSC_VER + #pragma warning(disable: 4996) // _CRT_SECURE_NO_WARNINGS ++#elif !defined(L_tmpnam_s) ++ // Definition from the MSVC stdio.h ++ #define L_tmpnam_s (sizeof("\\") + 16) + #endif + char s_tempName[L_tmpnam_s+1]; + +diff --git a/aws-cpp-sdk-core/source/platform/windows/OSVersionInfo.cpp b/aws-cpp-sdk-core/source/platform/windows/OSVersionInfo.cpp +index 0180f7fbf..3adbab313 100644 +--- a/aws-cpp-sdk-core/source/platform/windows/OSVersionInfo.cpp ++++ b/aws-cpp-sdk-core/source/platform/windows/OSVersionInfo.cpp +@@ -9,7 +9,9 @@ + + #include + ++#ifdef _MSC_VER + #pragma warning(disable: 4996) ++#endif + #include + #include + namespace Aws +diff --git a/aws-cpp-sdk-core/source/utils/crypto/factory/Factories.cpp b/aws-cpp-sdk-core/source/utils/crypto/factory/Factories.cpp +index 2ee517b48..3b0dce665 100644 +--- a/aws-cpp-sdk-core/source/utils/crypto/factory/Factories.cpp ++++ b/aws-cpp-sdk-core/source/utils/crypto/factory/Factories.cpp +@@ -939,7 +939,7 @@ std::shared_ptr Aws::Utils::Crypto::CreateSha256HMACIm + return GetSha256HMACFactory()->CreateImplementation(); + } + +-#ifdef _WIN32 ++#ifdef _MSC_VER + #pragma warning( push ) + #pragma warning( disable : 4702 ) + #endif +@@ -1032,7 +1032,7 @@ std::shared_ptr Aws::Utils::Crypto::CreateAES_KeyWrapImplementa + return GetAES_KeyWrapFactory()->CreateImplementation(key); + } + +-#ifdef _WIN32 ++#ifdef _MSC_VER + #pragma warning(pop) + #endif diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index efeed954006c1..9eac3ef5cb9f2 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,25 +18,22 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=19.0.0.9000 +pkgver=19.0.1.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") url="https://arrow.apache.org/" license=("Apache-2.0") -depends=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp" - "${MINGW_PACKAGE_PREFIX}-bzip2" +depends=("${MINGW_PACKAGE_PREFIX}-bzip2" "${MINGW_PACKAGE_PREFIX}-curl" # for google-cloud-cpp bundled build "${MINGW_PACKAGE_PREFIX}-libutf8proc" "${MINGW_PACKAGE_PREFIX}-re2" - "${MINGW_PACKAGE_PREFIX}-thrift" "${MINGW_PACKAGE_PREFIX}-snappy" "${MINGW_PACKAGE_PREFIX}-zlib" "${MINGW_PACKAGE_PREFIX}-lz4" "${MINGW_PACKAGE_PREFIX}-zstd" "${MINGW_PACKAGE_PREFIX}-brotli") makedepends=("${MINGW_PACKAGE_PREFIX}-ccache" - "${MINGW_PACKAGE_PREFIX}-cmake" "${MINGW_PACKAGE_PREFIX}-gcc") options=("staticlibs" "strip" "!buildflags") @@ -82,8 +79,31 @@ build() { # CMAKE_UNITY_BUILD is set to OFF as otherwise some compute functionality # segfaults in tests + # We use the bundled AWS SDK instead of the MINGW one because the upstream + # one on rtools packages is 
unmaintained, uses an old version (1.7.365) + # and does not work with newer versions of CMake. See comments: + # https://github.com/apache/arrow/pull/44989/files#r1901428998 + + # We use the bundled Apache Thrift instead of the MINGW one because + # the upstream one on rtools packages is unmaintained. Apache Thrift + # still has the following problem: + # + # https://github.com/apache/thrift/pull/2725 + # + # The original MSYS2 package has another fix: + # + # https://github.com/msys2/MINGW-packages/blob/master/mingw-w64-thrift/002-fix-pkgconfig-paths.patch + # + # But the one in the rtools packages doesn't have the fix, so we can't use + # the MINGW one. + + # MSYS2_ARG_CONV_EXCL is needed to prevent autoconverting CMAKE_INSTALL_PREFIX + # to Windows paths. See https://www.msys2.org/docs/filesystem-paths/#process-arguments + + # We require the full path to the CMake executable in order to build Arrow; + # it is in the Program Files directory: "/c/Program Files/CMake/bin/cmake" MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \ - ${MINGW_PREFIX}/bin/cmake.exe \ + "${PROGRAMFILES}\CMake\bin\cmake.exe" \ ${ARROW_CPP_DIR} \ -G "MSYS Makefiles" \ -DARROW_ACERO=ON \ @@ -116,10 +136,12 @@ build() { -DARROW_WITH_BZ2=ON \ -DARROW_ZSTD_USE_SHARED=OFF \ -DARROW_CXXFLAGS="${CPPFLAGS}" \ + -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE="release" \ -DCMAKE_INSTALL_PREFIX=${MINGW_PREFIX} \ -DCMAKE_UNITY_BUILD=OFF \ - -DCMAKE_VERBOSE_MAKEFILE=ON + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DThrift_SOURCE=BUNDLED make -j3 popd diff --git a/ci/scripts/conan_build.sh b/ci/scripts/conan_build.sh index 0ea3fc29192dd..03e5cab8426c6 100755 --- a/ci/scripts/conan_build.sh +++ b/ci/scripts/conan_build.sh @@ -25,7 +25,6 @@ build_dir=${1} shift export ARROW_HOME=${source_dir} -export CONAN_HOOK_ERROR_LEVEL=40 conan_args=() conan_args+=(--build=missing) @@ -67,6 +66,7 @@ fi version=$(grep '^set(ARROW_VERSION ' ${ARROW_HOME}/cpp/CMakeLists.txt | \ grep -E -o '([0-9.]*)') +conan_args+=(--version ${version}) rm -rf ~/.conan/data/arrow/ rm -rf ${build_dir}/conan || sudo rm -rf ${build_dir}/conan @@ -78,4 +78,4 @@ else sudo chown -R $(id -u):$(id -g) ${build_dir}/conan/ fi cd ${build_dir}/conan/all -conan create . arrow/${version}@ "${conan_args[@]}" "$@" +conan create . "${conan_args[@]}" "$@" diff --git a/ci/scripts/conan_setup.sh b/ci/scripts/conan_setup.sh index bc56ee296a234..d665ce5436b2b 100755 --- a/ci/scripts/conan_setup.sh +++ b/ci/scripts/conan_setup.sh @@ -19,5 +19,4 @@ set -eux -conan config install https://github.com/conan-io/hooks.git -sf hooks -tf hooks -conan config set hooks.conan-center +conan profile detect diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index c1e7adf6a05e0..9611f94d52209 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -109,7 +109,13 @@ if [ "${ARROW_OFFLINE}" = "ON" ]; then echo > /etc/resolv.conf fi -if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then +if [ "${ARROW_USE_MESON:-OFF}" = "ON" ]; then + meson setup \ + --prefix=${MESON_PREFIX:-${ARROW_HOME}} \ + --buildtype=${ARROW_BUILD_TYPE:-debug} \ + .
\ + ${source_dir} +elif [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then if [ "${UBUNTU}" = "20.04" ]; then echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" exit -1 @@ -141,7 +147,6 @@ else -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ - -DARROW_BUILD_OPENMP_BENCHMARKS=${ARROW_BUILD_OPENMP_BENCHMARKS:-OFF} \ -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ @@ -220,6 +225,7 @@ else -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -DCUDAToolkit_ROOT=${CUDAToolkit_ROOT:-} \ -Dgflags_SOURCE=${gflags_SOURCE:-} \ -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \ -DgRPC_SOURCE=${gRPC_SOURCE:-} \ @@ -243,8 +249,12 @@ else ${source_dir} fi -export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-$[${n_jobs} + 1]} -time cmake --build . --target install +if [ "${ARROW_USE_MESON:-OFF}" = "ON" ]; then + time meson install +else + export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-$[${n_jobs} + 1]} + time cmake --build . --target install +fi # Save disk space by removing large temporary build products find . -name "*.o" -delete diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 7912bf23e491c..36e09e8936f60 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -89,7 +89,13 @@ pushd ${build_dir} if [ -z "${PYTHON}" ] && ! which python > /dev/null 2>&1; then export PYTHON="${PYTHON:-python3}" fi -ctest \ +if [ "${ARROW_USE_MESON:-OFF}" = "ON" ]; then + ARROW_BUILD_EXAMPLES=OFF # TODO: Remove this + meson test \ + --print-errorlogs \ + "$@" +else + ctest \ --label-regex unittest \ --output-on-failure \ --parallel ${n_jobs} \ @@ -97,6 +103,7 @@ ctest \ --timeout ${ARROW_CTEST_TIMEOUT:-300} \ "${ctest_options[@]}" \ "$@" +fi if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then examples=$(find ${binary_output_dir} -executable -name "*example") diff --git a/ci/scripts/install_cmake.sh b/ci/scripts/install_cmake.sh index 7fdb06d90f02c..d01a7a744dca8 100755 --- a/ci/scripts/install_cmake.sh +++ b/ci/scripts/install_cmake.sh @@ -17,29 +17,44 @@ # specific language governing permissions and limitations # under the License. 
-set -e +set -ex + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + exit 1 +fi declare -A archs -archs=([amd64]=x86_64 - [arch64]=aarch64 +archs=([x86_64]=x86_64 [arm64]=aarch64 - [arm64v8]=aarch64 - [x86_64]=x86_64) - -declare -A platforms -platforms=([linux]=linux - [macos]=macos - [windows]=windows) + [aarch64]=aarch64) -if [ "$#" -ne 4 ]; then - echo "Usage: $0 " - exit 1 +arch=$(uname -m) +if [ -z ${archs[$arch]} ]; then + echo "Unsupported architecture: ${arch}" + exit 0 fi +arch=${archs[$arch]} + +version=$1 +prefix=$2 -arch=${archs[$1]} -platform=${platforms[$2]} -version=$3 -prefix=$4 +platform=$(uname) +case ${platform} in + Linux) + platform=linux + ;; + Darwin) + platform=macos + ;; + MSYS_NT*|MINGW64_NT*) + platform=windows + ;; + *) + echo "Unsupported platform: ${platform}" + exit 0 + ;; +esac mkdir -p ${prefix} url="https://github.com/Kitware/CMake/releases/download/v${version}/cmake-${version}-${platform}-" diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh index 0f8a0804691e7..a84d136c0c2c7 100755 --- a/ci/scripts/install_python.sh +++ b/ci/scripts/install_python.sh @@ -28,9 +28,9 @@ declare -A versions versions=([3.9]=3.9.13 [3.10]=3.10.11 [3.11]=3.11.9 - [3.12]=3.12.5 - [3.13]=3.13.0 - [3.13t]=3.13.0) + [3.12]=3.12.9 + [3.13]=3.13.2 + [3.13t]=3.13.2) if [ "$#" -ne 2 ]; then echo "Usage: $0 " @@ -47,17 +47,11 @@ full_version=${versions[$2]} if [ $platform = "macOS" ]; then echo "Downloading Python installer..." - if [ "$version" = "3.13" ] || [ "$version" = "3.13t" ]; + if [ "$(uname -m)" = "x86_64" ] && [ "$version" = "3.9" ]; then - fname="python-${full_version}rc2-macos11.pkg" - elif [ "$(uname -m)" = "arm64" ] || \ - [ "$version" = "3.10" ] || \ - [ "$version" = "3.11" ] || \ - [ "$version" = "3.12" ]; - then - fname="python-${full_version}-macos11.pkg" - else fname="python-${full_version}-macosx10.9.pkg" + else + fname="python-${full_version}-macos11.pkg" fi wget "https://www.python.org/ftp/python/${full_version}/${fname}" diff --git a/ci/scripts/integration_substrait.sh b/ci/scripts/integration_substrait.sh deleted file mode 100755 index 152a8d9440187..0000000000000 --- a/ci/scripts/integration_substrait.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -e - -# check that optional pyarrow modules are available -# because pytest would just skip the substrait tests -echo "Substrait Integration Tests" -echo "Validating imports" -python -c "import pyarrow.substrait" -python -c "from substrait_consumer.consumers.acero_consumer import AceroConsumer" - -echo "Executing pytest" -cd consumer-testing -pytest -r s substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py --producer isthmus --consumer acero diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 833d31059c710..fd6d0591661c1 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -24,7 +24,7 @@ py -0p %PYTHON_CMD% -m sysconfig || exit /B 1 -call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" +call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvars64.bat" @echo on echo "=== (%PYTHON%) Clear output directories and leftovers ===" @@ -56,7 +56,7 @@ set ARROW_WITH_SNAPPY=ON set ARROW_WITH_ZLIB=ON set ARROW_WITH_ZSTD=ON set CMAKE_UNITY_BUILD=ON -set CMAKE_GENERATOR=Visual Studio 16 2019 +set CMAKE_GENERATOR=Visual Studio 17 2022 set CMAKE_PLATFORM=x64 set VCPKG_ROOT=C:\vcpkg set VCPKG_FEATURE_FLAGS=-manifests diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index ffe8b388f93df..a686215b93dad 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -58,12 +58,5 @@ py -0p @REM Validate wheel contents %PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1 -@rem Download IANA Timezone Database for ORC C++ -curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B -mkdir %USERPROFILE%\Downloads\test\tzdata -arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata || exit /B -set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo -dir %TZDIR% - @REM Execute unittest %PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1 diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index d5fd78914755e..67142b66dd7fd 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -91,7 +91,9 @@ export TEXMFVAR=/tmp/texmf-var BEFORE=$(ls -alh ~/) SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') - if (as_cran) { + # generally will be false, but we can override it by setting SKIP_VIGNETTES=true + skip_vignettes <- identical(tolower(Sys.getenv('SKIP_VIGNETTES')), 'true') + if (as_cran && !skip_vignettes) { args <- '--as-cran' build_args <- character() } else { diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh index c9395eb243f76..de92addf08371 100755 --- a/ci/scripts/r_windows_build.sh +++ b/ci/scripts/r_windows_build.sh @@ -23,13 +23,8 @@ set -ex # Make sure it is absolute and exported export ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)" -# Uncomment L38-41 if you're testing a new rtools dependency that hasn't yet sync'd to CRAN -# curl https://raw.githubusercontent.com/r-windows/rtools-packages/master/pacman.conf > /etc/pacman.conf -# curl -OSsl "http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz" -# pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz -# pacman --noconfirm -Scc - pacman --noconfirm -Syy + RWINLIB_LIB_DIR="lib" : 
${MINGW_ARCH:="mingw32 mingw64 ucrt64"} @@ -71,7 +66,7 @@ if [ -d mingw64/lib/ ]; then # Move the 64-bit versions of libarrow into the expected location mv mingw64/lib/*.a $DST_DIR/lib/x64 # These are from https://dl.bintray.com/rtools/mingw{32,64}/ - cp $MSYS_LIB_DIR/mingw64/lib/lib{thrift,snappy,zstd,lz4,brotli*,bz2,crypto,curl,ss*,utf8proc,re2,aws*,nghttp2}.a $DST_DIR/lib/x64 + cp $MSYS_LIB_DIR/mingw64/lib/lib{snappy,zstd,lz4,brotli*,bz2,crypto,curl,ss*,utf8proc,re2,nghttp2}.a $DST_DIR/lib/x64 fi # Same for the 32-bit versions @@ -79,7 +74,7 @@ if [ -d mingw32/lib/ ]; then ls $MSYS_LIB_DIR/mingw32/lib/ mkdir -p $DST_DIR/lib/i386 mv mingw32/lib/*.a $DST_DIR/lib/i386 - cp $MSYS_LIB_DIR/mingw32/lib/lib{thrift,snappy,zstd,lz4,brotli*,bz2,crypto,curl,ss*,utf8proc,re2,aws*,nghttp2}.a $DST_DIR/lib/i386 + cp $MSYS_LIB_DIR/mingw32/lib/lib{snappy,zstd,lz4,brotli*,bz2,crypto,curl,ss*,utf8proc,re2,nghttp2}.a $DST_DIR/lib/i386 fi # Do the same also for ucrt64 @@ -87,7 +82,7 @@ if [ -d ucrt64/lib/ ]; then ls $MSYS_LIB_DIR/ucrt64/lib/ mkdir -p $DST_DIR/lib/x64-ucrt mv ucrt64/lib/*.a $DST_DIR/lib/x64-ucrt - cp $MSYS_LIB_DIR/ucrt64/lib/lib{thrift,snappy,zstd,lz4,brotli*,bz2,crypto,curl,ss*,utf8proc,re2,aws*,nghttp2}.a $DST_DIR/lib/x64-ucrt + cp $MSYS_LIB_DIR/ucrt64/lib/lib{snappy,zstd,lz4,brotli*,bz2,crypto,curl,ss*,utf8proc,re2,nghttp2}.a $DST_DIR/lib/x64-ucrt fi # Create build artifact diff --git a/ci/vcpkg/ports.patch b/ci/vcpkg/ports.patch index 67fb2a4a3ea76..39b51874b1c0e 100644 --- a/ci/vcpkg/ports.patch +++ b/ci/vcpkg/ports.patch @@ -29,17 +29,16 @@ index a79c72a59..6b7fa6a66 100644 vcpkg_cmake_install(ADD_BIN_TO_PATH) diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake -index 0c7098082..c603c3653 100644 +index 0312b2ae1..fdb576b5f 100644 --- a/ports/snappy/portfile.cmake +++ b/ports/snappy/portfile.cmake -@@ -10,6 +10,7 @@ vcpkg_from_github( - PATCHES +@@ -8,5 +8,6 @@ vcpkg_from_github( fix_clang-cl_build.patch no-werror.patch + pkgconfig.diff + "snappy-disable-bmi.patch" ) - - vcpkg_cmake_configure( + file(COPY "${CURRENT_PORT_DIR}/snappy.pc.in" DESTINATION "${SOURCE_PATH}") diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch new file mode 100644 index 000000000..e839c93a4 diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index a45adbb6a751e..5dfe61a0c6062 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -75,6 +75,7 @@ { "name": "llvm", "default-features": false, + "version>=": "18.1", "features": [ "clang", "default-targets", diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a7d80c2e96c23..f2500b3a72f40 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.25) message(STATUS "Building using CMake version: ${CMAKE_VERSION}") # https://www.cmake.org/cmake/help/latest/policy/CMP0025.html diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 85febbc5c9a7c..114f79271d282 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -41,7 +41,6 @@ "cacheVariables": { "ARROW_BUILD_BENCHMARKS": "ON", "ARROW_BUILD_BENCHMARKS_REFERENCE": "ON", - "ARROW_BUILD_OPENMP_BENCHMARKS": "ON", "ARROW_BUILD_DETAILED_BENCHMARKS": "OFF", "CMAKE_BUILD_TYPE": "RelWithDebInfo" } diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 43e4e7603cfbf..ee6315f8f0f9a 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -243,9 +243,6 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_BUILD_BENCHMARKS_REFERENCE "Build the Arrow micro reference benchmarks" OFF) - define_option(ARROW_BUILD_OPENMP_BENCHMARKS - "Build the Arrow benchmarks that rely on OpenMP" OFF) - define_option(ARROW_BUILD_DETAILED_BENCHMARKS "Build benchmarks that do a longer exploration of performance" OFF) diff --git a/cpp/cmake_modules/FindThriftAlt.cmake b/cpp/cmake_modules/FindThriftAlt.cmake index 98a706deb9919..0c5aed8e4e06c 100644 --- a/cpp/cmake_modules/FindThriftAlt.cmake +++ b/cpp/cmake_modules/FindThriftAlt.cmake @@ -32,35 +32,20 @@ if(ThriftAlt_FOUND) return() endif() -# There are some problems in ThriftConfig.cmake provided by MSYS2 and -# conda on Windows: -# -# * https://github.com/conda-forge/thrift-cpp-feedstock/issues/68 -# * https://github.com/msys2/MINGW-packages/issues/6619#issuecomment-649728718 -# -# We can remove the following "if(NOT WIN32)" condition once the -# followings are fixed and a new version that includes these fixes is -# published by MSYS2 and conda: -# -# * https://github.com/apache/thrift/pull/2725 -# * https://github.com/apache/thrift/pull/2726 -# * https://github.com/conda-forge/thrift-cpp-feedstock/issues/68 -if(NOT WIN32) - set(find_package_args "") - if(ThriftAlt_FIND_VERSION) - list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) - endif() - if(ThriftAlt_FIND_QUIETLY) - list(APPEND find_package_args QUIET) - endif() - find_package(Thrift ${find_package_args}) - if(Thrift_FOUND) - set(ThriftAlt_FOUND TRUE) - add_executable(thrift::compiler IMPORTED) - set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION - "${THRIFT_COMPILER}") - return() - endif() +set(find_package_args "") +if(ThriftAlt_FIND_VERSION) + list(APPEND find_package_args ${ThriftAlt_FIND_VERSION}) +endif() +if(ThriftAlt_FIND_QUIETLY) + list(APPEND find_package_args QUIET) +endif() +find_package(Thrift ${find_package_args}) +if(Thrift_FOUND) + set(ThriftAlt_FOUND TRUE) + add_executable(thrift::compiler IMPORTED) + set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION + "${THRIFT_COMPILER}") + return() endif() function(extract_thrift_version) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index cbc96ce397fd6..ef7b0b008f29f 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -386,14 +386,14 @@ if(ARROW_WITH_OPENTELEMETRY) set(ARROW_WITH_PROTOBUF ON) endif() -if(ARROW_THRIFT) - set(ARROW_WITH_ZLIB ON) -endif() - if(ARROW_PARQUET) set(ARROW_WITH_THRIFT ON) endif() +if(ARROW_WITH_THRIFT) + set(ARROW_WITH_ZLIB ON) +endif() + if(ARROW_FLIGHT) 
set(ARROW_WITH_GRPC ON) endif() @@ -1256,13 +1256,19 @@ endif() # - Gandiva has a compile-time (header-only) dependency on Boost, not runtime. # - Tests need Boost at runtime. # - S3FS and Flight benchmarks need Boost at runtime. +# - arrow_testing uses boost::filesystem. So arrow_testing requires +# the Boost library. (boost::filesystem isn't header-only.) But if we +# use arrow_testing as a static library without +# using arrow::util::Process, we don't need boost::filesystem. if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS OR (ARROW_FLIGHT AND (ARROW_TESTING OR ARROW_BUILD_BENCHMARKS)) - OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS)) + OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS) + OR (ARROW_TESTING AND ARROW_BUILD_SHARED)) set(ARROW_USE_BOOST TRUE) set(ARROW_BOOST_REQUIRE_LIBRARY TRUE) elseif(ARROW_GANDIVA + OR ARROW_TESTING OR ARROW_WITH_THRIFT OR (NOT ARROW_USE_NATIVE_INT128)) set(ARROW_USE_BOOST TRUE) @@ -1767,9 +1773,10 @@ macro(build_thrift) if(DEFINED BOOST_ROOT) list(APPEND THRIFT_CMAKE_ARGS "-DBOOST_ROOT=${BOOST_ROOT}") endif() - if(DEFINED Boost_INCLUDE_DIR) - list(APPEND THRIFT_CMAKE_ARGS "-DBoost_INCLUDE_DIR=${Boost_INCLUDE_DIR}") - endif() + list(APPEND + THRIFT_CMAKE_ARGS + "-DBoost_INCLUDE_DIR=$" + ) if(DEFINED Boost_NAMESPACE) list(APPEND THRIFT_CMAKE_ARGS "-DBoost_NAMESPACE=${Boost_NAMESPACE}") endif() @@ -4640,6 +4647,10 @@ function(build_orc) set(ZLIB_HOME ${ZLIB_ROOT} CACHE STRING "" FORCE) + # From CMake 3.21 onwards the set(CACHE) command does not remove any normal + # variable of the same name from the current scope. We have to manually remove + # the variable via unset to avoid ORC not finding the ZLIB_LIBRARY. + unset(ZLIB_LIBRARY) set(ZLIB_LIBRARY ZLIB::ZLIB CACHE STRING "" FORCE) @@ -5044,6 +5055,18 @@ macro(build_awssdk) string(APPEND AWS_C_FLAGS " -Wno-deprecated") string(APPEND AWS_CXX_FLAGS " -Wno-deprecated") endif() + # GH-44950: This is required to build under Rtools40 and we may be able to + # remove it if/when we no longer need to build under Rtools40 + if(WIN32 AND NOT MSVC) + string(APPEND + AWS_C_FLAGS + " -D_WIN32_WINNT=0x0601 -D__USE_MINGW_ANSI_STDIO=1 -Wno-error -Wno-error=format= -Wno-error=format-extra-args -Wno-unused-local-typedefs -Wno-unused-variable" + ) + string(APPEND + AWS_CXX_FLAGS + " -D_WIN32_WINNT=0x0601 -D__USE_MINGW_ANSI_STDIO=1 -Wno-error -Wno-error=format= -Wno-error=format-extra-args -Wno-unused-local-typedefs -Wno-unused-variable" + ) + endif() set(AWSSDK_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} @@ -5081,6 +5104,28 @@ macro(build_awssdk) endif() list(APPEND AWSSDK_PATCH_COMMAND ${AWSSDK_UNUSED_DIRECTORIES}) + # Patch parts of the AWSSDK EP so it builds cleanly under Rtools40 + if(WIN32 AND NOT MSVC) + find_program(PATCH patch REQUIRED) + # Patch aws_c_common to build under Rtools40 + set(AWS_C_COMMON_PATCH_COMMAND ${PATCH} -p1 -i + ${CMAKE_SOURCE_DIR}/../ci/rtools/aws_c_common_ep.patch) + message(STATUS "aws-c-common patch command: ${AWS_C_COMMON_PATCH_COMMAND}") + # Patch aws_c_io_ep to build under Rtools40 + set(AWS_C_IO_PATCH_COMMAND ${PATCH} -p1 -i + ${CMAKE_SOURCE_DIR}/../ci/rtools/aws_c_io_ep.patch) + message(STATUS "aws-c-io patch command: ${AWS_C_IO_PATCH_COMMAND}") + # Patch awssdk_ep to build under Rtools40 + list(APPEND + AWSSDK_PATCH_COMMAND + && + ${PATCH} + -p1 + -i + ${CMAKE_SOURCE_DIR}/../ci/rtools/awssdk_ep.patch) + message(STATUS "AWS SDK patch command: ${AWSSDK_PATCH_COMMAND}") + endif() + if(UNIX) # on Linux and macOS curl seems to be required find_curl() @@ -5175,6 +5220,7 @@ macro(build_awssdk) ${EP_COMMON_OPTIONS} URL ${AWS_C_COMMON_SOURCE_URL} URL_HASH
"SHA256=${ARROW_AWS_C_COMMON_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${AWS_C_COMMON_PATCH_COMMAND} CMAKE_ARGS ${AWSSDK_COMMON_CMAKE_ARGS} BUILD_BYPRODUCTS ${AWS_C_COMMON_STATIC_LIBRARY}) add_dependencies(AWS::aws-c-common aws_c_common_ep) @@ -5270,6 +5316,7 @@ macro(build_awssdk) ${EP_COMMON_OPTIONS} URL ${AWS_C_IO_SOURCE_URL} URL_HASH "SHA256=${ARROW_AWS_C_IO_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${AWS_C_IO_PATCH_COMMAND} CMAKE_ARGS ${AWSSDK_COMMON_CMAKE_ARGS} BUILD_BYPRODUCTS ${AWS_C_IO_STATIC_LIBRARY} DEPENDS ${AWS_C_IO_DEPENDS}) diff --git a/cpp/examples/minimal_build/minimal.dockerfile b/cpp/examples/minimal_build/minimal.dockerfile index 9361fc5e81d4d..8062e9b698437 100644 --- a/cpp/examples/minimal_build/minimal.dockerfile +++ b/cpp/examples/minimal_build/minimal.dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM ubuntu:focal +FROM ubuntu:24.04 ENV DEBIAN_FRONTEND=noninteractive diff --git a/cpp/examples/minimal_build/system_dependency.dockerfile b/cpp/examples/minimal_build/system_dependency.dockerfile index 926fcaf6f4baa..84a16c4902f3a 100644 --- a/cpp/examples/minimal_build/system_dependency.dockerfile +++ b/cpp/examples/minimal_build/system_dependency.dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM ubuntu:focal +FROM ubuntu:24.04 ENV DEBIAN_FRONTEND=noninteractive diff --git a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt index 0480391e3800e..189d17914d678 100644 --- a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt +++ b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt @@ -16,7 +16,7 @@ # under the License. # Require cmake that supports BYPRODUCTS in add_custom_command, ExternalProject_Add [1]. -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.25) project(parquet_arrow_example) diff --git a/cpp/examples/tutorial_examples/CMakeLists.txt b/cpp/examples/tutorial_examples/CMakeLists.txt index 8788501484c87..a6f8350c41dfe 100644 --- a/cpp/examples/tutorial_examples/CMakeLists.txt +++ b/cpp/examples/tutorial_examples/CMakeLists.txt @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.25) project(ArrowTutorialExamples) diff --git a/cpp/examples/tutorial_examples/tutorial.dockerfile b/cpp/examples/tutorial_examples/tutorial.dockerfile index 9361fc5e81d4d..8062e9b698437 100644 --- a/cpp/examples/tutorial_examples/tutorial.dockerfile +++ b/cpp/examples/tutorial_examples/tutorial.dockerfile @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM ubuntu:focal +FROM ubuntu:24.04 ENV DEBIAN_FRONTEND=noninteractive diff --git a/cpp/meson.build b/cpp/meson.build new file mode 100644 index 0000000000000..9eca739b82522 --- /dev/null +++ b/cpp/meson.build @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +project( + 'arrow', + 'cpp', + 'c', + version: '20.0.0-SNAPSHOT', + license: 'Apache-2.0', + meson_version: '>=1.3.0', + default_options: [ + 'buildtype=release', + 'c_std=c99', + 'warning_level=2', + 'cpp_std=c++17', + ], +) + +project_args = [ + '-Wno-unused-parameter', + '-Wno-array-bounds', + '-Wno-stringop-overflow', + '-Wno-aggressive-loop-optimizations', + '-Wno-nonnull', +] + +c_compiler = meson.get_compiler('c') +c_args = c_compiler.get_supported_arguments(project_args) +add_project_arguments(c_args, language: 'c') + +cpp_compiler = meson.get_compiler('cpp') +cpp_args = cpp_compiler.get_supported_arguments(project_args) +add_project_arguments(cpp_args, language: 'cpp') + +git_id = get_option('git_id') +if git_id == '' + git_id = run_command('git', 'log', '-n1', '--format=%H', check: false).stdout().strip() +endif + +git_description = get_option('git_description') +if git_description == '' + git_description = run_command('git', 'describe', '--tags', check: false).stdout().strip() +endif + +subdir('src/arrow') diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from b/cpp/meson.options similarity index 71% rename from dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from rename to cpp/meson.options index 52ab48b66f223..1391cd361c691 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal-arm64/from +++ b/cpp/meson.options @@ -6,7 +6,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an @@ -15,4 +15,18 @@ # specific language governing permissions and limitations # under the License. 
-arm64v8/ubuntu:focal +option( + 'git_id', + type: 'string', +) + +option( + 'git_description', + type: 'string', +) + +option( + 'package_kind', + type: 'string', + description: 'Arbitrary string that identifies the kind of package (for informational purposes)', +) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index eb9860b240f16..775e3633aa4a3 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -529,6 +529,7 @@ set(ARROW_UTIL_SRCS util/logger.cc util/logging.cc util/key_value_metadata.cc + util/math_internal.cc util/memory.cc util/mutex.cc util/ree_util.cc @@ -752,10 +753,12 @@ if(ARROW_COMPUTE) ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic.cc compute/kernels/aggregate_mode.cc + compute/kernels/aggregate_pivot.cc compute/kernels/aggregate_quantile.cc compute/kernels/aggregate_tdigest.cc compute/kernels/aggregate_var_std.cc compute/kernels/hash_aggregate.cc + compute/kernels/pivot_internal.cc compute/kernels/scalar_arithmetic.cc compute/kernels/scalar_boolean.cc compute/kernels/scalar_compare.cc diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index 54269f1df0eb6..e6aa0560dfa80 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -221,18 +221,7 @@ if(ARROW_BUILD_BENCHMARKS) add_arrow_acero_benchmark(aggregate_benchmark SOURCES aggregate_benchmark.cc) - if(ARROW_BUILD_OPENMP_BENCHMARKS) - find_package(OpenMP REQUIRED) - add_arrow_acero_benchmark(hash_join_benchmark - EXTRA_LINK_LIBS - OpenMP::OpenMP_CXX - SOURCES - hash_join_benchmark.cc) - if(MSVC) - target_compile_options(arrow-compute-hash-join-benchmark - PRIVATE "-openmp:experimental -openmp:llvm") - endif() - endif() + add_arrow_acero_benchmark(hash_join_benchmark SOURCES hash_join_benchmark.cc) if(ARROW_BUILD_STATIC) target_link_libraries(arrow-acero-expression-benchmark PUBLIC arrow_acero_static) @@ -240,17 +229,13 @@ if(ARROW_BUILD_BENCHMARKS) target_link_libraries(arrow-acero-project-benchmark PUBLIC arrow_acero_static) target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_static) target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_static) - if(ARROW_BUILD_OPENMP_BENCHMARKS) - target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_static) - endif() + target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_static) else() target_link_libraries(arrow-acero-expression-benchmark PUBLIC arrow_acero_shared) target_link_libraries(arrow-acero-filter-benchmark PUBLIC arrow_acero_shared) target_link_libraries(arrow-acero-project-benchmark PUBLIC arrow_acero_shared) target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_shared) target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_shared) - if(ARROW_BUILD_OPENMP_BENCHMARKS) - target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_shared) - endif() + target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_shared) endif() endif() diff --git a/cpp/src/arrow/acero/accumulation_queue.h b/cpp/src/arrow/acero/accumulation_queue.h index 92d62d5d99d16..b0e0b85a4f3d0 100644 --- a/cpp/src/arrow/acero/accumulation_queue.h +++ b/cpp/src/arrow/acero/accumulation_queue.h @@ -34,7 +34,7 @@ using arrow::compute::ExecBatch; /// \brief A container that accumulates batches until they are ready to /// be processed. 
-class AccumulationQueue { +class ARROW_ACERO_EXPORT AccumulationQueue { public: AccumulationQueue() : row_count_(0) {} ~AccumulationQueue() = default; diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 3ab976e671ccf..bfba3b5e61703 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -1103,7 +1103,7 @@ class AsofJoinNode : public ExecNode { void ProcessThread() { for (;;) { - if (!process_.Pop()) { + if (!process_.WaitAndPop()) { EndFromProcessThread(); return; } diff --git a/cpp/src/arrow/acero/concurrent_queue_internal.h b/cpp/src/arrow/acero/concurrent_queue_internal.h index 20ec2089bee41..a751db70262f9 100644 --- a/cpp/src/arrow/acero/concurrent_queue_internal.h +++ b/cpp/src/arrow/acero/concurrent_queue_internal.h @@ -31,40 +31,40 @@ namespace arrow::acero { template class ConcurrentQueue { public: - // Pops the last item from the queue. Must be called on a non-empty queue - // - T Pop() { + // Pops the last item from the queue but waits if the queue is empty until new items are + // pushed. + T WaitAndPop() { std::unique_lock lock(mutex_); - cond_.wait(lock, [&] { return !queue_.empty(); }); + WaitUntilNonEmpty(lock); return PopUnlocked(); } // Pops the last item from the queue, or returns a nullopt if empty - // std::optional TryPop() { std::unique_lock lock(mutex_); return TryPopUnlocked(); } // Pushes an item to the queue - // void Push(const T& item) { std::unique_lock lock(mutex_); return PushUnlocked(item); } // Clears the queue - // void Clear() { std::unique_lock lock(mutex_); ClearUnlocked(); } + // Checks if the queue is empty bool Empty() const { std::unique_lock lock(mutex_); return queue_.empty(); } + // Returns a reference to the next element in the queue. Must be called on a non-empty + // queue const T& Front() const { // Need to lock the queue because `front()` may be implemented in terms // of `begin()`, which isn't safe with concurrent calls to e.g. `push()`. @@ -78,6 +78,10 @@ class ConcurrentQueue { size_t SizeUnlocked() const { return queue_.size(); } + void WaitUntilNonEmpty(std::unique_lock& lock) { + cond_.wait(lock, [&] { return !queue_.empty(); }); + } + T PopUnlocked() { auto item = queue_.front(); queue_.pop(); @@ -130,28 +134,34 @@ class BackpressureConcurrentQueue : public ConcurrentQueue { explicit BackpressureConcurrentQueue(BackpressureHandler handler) : handler_(std::move(handler)) {} - T Pop() { + // Pops the last item from the queue but waits if the queue is empty until new items are + // pushed. 
+ T WaitAndPop() { std::unique_lock lock(ConcurrentQueue::GetMutex()); + ConcurrentQueue::WaitUntilNonEmpty(lock); DoHandle do_handle(*this); return ConcurrentQueue::PopUnlocked(); } - void Push(const T& item) { + // Pops the last item from the queue, or returns a nullopt if empty + std::optional TryPop() { std::unique_lock lock(ConcurrentQueue::GetMutex()); DoHandle do_handle(*this); - ConcurrentQueue::PushUnlocked(item); + return ConcurrentQueue::TryPopUnlocked(); } - void Clear() { + // Pushes an item to the queue + void Push(const T& item) { std::unique_lock lock(ConcurrentQueue::GetMutex()); DoHandle do_handle(*this); - ConcurrentQueue::ClearUnlocked(); + ConcurrentQueue::PushUnlocked(item); } - std::optional TryPop() { + // Clears the queue + void Clear() { std::unique_lock lock(ConcurrentQueue::GetMutex()); DoHandle do_handle(*this); - return ConcurrentQueue::TryPopUnlocked(); + ConcurrentQueue::ClearUnlocked(); } Status ForceShutdown() { return handler_.ForceShutdown(); } diff --git a/cpp/src/arrow/acero/groupby_aggregate_node.cc b/cpp/src/arrow/acero/groupby_aggregate_node.cc index 06b034ab2d459..2beef360b45d4 100644 --- a/cpp/src/arrow/acero/groupby_aggregate_node.cc +++ b/cpp/src/arrow/acero/groupby_aggregate_node.cc @@ -282,6 +282,11 @@ Status GroupByNode::Merge() { DCHECK(state0->agg_states[span_i]); batch_ctx.SetState(state0->agg_states[span_i].get()); + // XXX this resizes each KernelState (state0->agg_states[span_i]) multiple times. + // An alternative would be a two-pass algorithm: + // 1. Compute all transpositions (one per local state) and the final number of + // groups. + // 2. Process all agg kernels, resizing each KernelState only once. RETURN_NOT_OK( agg_kernels_[span_i]->resize(&batch_ctx, state0->grouper->num_groups())); RETURN_NOT_OK(agg_kernels_[span_i]->merge( diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 7f4b6dd75272f..347bb962691ea 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -30,16 +31,14 @@ #include "arrow/acero/options.h" #include "arrow/acero/test_util_internal.h" #include "arrow/array.h" +#include "arrow/array/builder_binary.h" +#include "arrow/array/builder_primitive.h" #include "arrow/array/concatenate.h" #include "arrow/chunked_array.h" #include "arrow/compute/api_aggregate.h" -#include "arrow/compute/api_scalar.h" -#include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/exec.h" #include "arrow/compute/exec_internal.h" -#include "arrow/compute/kernels/aggregate_internal.h" -#include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/table.h" @@ -50,9 +49,7 @@ #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/async_generator.h" -#include "arrow/util/bitmap_reader.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/int_util_overflow.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/string.h" @@ -64,7 +61,6 @@ using testing::HasSubstr; namespace arrow { -using internal::BitmapReader; using internal::checked_cast; using internal::checked_pointer_cast; using internal::ToChars; @@ -77,9 +73,11 @@ using compute::ExecBatchFromJSON; using compute::ExecSpan; using compute::FunctionOptions; using compute::Grouper; +using compute::PivotWiderOptions; using 
compute::RowSegmenter; using compute::ScalarAggregateOptions; using compute::Segment; +using compute::SkewOptions; using compute::SortIndices; using compute::SortKey; using compute::SortOrder; @@ -565,6 +563,7 @@ class GroupBy : public ::testing::TestWithParam { return acero::GroupByTest(GetParam(), arguments, keys, aggregates, use_threads); } + // This is not named GroupByTest to avoid ambiguities between overloads Result AltGroupBy(const std::vector& arguments, const std::vector& keys, const std::vector& segment_keys, @@ -574,6 +573,70 @@ class GroupBy : public ::testing::TestWithParam { /*naive=*/false); } + Result RunPivot(const std::shared_ptr& key_type, + const std::shared_ptr& value_type, + const PivotWiderOptions& options, + const std::shared_ptr& table, bool use_threads = false) { + Aggregate agg{"hash_pivot_wider", std::make_shared(options), + /*target=*/std::vector{"agg_0", "agg_1"}, /*name=*/"out"}; + ARROW_ASSIGN_OR_RAISE( + Datum aggregated_and_grouped, + AltGroupBy({table->GetColumnByName("key"), table->GetColumnByName("value")}, + {table->GetColumnByName("group_key")}, + /*segment_keys=*/{}, {agg}, use_threads)); + ValidateOutput(aggregated_and_grouped); + return aggregated_and_grouped; + } + + Result RunPivot(const std::shared_ptr& key_type, + const std::shared_ptr& value_type, + const PivotWiderOptions& options, + const std::vector& table_json, + bool use_threads = false) { + auto table = + TableFromJSON(schema({field("group_key", int64()), field("key", key_type), + field("value", value_type)}), + table_json); + return RunPivot(key_type, value_type, options, table, use_threads); + } + + void CheckPivoted(const std::shared_ptr& key_type, + const std::shared_ptr& value_type, + const PivotWiderOptions& options, const Datum& pivoted, + const std::string& expected_json) { + FieldVector pivoted_fields; + for (const auto& key_name : options.key_names) { + pivoted_fields.push_back(field(key_name, value_type)); + } + auto expected_type = struct_({ + field("key_0", int64()), + field("out", struct_(std::move(pivoted_fields))), + }); + auto expected = ArrayFromJSON(expected_type, expected_json); + AssertDatumsEqual(expected, pivoted, /*verbose=*/true); + } + + void TestPivot(const std::shared_ptr& key_type, + const std::shared_ptr& value_type, + const PivotWiderOptions& options, + const std::vector& table_json, + const std::string& expected_json, bool use_threads) { + ASSERT_OK_AND_ASSIGN( + auto pivoted, RunPivot(key_type, value_type, options, table_json, use_threads)); + CheckPivoted(key_type, value_type, options, pivoted, expected_json); + } + + void TestPivot(const std::shared_ptr& key_type, + const std::shared_ptr& value_type, + const PivotWiderOptions& options, + const std::vector& table_json, + const std::string& expected_json) { + for (bool use_threads : {false, true}) { + ARROW_SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); + TestPivot(key_type, value_type, options, table_json, expected_json, use_threads); + } + } + void TestSegmentKey(const std::shared_ptr
& table, Datum output, const std::vector& segment_keys) { return acero::TestSegmentKey(GetParam(), table, output, segment_keys); @@ -1018,9 +1081,67 @@ TEST_P(GroupBy, MeanOverflow) { } } -TEST_P(GroupBy, VarianceAndStddev) { +TEST_P(GroupBy, VarianceStddevSkewKurtosis) { + for (auto value_type : {int32(), float64()}) { + ARROW_SCOPED_TRACE("value_type = ", *value_type); + auto batch = RecordBatchFromJSON( + schema({field("argument", value_type), field("key", int64())}), R"([ + [1, 1], + [null, 1], + [0, 2], + [null, 3], + [4, null], + [3, 1], + [0, 2], + [-1, 2], + [1, null], + [null, 3] + ])"); + + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + {}, + { + {"hash_variance", nullptr}, + {"hash_stddev", nullptr}, + {"hash_skew", nullptr}, + {"hash_kurtosis", nullptr}, + }, + false)); + + auto expected = ArrayFromJSON(struct_({ + field("key_0", int64()), + field("hash_variance", float64()), + field("hash_stddev", float64()), + field("hash_skew", float64()), + field("hash_kurtosis", float64()), + }), + R"([ + [1, 1.0, 1.0, 0.0, -2.0], + [2, 0.22222222222222224, 0.4714045207910317, -0.7071067811865478, -1.5], + [3, null, null, null, null], + [null, 2.25, 1.5, 0.0, -2.0] + ])"); + AssertDatumsApproxEqual(expected, aggregated_and_grouped, + /*verbose=*/true); + } +} + +TEST_P(GroupBy, VarianceAndStddevDdof) { + // Test ddof + auto variance_options = std::make_shared(/*ddof=*/2); + auto batch = RecordBatchFromJSON( - schema({field("argument", int32()), field("key", int64())}), R"([ + schema({field("argument", float64()), field("key", int64())}), R"([ [1, 1], [null, 1], [0, 2], @@ -1032,83 +1153,7 @@ TEST_P(GroupBy, VarianceAndStddev) { [1, null], [null, 3] ])"); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - {}, - { - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, - }, - false)); - - AssertDatumsApproxEqual(ArrayFromJSON(struct_({ - field("key_0", int64()), - field("hash_variance", float64()), - field("hash_stddev", float64()), - }), - R"([ - [1, 1.0, 1.0 ], - [2, 0.22222222222222224, 0.4714045207910317], - [3, null, null ], - [null, 2.25, 1.5 ] - ])"), - aggregated_and_grouped, - /*verbose=*/true); - - batch = RecordBatchFromJSON( - schema({field("argument", float64()), field("key", int64())}), R"([ - [1.0, 1], - [null, 1], - [0.0, 2], - [null, 3], - [4.0, null], - [3.0, 1], - [0.0, 2], - [-1.0, 2], - [1.0, null], - [null, 3] - ])"); - - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, GroupByTest( - { - batch->GetColumnByName("argument"), - batch->GetColumnByName("argument"), - }, - { - batch->GetColumnByName("key"), - }, - {}, - { - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, - }, - false)); - - AssertDatumsApproxEqual(ArrayFromJSON(struct_({ - field("key_0", int64()), - field("hash_variance", float64()), - field("hash_stddev", float64()), - }), - R"([ - [1, 1.0, 1.0 ], - [2, 0.22222222222222224, 0.4714045207910317], - [3, null, null ], - [null, 2.25, 1.5 ] - ])"), - aggregated_and_grouped, - /*verbose=*/true); - - // Test ddof - auto variance_options = std::make_shared(/*ddof=*/2); - ASSERT_OK_AND_ASSIGN(aggregated_and_grouped, GroupByTest( { batch->GetColumnByName("argument"), @@ -1139,55 +1184,59 @@ 
TEST_P(GroupBy, VarianceAndStddev) { /*verbose=*/true); } -TEST_P(GroupBy, VarianceAndStddevDecimal) { - auto batch = RecordBatchFromJSON( - schema({field("argument0", decimal128(3, 2)), field("argument1", decimal128(3, 2)), - field("key", int64())}), - R"([ - ["1.00", "1.00", 1], - [null, null, 1], - ["0.00", "0.00", 2], - ["4.00", "4.00", null], - ["3.00", "3.00", 1], - ["0.00", "0.00", 2], - ["-1.00", "-1.00", 2], - ["1.00", "1.00", null] - ])"); +TEST_P(GroupBy, VarianceStddevSkewKurtosisDecimal) { + for (auto value_type : + {decimal32(3, 2), decimal64(3, 2), decimal128(3, 2), decimal256(3, 2)}) { + ARROW_SCOPED_TRACE("value_type = ", *value_type); + auto batch = RecordBatchFromJSON( + schema({field("argument", value_type), field("key", int64())}), + R"([ + ["1.00", 1], + [null, 1], + ["0.00", 2], + ["4.00", null], + ["3.00", 1], + ["0.00", 2], + ["-1.00", 2], + ["1.00", null] + ])"); - ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, - GroupByTest( - { - batch->GetColumnByName("argument0"), - batch->GetColumnByName("argument0"), - batch->GetColumnByName("argument1"), - batch->GetColumnByName("argument1"), - }, - { - batch->GetColumnByName("key"), - }, - {}, - { - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, - {"hash_variance", nullptr}, - {"hash_stddev", nullptr}, - }, - false)); + ASSERT_OK_AND_ASSIGN(Datum aggregated_and_grouped, + GroupByTest( + { + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + batch->GetColumnByName("argument"), + }, + { + batch->GetColumnByName("key"), + }, + {}, + { + {"hash_variance", nullptr}, + {"hash_stddev", nullptr}, + {"hash_skew", nullptr}, + {"hash_kurtosis", nullptr}, + }, + false)); - AssertDatumsApproxEqual(ArrayFromJSON(struct_({ - field("key_0", int64()), - field("hash_variance", float64()), - field("hash_stddev", float64()), - field("hash_variance", float64()), - field("hash_stddev", float64()), - }), - R"([ - [1, 1.0, 1.0, 1.0, 1.0 ], - [2, 0.22222222222222224, 0.4714045207910317, 0.22222222222222224, 0.4714045207910317], - [null, 2.25, 1.5, 2.25, 1.5 ] - ])"), - aggregated_and_grouped, - /*verbose=*/true); + auto expected = ArrayFromJSON(struct_({ + field("key_0", int64()), + field("hash_variance", float64()), + field("hash_stddev", float64()), + field("hash_skew", float64()), + field("hash_kurtosis", float64()), + }), + R"([ + [1, 1.0, 1.0, 0.0, -2.0], + [2, 0.22222222222222224, 0.4714045207910317, -0.7071067811865478, -1.5], + [null, 2.25, 1.5, 0.0, -2.0] + ])"); + + AssertDatumsApproxEqual(expected, aggregated_and_grouped, + /*verbose=*/true); + } } TEST_P(GroupBy, TDigest) { @@ -1425,7 +1474,7 @@ TEST_P(GroupBy, StddevVarianceTDigestScalar) { } } -TEST_P(GroupBy, VarianceOptions) { +TEST_P(GroupBy, VarianceOptionsAndSkewOptions) { BatchesWithSchema input; input.batches = { ExecBatchFromJSON( @@ -1441,81 +1490,93 @@ TEST_P(GroupBy, VarianceOptions) { "[[null, null, 1]]"), ExecBatchFromJSON({int32(), float32(), int64()}, "[[2, 2.0, 1], [3, 3.0, 2]]"), ExecBatchFromJSON({int32(), float32(), int64()}, "[[4, 4.0, 2], [2, 2.0, 4]]"), - ExecBatchFromJSON({int32(), float32(), int64()}, "[[null, null, 4]]"), + ExecBatchFromJSON({int32(), float32(), int64()}, "[[null, null, 4], [6, 6.0, 3]]"), }; input.schema = schema( {field("argument", int32()), field("argument1", float32()), field("key", int64())}); - auto keep_nulls = std::make_shared(/*ddof=*/0, /*skip_nulls=*/false, - /*min_count=*/0); - auto min_count = + auto var_keep_nulls = + std::make_shared(/*ddof=*/0, 
/*skip_nulls=*/false, + /*min_count=*/0); + auto var_min_count = std::make_shared(/*ddof=*/0, /*skip_nulls=*/true, /*min_count=*/3); - auto keep_nulls_min_count = std::make_shared( + auto var_keep_nulls_min_count = std::make_shared( /*ddof=*/0, /*skip_nulls=*/false, /*min_count=*/3); - for (bool use_threads : {false}) { - SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); - ASSERT_OK_AND_ASSIGN( - Datum actual, - RunGroupBy( - input, {"key"}, - { - {"hash_stddev", keep_nulls, "argument", "hash_stddev"}, - {"hash_stddev", min_count, "argument", "hash_stddev"}, - {"hash_stddev", keep_nulls_min_count, "argument", "hash_stddev"}, - {"hash_variance", keep_nulls, "argument", "hash_variance"}, - {"hash_variance", min_count, "argument", "hash_variance"}, - {"hash_variance", keep_nulls_min_count, "argument", "hash_variance"}, - }, - use_threads)); - Datum expected = ArrayFromJSON(struct_({ - field("key", int64()), - field("hash_stddev", float64()), - field("hash_stddev", float64()), - field("hash_stddev", float64()), - field("hash_variance", float64()), - field("hash_variance", float64()), - field("hash_variance", float64()), - }), - R"([ - [1, null, 0.471405, null, null, 0.222222, null ], - [2, 1.29904, 1.29904, 1.29904, 1.6875, 1.6875, 1.6875], - [3, 0.0, null, null, 0.0, null, null ], - [4, null, 0.471405, null, null, 0.222222, null ] - ])"); - ValidateOutput(expected); - AssertDatumsApproxEqual(expected, actual, /*verbose=*/true); + auto skew_keep_nulls = std::make_shared(/*skip_nulls=*/false, + /*min_count=*/0); + auto skew_min_count = + std::make_shared(/*skip_nulls=*/true, /*min_count=*/3); + auto skew_keep_nulls_min_count = std::make_shared( + /*skip_nulls=*/false, /*min_count=*/3); - ASSERT_OK_AND_ASSIGN( - actual, - RunGroupBy( - input, {"key"}, - { - {"hash_stddev", keep_nulls, "argument1", "hash_stddev"}, - {"hash_stddev", min_count, "argument1", "hash_stddev"}, - {"hash_stddev", keep_nulls_min_count, "argument1", "hash_stddev"}, - {"hash_variance", keep_nulls, "argument1", "hash_variance"}, - {"hash_variance", min_count, "argument1", "hash_variance"}, - {"hash_variance", keep_nulls_min_count, "argument1", "hash_variance"}, - }, - use_threads)); - expected = ArrayFromJSON(struct_({ - field("key", int64()), - field("hash_stddev", float64()), - field("hash_stddev", float64()), - field("hash_stddev", float64()), - field("hash_variance", float64()), - field("hash_variance", float64()), - field("hash_variance", float64()), - }), - R"([ + for (std::string value_column : {"argument", "argument1"}) { + for (bool use_threads : {false}) { + SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); + ASSERT_OK_AND_ASSIGN( + Datum actual, + RunGroupBy( + input, {"key"}, + { + {"hash_stddev", var_keep_nulls, value_column, "hash_stddev"}, + {"hash_stddev", var_min_count, value_column, "hash_stddev"}, + {"hash_stddev", var_keep_nulls_min_count, value_column, "hash_stddev"}, + {"hash_variance", var_keep_nulls, value_column, "hash_variance"}, + {"hash_variance", var_min_count, value_column, "hash_variance"}, + {"hash_variance", var_keep_nulls_min_count, value_column, + "hash_variance"}, + }, + use_threads)); + Datum expected = ArrayFromJSON(struct_({ + field("key", int64()), + field("hash_stddev", float64()), + field("hash_stddev", float64()), + field("hash_stddev", float64()), + field("hash_variance", float64()), + field("hash_variance", float64()), + field("hash_variance", float64()), + }), + R"([ [1, null, 0.471405, null, null, 0.222222, null ], [2, 1.29904, 1.29904, 1.29904, 1.6875, 1.6875, 1.6875], - [3, 0.0, null, null, 0.0, null, null ], + [3, 2.5, null, null, 6.25, null, null ], [4, null, 0.471405, null, null, 0.222222, null ] - ])"); - ValidateOutput(expected); - AssertDatumsApproxEqual(expected, actual, /*verbose=*/true); + ])"); + ValidateOutput(actual); + AssertDatumsApproxEqual(expected, actual, /*verbose=*/true); + + ASSERT_OK_AND_ASSIGN( + actual, + RunGroupBy( + input, {"key"}, + { + {"hash_skew", skew_keep_nulls, value_column, "hash_skew"}, + {"hash_skew", skew_min_count, value_column, "hash_skew"}, + {"hash_skew", skew_keep_nulls_min_count, value_column, "hash_skew"}, + {"hash_kurtosis", skew_keep_nulls, value_column, "hash_kurtosis"}, + {"hash_kurtosis", skew_min_count, value_column, "hash_kurtosis"}, + {"hash_kurtosis", skew_keep_nulls_min_count, value_column, + "hash_kurtosis"}, + }, + use_threads)); + expected = ArrayFromJSON(struct_({ + field("key", int64()), + field("hash_skew", float64()), + field("hash_skew", float64()), + field("hash_skew", float64()), + field("hash_kurtosis", float64()), + field("hash_kurtosis", float64()), + field("hash_kurtosis", float64()), + }), + R"([ + [1, null, 0.707106, null, null, -1.5, null ], + [2, 0.213833, 0.213833, 0.213833, -1.720164, -1.720164, -1.720164], + [3, 0.0, null, null, -2.0, null, null ], + [4, null, 0.707106, null, null, -1.5, null ] + ])"); + ValidateOutput(actual); + AssertDatumsApproxEqual(expected, actual, /*verbose=*/true); + } } } @@ -4345,6 +4406,566 @@ TEST_P(GroupBy, OnlyKeys) { } } +TEST_P(GroupBy, PivotBasics) { + auto key_type = utf8(); + auto value_type = float32(); + std::vector table_json = {R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])", + R"([ + [2, "height", 12.5] + ])", + R"([ + [3, "width", 13.5], + [1, "height", 14.5] + ])"}; + std::string expected_json = R"([ + [1, {"height": 14.5, "width": 10.5} ], + [2, {"height": 12.5, "width": 11.5} ], + [3, {"height": null, "width": 13.5} ] + ])"; + for (auto unexpected_key_behavior : + {PivotWiderOptions::kIgnore, PivotWiderOptions::kRaise}) { + PivotWiderOptions options(/*key_names=*/{"height", "width"}, unexpected_key_behavior); + TestPivot(key_type, value_type, options, table_json, expected_json); + } +} + +TEST_P(GroupBy, PivotAllKeyTypes) { + auto value_type = float32(); + std::vector table_json = {R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])", + R"([ + [2, "height", 12.5], + [3, "width", 13.5], + [1, "height", 14.5] + ])"}; + std::string expected_json = R"([ + [1, {"height": 14.5, "width": 10.5} ], + [2, {"height": 12.5, "width": 11.5} ], + [3, {"height": null, "width": 13.5} ] + ])"; + PivotWiderOptions 
options(/*key_names=*/{"height", "width"}); + + for (const auto& key_type : BaseBinaryTypes()) { + ARROW_SCOPED_TRACE("key_type = ", *key_type); + TestPivot(key_type, value_type, options, table_json, expected_json); + } +} + +TEST_P(GroupBy, PivotNumericValues) { + auto key_type = utf8(); + std::vector table_json = {R"([ + [1, "width", 10], + [2, "width", 11] + ])", + R"([ + [2, "height", 12], + [3, "width", 13], + [1, "height", 14] + ])"}; + std::string expected_json = R"([ + [1, {"height": 14, "width": 10} ], + [2, {"height": 12, "width": 11} ], + [3, {"height": null, "width": 13} ] + ])"; + PivotWiderOptions options(/*key_names=*/{"height", "width"}); + + for (const auto& value_type : NumericTypes()) { + ARROW_SCOPED_TRACE("value_type = ", *value_type); + TestPivot(key_type, value_type, options, table_json, expected_json); + } +} + +TEST_P(GroupBy, PivotBinaryLikeValues) { + auto key_type = utf8(); + std::vector table_json = {R"([ + [1, "name", "Bob"], + [2, "eye_color", "brown"] + ])", + R"([ + [2, "name", "Alice"], + [1, "eye_color", "gray"], + [3, "name", "Mallaury"] + ])"}; + std::string expected_json = R"([ + [1, {"name": "Bob", "eye_color": "gray"} ], + [2, {"name": "Alice", "eye_color": "brown"} ], + [3, {"name": "Mallaury", "eye_color": null} ] + ])"; + PivotWiderOptions options(/*key_names=*/{"name", "eye_color"}); + + for (const auto& value_type : BaseBinaryTypes()) { + ARROW_SCOPED_TRACE("value_type = ", *value_type); + TestPivot(key_type, value_type, options, table_json, expected_json); + } +} + +TEST_P(GroupBy, PivotDecimalValues) { + auto key_type = utf8(); + auto value_type = decimal128(9, 1); + std::vector table_json = {R"([ + [1, "width", "10.1"], + [2, "width", "11.1"] + ])", + R"([ + [2, "height", "12.1"], + [3, "width", "13.1"], + [1, "height", "14.1"] + ])"}; + std::string expected_json = R"([ + [1, {"height": "14.1", "width": "10.1"} ], + [2, {"height": "12.1", "width": "11.1"} ], + [3, {"height": null, "width": "13.1"} ] + ])"; + PivotWiderOptions options(/*key_names=*/{"height", "width"}); + TestPivot(key_type, value_type, options, table_json, expected_json); +} + +TEST_P(GroupBy, PivotStructValues) { + auto key_type = utf8(); + auto value_type = struct_({{"value", float32()}}); + std::vector table_json = {R"([ + [1, "width", [10.1]], + [2, "width", [11.1]] + ])", + R"([ + [2, "height", [12.1]], + [3, "width", [13.1]], + [1, "height", [14.1]] + ])"}; + std::string expected_json = R"([ + [1, {"height": [14.1], "width": [10.1]} ], + [2, {"height": [12.1], "width": [11.1]} ], + [3, {"height": null, "width": [13.1]} ] + ])"; + PivotWiderOptions options(/*key_names=*/{"height", "width"}); + TestPivot(key_type, value_type, options, table_json, expected_json); +} + +TEST_P(GroupBy, PivotListValues) { + auto key_type = utf8(); + auto value_type = list(float32()); + std::vector table_json = {R"([ + [1, "foo", [10.5, 11.5]], + [2, "bar", [12.5]] + ])", + R"([ + [2, "foo", []], + [3, "bar", [13.5]], + [1, "foo", null] + ])"}; + std::string expected_json = R"([ + [1, {"foo": [10.5, 11.5], "bar": null} ], + [2, {"foo": [], "bar": [12.5]} ], + [3, {"foo": null, "bar": [13.5]} ] + ])"; + PivotWiderOptions options(/*key_names=*/{"foo", "bar"}); + TestPivot(key_type, value_type, options, table_json, expected_json); +} + +TEST_P(GroupBy, PivotNullValueType) { + auto key_type = utf8(); + auto value_type = null(); + std::vector table_json = {R"([ + [1, "foo", null], + [2, "bar", null] + ])", + R"([ + [2, "foo", null], + [3, "bar", null], + [1, "foo", null] + ])"}; + std::string 
expected_json = R"([ + [1, {"foo": null, "bar": null} ], + [2, {"foo": null, "bar": null} ], + [3, {"foo": null, "bar": null} ] + ])"; + PivotWiderOptions options(/*key_names=*/{"foo", "bar"}); + TestPivot(key_type, value_type, options, table_json, expected_json); +} + +TEST_P(GroupBy, PivotNullValues) { + auto key_type = utf8(); + auto value_type = float32(); + std::vector table_json = {R"([ + [1, "width", 10.5], + [2, "width", null] + ])", + R"([ + [2, "height", 12.5], + [2, "width", 13.5], + [1, "width", null], + [2, "height", null] + ])", + R"([ + [1, "width", null], + [2, "height", null] + ])"}; + std::string expected_json = R"([ + [1, {"height": null, "width": 10.5} ], + [2, {"height": 12.5, "width": 13.5} ] + ])"; + PivotWiderOptions options(/*key_names=*/{"height", "width"}, PivotWiderOptions::kRaise); + TestPivot(key_type, value_type, options, table_json, expected_json); +} + +TEST_P(GroupBy, PivotScalarKey) { + BatchesWithSchema input; + std::vector types = {int32(), utf8(), float32()}; + std::vector shapes = {ArgShape::ARRAY, ArgShape::SCALAR, ArgShape::ARRAY}; + input.batches = { + ExecBatchFromJSON(types, shapes, + R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])"), + ExecBatchFromJSON(types, shapes, + R"([ + [2, "width", null] + ])"), + ExecBatchFromJSON(types, shapes, + R"([ + [3, "height", null], + [3, "height", null] + ])"), + ExecBatchFromJSON(types, shapes, + R"([ + [3, "height", 12.5], + [1, "height", 13.5] + ])"), + }; + input.schema = schema({field("group_key", int32()), field("pivot_key", utf8()), + field("pivot_value", float32())}); + Datum expected = ArrayFromJSON( + struct_({field("group_key", int32()), + field("pivoted", + struct_({field("height", float32()), field("width", float32())}))}), + R"([ + [1, {"height": 13.5, "width": 10.5} ], + [2, {"height": null, "width": 11.5} ], + [3, {"height": 12.5, "width": null} ] + ])"); + auto options = std::make_shared( + PivotWiderOptions(/*key_names=*/{"height", "width"})); + Aggregate aggregate{"hash_pivot_wider", options, + std::vector{"pivot_key", "pivot_value"}, "pivoted"}; + for (bool use_threads : {false, true}) { + SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); + ASSERT_OK_AND_ASSIGN(Datum actual, + RunGroupBy(input, {"group_key"}, {aggregate}, use_threads)); + ValidateOutput(actual); + AssertDatumsApproxEqual(expected, actual, /*verbose=*/true); + } +} + +TEST_P(GroupBy, PivotUnusedKeyName) { + auto key_type = utf8(); + auto value_type = float32(); + std::vector table_json = {R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])", + R"([ + [2, "height", 12.5], + [3, "width", 13.5], + [1, "height", 14.5] + ])"}; + std::string expected_json = R"([ + [1, {"height": 14.5, "depth": null, "width": 10.5} ], + [2, {"height": 12.5, "depth": null, "width": 11.5} ], + [3, {"height": null, "depth": null, "width": 13.5} ] + ])"; + for (auto unexpected_key_behavior : + {PivotWiderOptions::kIgnore, PivotWiderOptions::kRaise}) { + PivotWiderOptions options(/*key_names=*/{"height", "depth", "width"}, + unexpected_key_behavior); + TestPivot(key_type, value_type, options, table_json, expected_json); + } +} + +TEST_P(GroupBy, PivotUnexpectedKeyName) { + auto key_type = utf8(); + auto value_type = float32(); + std::vector table_json = {R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])", + R"([ + [2, "height", 12.5], + [3, "width", 13.5], + [1, "depth", 15.5], + [1, "height", 14.5] + ])"}; + PivotWiderOptions options(/*key_names=*/{"height", "width"}); + std::string expected_json = R"([ + [1, {"height": 14.5, "width": 10.5} ], + [2, {"height": 12.5, "width": 11.5} ], + [3, {"height": null, "width": 13.5} ] + ])"; + TestPivot(key_type, value_type, options, table_json, expected_json); + options.unexpected_key_behavior = PivotWiderOptions::kRaise; + for (bool use_threads : {false, true}) { + ARROW_SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); + EXPECT_RAISES_WITH_MESSAGE_THAT( + KeyError, HasSubstr("Unexpected pivot key: depth"), + RunPivot(key_type, value_type, options, table_json, use_threads)); + } +} +TEST_P(GroupBy, PivotNullKeys) { + auto key_type = utf8(); + auto value_type = float32(); + std::vector table_json = {R"([ + [1, "width", 10.5], + [2, null, 11.5] + ])"}; + PivotWiderOptions options(/*key_names=*/{"height", "width"}); + for (bool use_threads : {false, true}) { + ARROW_SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); + EXPECT_RAISES_WITH_MESSAGE_THAT( + KeyError, HasSubstr("pivot key name cannot be null"), + RunPivot(key_type, value_type, options, table_json, use_threads)); + } +} + +TEST_P(GroupBy, PivotDuplicateKeys) { + auto key_type = utf8(); + auto value_type = float32(); + std::vector table_json = {R"([])"}; + PivotWiderOptions options(/*key_names=*/{"height", "width", "height"}); + EXPECT_RAISES_WITH_MESSAGE_THAT( + KeyError, HasSubstr("Duplicate key name 'height' in PivotWiderOptions"), + RunPivot(key_type, value_type, options, table_json)); +} + +TEST_P(GroupBy, PivotDuplicateValues) { + auto key_type = utf8(); + auto value_type = float32(); + PivotWiderOptions options(/*key_names=*/{"height", "width"}); + + for (bool use_threads : {false, true}) { + ARROW_SCOPED_TRACE(use_threads ? 
"parallel/merged" : "serial"); + + // Duplicate values in same chunk + std::vector table_json = {R"([ + [1, "width", 10.5], + [2, "width", 11.5], + [1, "width", 11.5] + ])"}; + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, + HasSubstr("Encountered more than one non-null value"), + RunPivot(key_type, value_type, options, table_json)); + + // Duplicate values in different chunks + table_json = {R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])", + R"([ + [1, "width", 11.5] + ])"}; + EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, + HasSubstr("Encountered more than one non-null value"), + RunPivot(key_type, value_type, options, table_json)); + } +} + +TEST_P(GroupBy, PivotScalarKeyWithDuplicateValues) { + BatchesWithSchema input; + std::vector types = {int32(), utf8(), float32()}; + std::vector shapes = {ArgShape::ARRAY, ArgShape::SCALAR, ArgShape::ARRAY}; + input.schema = schema({field("group_key", int32()), field("pivot_key", utf8()), + field("pivot_value", float32())}); + auto options = std::make_shared( + PivotWiderOptions(/*key_names=*/{"height", "width"})); + Aggregate aggregate{"hash_pivot_wider", options, + std::vector{"pivot_key", "pivot_value"}, "pivoted"}; + + // Duplicate values in same chunk + input.batches = { + ExecBatchFromJSON(types, shapes, + R"([ + [1, "width", 10.5], + [1, "width", 11.5] + ])"), + }; + for (bool use_threads : {false, true}) { + SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, HasSubstr("Encountered more than one non-null value"), + RunGroupBy(input, {"group_key"}, {aggregate}, use_threads)); + } + + // Duplicate values in different chunks + input.batches = { + ExecBatchFromJSON(types, shapes, + R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])"), + ExecBatchFromJSON(types, shapes, + R"([ + [2, "width", 12.5] + ])"), + }; + for (bool use_threads : {false, true}) { + SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, HasSubstr("Encountered more than one non-null value"), + RunGroupBy(input, {"group_key"}, {aggregate}, use_threads)); + } +} + +struct RandomPivotTestCase { + PivotWiderOptions options; + std::shared_ptr input; + std::shared_ptr expected_output; +}; + +Result MakeRandomPivot(int64_t length) { + constexpr double kKeyPresenceProbability = 0.8; + constexpr double kValueValidityProbability = 0.7; + + const std::vector key_names = {"height", "width", "depth"}; + std::default_random_engine gen(42); + std::uniform_real_distribution value_dist(0.0f, 1.0f); + std::bernoulli_distribution key_presence_dist(kKeyPresenceProbability); + std::bernoulli_distribution value_validity_dist(kValueValidityProbability); + + Int64Builder group_key_builder; + StringBuilder key_builder; + FloatBuilder value_builder; + RETURN_NOT_OK(group_key_builder.Reserve(length)); + RETURN_NOT_OK(key_builder.Reserve(length)); + RETURN_NOT_OK(value_builder.Reserve(length)); + + // The last input key name will not be part of the result + PivotWiderOptions options( + std::vector(key_names.begin(), key_names.end() - 1)); + Int64Builder pivoted_group_builder; + std::vector pivoted_value_builders(options.key_names.size()); + + auto finish_group = [&](int64_t group_key) -> Status { + // First check if *any* pivoted column was populated (otherwise there was + // no valid value at all in this group, and no output row should be generated). 
+ RETURN_NOT_OK(pivoted_group_builder.Append(group_key)); + // Make sure all pivoted columns are populated and in sync with the group key column + for (auto& pivoted_value_builder : pivoted_value_builders) { + if (pivoted_value_builder.length() < pivoted_group_builder.length()) { + RETURN_NOT_OK(pivoted_value_builder.AppendNull()); + } + EXPECT_EQ(pivoted_value_builder.length(), pivoted_group_builder.length()); + } + return Status::OK(); + }; + + int64_t group_key = 1000; + bool group_started = false; + int key_id = 0; + while (group_key_builder.length() < length) { + // For the current group_key and key_id we can either: + // 1. not add a row + // 2. add a row with a null value + // 3. add a row with a non-null value + // 3a. the row will end up in the pivoted data iff the key is part of + // the PivotWiderOptions.key_names + if (key_presence_dist(gen)) { + group_key_builder.UnsafeAppend(group_key); + group_started = true; + RETURN_NOT_OK(key_builder.Append(key_names[key_id])); + if (value_validity_dist(gen)) { + const auto value = value_dist(gen); + value_builder.UnsafeAppend(value); + if (key_id < static_cast(pivoted_value_builders.size())) { + RETURN_NOT_OK(pivoted_value_builders[key_id].Append(value)); + } + } else { + value_builder.UnsafeAppendNull(); + } + } + if (++key_id >= static_cast(key_names.size())) { + // We've considered all keys for this group. + // Emit a pivoted row only if any key was emitted in the input. + if (group_started) { + RETURN_NOT_OK(finish_group(group_key)); + } + // Initiate new group + ++group_key; + group_started = false; + key_id = 0; + } + } + if (group_started) { + // We've started this group, finish it + RETURN_NOT_OK(finish_group(group_key)); + } + ARROW_ASSIGN_OR_RAISE(auto group_keys, group_key_builder.Finish()); + ARROW_ASSIGN_OR_RAISE(auto keys, key_builder.Finish()); + ARROW_ASSIGN_OR_RAISE(auto values, value_builder.Finish()); + auto input_schema = + schema({{"group_key", int64()}, {"key", utf8()}, {"value", float32()}}); + auto input = RecordBatch::Make(input_schema, length, {group_keys, keys, values}); + RETURN_NOT_OK(input->Validate()); + + ARROW_ASSIGN_OR_RAISE(auto pivoted_groups, pivoted_group_builder.Finish()); + ArrayVector pivoted_value_columns; + for (auto& pivoted_value_builder : pivoted_value_builders) { + ARROW_ASSIGN_OR_RAISE(pivoted_value_columns.emplace_back(), + pivoted_value_builder.Finish()); + } + ARROW_ASSIGN_OR_RAISE( + auto pivoted_values, + StructArray::Make(std::move(pivoted_value_columns), options.key_names)); + ARROW_ASSIGN_OR_RAISE(auto output, + StructArray::Make({pivoted_groups, pivoted_values}, + std::vector{"key_0", "out"})); + RETURN_NOT_OK(output->Validate()); + + return RandomPivotTestCase{std::move(options), std::move(input), std::move(output)}; +} + +TEST_P(GroupBy, PivotRandom) { + constexpr int64_t kLength = 900; + // Larger than 256 to exercise take-index dispatch in pivot implementation + constexpr int64_t kChunkLength = 300; + ASSERT_OK_AND_ASSIGN(auto pivot_case, MakeRandomPivot(kLength)); + + for (bool shuffle : {false, true}) { + ARROW_SCOPED_TRACE("shuffle = ", shuffle); + auto input = Datum(pivot_case.input); + if (shuffle) { + // Since the "value" column is random-generated, sorting on it produces + // a random shuffle. 
+ ASSERT_OK_AND_ASSIGN( + auto shuffle_indices, + SortIndices(pivot_case.input, SortOptions({SortKey("value")}))); + ASSERT_OK_AND_ASSIGN(input, Take(input, shuffle_indices)); + } + ASSERT_EQ(input.kind(), Datum::RECORD_BATCH); + RecordBatchVector chunks; + for (int64_t start = 0; start < kLength; start += kChunkLength) { + const auto chunk_length = std::min(kLength - start, kChunkLength); + chunks.push_back(input.record_batch()->Slice(start, chunk_length)); + } + ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatches(chunks)); + + for (bool use_threads : {false, true}) { + ARROW_SCOPED_TRACE(use_threads ? "parallel/merged" : "serial"); + ASSERT_OK_AND_ASSIGN(auto pivoted, RunPivot(utf8(), float32(), pivot_case.options, + table, use_threads)); + // XXX For some reason this works even in the shuffled case + // (I would expect the test to require sorting of the output). + // This might depend on implementation details of group id generation + // by the hash-aggregate logic (the pivot implementation implicitly + // orders the output by ascending group id). + AssertDatumsEqual(pivot_case.expected_output, pivoted, /*verbose=*/true); + } + } +} + INSTANTIATE_TEST_SUITE_P(GroupBy, GroupBy, ::testing::Values(RunGroupByImpl)); class SegmentedScalarGroupBy : public GroupBy {}; @@ -4620,6 +5241,101 @@ TEST_P(SegmentedKeyGroupBy, MultiSegmentKeyCombined) { TestMultiSegmentKey(GetParam(), GetMultiSegmentInputAsCombined); } +TEST_P(SegmentedKeyGroupBy, PivotSegmentKey) { + auto group_by = GetParam(); + auto key_type = utf8(); + auto value_type = float32(); + + std::vector table_json = {R"([ + [1, "width", 10.5], + [1, "height", 11.5] + ])", + R"([ + [2, "height", 12.5], + [2, "width", 13.5], + [3, "width", 14.5] + ])", + R"([ + [3, "width", null], + [4, "height", 15.5] + ])"}; + std::vector expected_json = { + R"([[1, {"height": 11.5, "width": 10.5}]])", + R"([[2, {"height": 12.5, "width": 13.5}]])", + R"([[3, {"height": null, "width": 14.5}]])", + R"([[4, {"height": 15.5, "width": null}]])", + }; + + auto table = + TableFromJSON(schema({field("segment_key", int64()), field("pivot_key", key_type), + field("pivot_value", value_type)}), + table_json); + + auto options = std::make_shared( + PivotWiderOptions(/*key_names=*/{"height", "width"})); + Aggregate aggregate{"pivot_wider", options, std::vector{"agg_0", "agg_1"}, + "pivoted"}; + ASSERT_OK_AND_ASSIGN(Datum actual, + group_by( + { + table->GetColumnByName("pivot_key"), + table->GetColumnByName("pivot_value"), + }, + {}, {table->GetColumnByName("segment_key")}, {aggregate}, + /*use_threads=*/false, /*naive=*/false)); + ValidateOutput(actual); + auto expected = ChunkedArrayFromJSON( + struct_({field("key_0", int64()), + field("pivoted", struct_({field("height", value_type), + field("width", value_type)}))}), + expected_json); + AssertDatumsEqual(expected, actual, /*verbose=*/true); +} + +TEST_P(SegmentedKeyGroupBy, PivotSegmentKeyDuplicateValues) { + // NOTE: besides testing "pivot_wider" behavior, this test also checks that errors + // produced when consuming or merging an aggregate don't corrupt + // execution engine internals. 
+ auto group_by = GetParam(); + auto key_type = utf8(); + auto value_type = float32(); + auto options = std::make_shared( + PivotWiderOptions(/*key_names=*/{"height", "width"})); + auto table_schema = schema({field("segment_key", int64()), field("pivot_key", key_type), + field("pivot_value", value_type)}); + + auto test_duplicate_values = [&](const std::vector& table_json) { + auto table = TableFromJSON(table_schema, table_json); + Aggregate aggregate{"pivot_wider", options, std::vector{"agg_0", "agg_1"}, + "pivoted"}; + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, + HasSubstr("Encountered more than one non-null value for the same pivot key"), + group_by( + { + table->GetColumnByName("pivot_key"), + table->GetColumnByName("pivot_value"), + }, + {}, {table->GetColumnByName("segment_key")}, {aggregate}, + /*use_threads=*/false, /*naive=*/false)); + }; + + // Duplicate values in the same chunk + test_duplicate_values({R"([ + [1, "width", 10.5], + [2, "width", 11.5], + [2, "width", 12.5] + ])"}); + // Duplicate values in two different chunks + test_duplicate_values({R"([ + [1, "width", 10.5], + [2, "width", 11.5] + ])", + R"([ + [2, "width", 12.5] + ])"}); +} + INSTANTIATE_TEST_SUITE_P(SegmentedScalarGroupBy, SegmentedScalarGroupBy, ::testing::Values(RunSegmentedGroupByImpl)); diff --git a/cpp/src/arrow/acero/hash_join.h b/cpp/src/arrow/acero/hash_join.h index a81ff274e5e3a..c0faacf04baf0 100644 --- a/cpp/src/arrow/acero/hash_join.h +++ b/cpp/src/arrow/acero/hash_join.h @@ -37,7 +37,7 @@ namespace acero { using util::AccumulationQueue; -class HashJoinImpl { +class ARROW_ACERO_EXPORT HashJoinImpl { public: using OutputBatchCallback = std::function; using BuildFinishedCallback = std::function; diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc index 0a56194f2a3c8..c01e8a58933f6 100644 --- a/cpp/src/arrow/acero/hash_join_benchmark.cc +++ b/cpp/src/arrow/acero/hash_join_benchmark.cc @@ -32,8 +32,6 @@ #include #include -#include - namespace arrow { namespace acero { struct BenchmarkSettings { @@ -56,6 +54,8 @@ struct BenchmarkSettings { int var_length_max = 20; // Maximum length of any var length types Expression residual_filter = literal(true); + + bool stats_probe_rows = true; }; class JoinBenchmark { @@ -128,6 +128,7 @@ class JoinBenchmark { for (ExecBatch& batch : r_batches_with_schema.batches) r_batches_.InsertBatch(std::move(batch)); + stats_.num_build_rows = settings.num_build_batches * settings.batch_size; stats_.num_probe_rows = settings.num_probe_batches * settings.batch_size; schema_mgr_ = std::make_unique(); @@ -141,14 +142,9 @@ class JoinBenchmark { join_ = *HashJoinImpl::MakeSwiss(); } - omp_set_num_threads(settings.num_threads); - auto schedule_callback = [](std::function func) -> Status { -#pragma omp task - { DCHECK_OK(func(omp_get_thread_num())); } - return Status::OK(); - }; - scheduler_ = TaskScheduler::Make(); + thread_pool_ = arrow::internal::GetCpuThreadPool(); + DCHECK_OK(thread_pool_->SetCapacity(settings.num_threads)); DCHECK_OK(ctx_.Init(nullptr)); auto register_task_group_callback = [&](std::function task, @@ -157,7 +153,7 @@ class JoinBenchmark { }; auto start_task_group_callback = [&](int task_group_id, int64_t num_tasks) { - return scheduler_->StartTaskGroup(omp_get_thread_num(), task_group_id, num_tasks); + return scheduler_->StartTaskGroup(/*thread_id=*/0, task_group_id, num_tasks); }; DCHECK_OK(join_->Init( @@ -165,7 +161,7 @@ class JoinBenchmark { &(schema_mgr_->proj_maps[1]), std::move(key_cmp), 
settings.residual_filter, std::move(register_task_group_callback), std::move(start_task_group_callback), [](int64_t, ExecBatch) { return Status::OK(); }, - [](int64_t) { return Status::OK(); })); + [&](int64_t) { return Status::OK(); })); task_group_probe_ = scheduler_->RegisterTaskGroup( [this](size_t thread_index, int64_t task_id) -> Status { @@ -178,25 +174,27 @@ class JoinBenchmark { scheduler_->RegisterEnd(); DCHECK_OK(scheduler_->StartScheduling( - 0 /*thread index*/, std::move(schedule_callback), - static_cast(2 * settings.num_threads) /*concurrent tasks*/, - settings.num_threads == 1)); + /*thread_id=*/0, + [&](std::function task) -> Status { + return thread_pool_->Spawn([&, task]() { DCHECK_OK(task(thread_indexer_())); }); + }, + thread_pool_->GetCapacity(), settings.num_threads == 1)); } void RunJoin() { -#pragma omp parallel - { - int tid = omp_get_thread_num(); -#pragma omp single - DCHECK_OK( - join_->BuildHashTable(tid, std::move(r_batches_), [this](size_t thread_index) { - return scheduler_->StartTaskGroup(thread_index, task_group_probe_, - l_batches_.batch_count()); - })); - } + DCHECK_OK(join_->BuildHashTable( + /*thread_id=*/0, std::move(r_batches_), [this](size_t thread_index) { + return scheduler_->StartTaskGroup(thread_index, task_group_probe_, + l_batches_.batch_count()); + })); + + thread_pool_->WaitForIdle(); } std::unique_ptr scheduler_; + ThreadIndexer thread_indexer_; + arrow::internal::ThreadPool* thread_pool_; + AccumulationQueue l_batches_; AccumulationQueue r_batches_; std::unique_ptr schema_mgr_; @@ -205,6 +203,7 @@ class JoinBenchmark { int task_group_probe_; struct { + uint64_t num_build_rows; uint64_t num_probe_rows; } stats_; }; @@ -219,11 +218,13 @@ static void HashJoinBasicBenchmarkImpl(benchmark::State& st, st.ResumeTiming(); bm.RunJoin(); st.PauseTiming(); - total_rows += bm.stats_.num_probe_rows; + total_rows += (settings.stats_probe_rows ? 
bm.stats_.num_probe_rows + : bm.stats_.num_build_rows); } st.ResumeTiming(); } - st.counters["rows/sec"] = benchmark::Counter(total_rows, benchmark::Counter::kIsRate); + st.counters["rows/sec"] = + benchmark::Counter(static_cast(total_rows), benchmark::Counter::kIsRate); } template @@ -302,6 +303,7 @@ static void BM_HashJoinBasic_BuildParallelism(benchmark::State& st) { settings.num_threads = static_cast(st.range(0)); settings.num_build_batches = static_cast(st.range(1)); settings.num_probe_batches = settings.num_threads; + settings.stats_probe_rows = false; HashJoinBasicBenchmarkImpl(st, settings); } diff --git a/cpp/src/arrow/acero/options.h b/cpp/src/arrow/acero/options.h index 2beacfe26baa1..26293725582b1 100644 --- a/cpp/src/arrow/acero/options.h +++ b/cpp/src/arrow/acero/options.h @@ -29,10 +29,8 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" #include "arrow/compute/expression.h" -#include "arrow/record_batch.h" #include "arrow/result.h" -#include "arrow/util/async_generator.h" -#include "arrow/util/async_util.h" +#include "arrow/util/future.h" namespace arrow { @@ -55,7 +53,7 @@ namespace acero { /// \brief This must not be used in release-mode struct DebugOptions; -using AsyncExecBatchGenerator = AsyncGenerator>; +using AsyncExecBatchGenerator = std::function>()>; /// \addtogroup acero-nodes /// @{ @@ -103,8 +101,8 @@ class ARROW_ACERO_EXPORT SourceNodeOptions : public ExecNodeOptions { std::shared_ptr output_schema; /// \brief an asynchronous stream of batches ending with std::nullopt std::function>()> generator; - - Ordering ordering = Ordering::Unordered(); + /// \brief the order of the data, defaults to Ordering::Unordered + Ordering ordering; }; /// \brief a node that generates data from a table already loaded in memory diff --git a/cpp/src/arrow/acero/order_by_node_test.cc b/cpp/src/arrow/acero/order_by_node_test.cc index d77b0f3184f1a..37e6862ed0f52 100644 --- a/cpp/src/arrow/acero/order_by_node_test.cc +++ b/cpp/src/arrow/acero/order_by_node_test.cc @@ -42,8 +42,7 @@ static constexpr int kRowsPerBatch = 4; static constexpr int kNumBatches = 32; std::shared_ptr
TestTable() { - return gen::Gen({{"up", gen::Step()}, - {"down", gen::Step(/*start=*/0, /*step=*/-1, /*signed_int=*/true)}}) + return gen::Gen({{"up", gen::Step()}, {"down", gen::Step(/*start=*/0, /*step=*/-1)}}) ->FailOnError() ->Table(kRowsPerBatch, kNumBatches); } diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc b/cpp/src/arrow/acero/sorted_merge_node.cc index c49aca17fb20a..125eb6e3ed0f9 100644 --- a/cpp/src/arrow/acero/sorted_merge_node.cc +++ b/cpp/src/arrow/acero/sorted_merge_node.cc @@ -586,7 +586,7 @@ class SortedMergeNode : public ExecNode { void EmitBatches() { while (true) { // Implementation note: If the queue is empty, we will block here - if (process_queue.Pop() == kPoisonPill) { + if (process_queue.WaitAndPop() == kPoisonPill) { EndFromProcessThread(); } // Either we're out of data or something went wrong diff --git a/cpp/src/arrow/acero/sorted_merge_node_test.cc b/cpp/src/arrow/acero/sorted_merge_node_test.cc index 55446d631d90c..82b630420c4ae 100644 --- a/cpp/src/arrow/acero/sorted_merge_node_test.cc +++ b/cpp/src/arrow/acero/sorted_merge_node_test.cc @@ -36,8 +36,7 @@ namespace arrow::acero { std::shared_ptr
TestTable(int start, int step, int rows_per_batch, int num_batches) { - return gen::Gen({{"timestamp", gen::Step(start, step, /*signed_int=*/true)}, - {"str", gen::Random(utf8())}}) + return gen::Gen({{"timestamp", gen::Step(start, step)}, {"str", gen::Random(utf8())}}) ->FailOnError() ->Table(rows_per_batch, num_batches); } diff --git a/cpp/src/arrow/acero/source_node.cc b/cpp/src/arrow/acero/source_node.cc index ac34e4b6a09fc..2d3e2a1da1735 100644 --- a/cpp/src/arrow/acero/source_node.cc +++ b/cpp/src/arrow/acero/source_node.cc @@ -407,7 +407,7 @@ struct SchemaSourceNode : public SourceNode { struct RecordBatchReaderSourceNode : public SourceNode { RecordBatchReaderSourceNode(ExecPlan* plan, std::shared_ptr schema, arrow::AsyncGenerator> generator) - : SourceNode(plan, schema, generator, Ordering::Implicit()) {} + : SourceNode(plan, schema, generator) {} static Result Make(ExecPlan* plan, std::vector inputs, const ExecNodeOptions& options) { diff --git a/cpp/src/arrow/acero/source_node_test.cc b/cpp/src/arrow/acero/source_node_test.cc index 132dc05e6fd64..79ff5852815c5 100644 --- a/cpp/src/arrow/acero/source_node_test.cc +++ b/cpp/src/arrow/acero/source_node_test.cc @@ -21,6 +21,7 @@ #include "arrow/acero/map_node.h" #include "arrow/acero/options.h" #include "arrow/acero/test_nodes.h" +#include "arrow/record_batch.h" namespace arrow { namespace acero { diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 85e14ac469ce7..b4d89df290214 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -643,37 +643,38 @@ void SwissTableMerge::MergePartition(SwissTable* target, const SwissTable* sourc // int source_group_id_bits = SwissTable::num_groupid_bits_from_log_blocks(source->log_blocks()); - uint64_t source_group_id_mask = ~0ULL >> (64 - source_group_id_bits); - int64_t source_block_bytes = source_group_id_bits + 8; + int source_block_bytes = + SwissTable::num_block_bytes_from_num_groupid_bits(source_group_id_bits); + uint32_t source_group_id_mask = + SwissTable::group_id_mask_from_num_groupid_bits(source_group_id_bits); ARROW_DCHECK(source_block_bytes % sizeof(uint64_t) == 0); // Compute index of the last block in target that corresponds to the given // partition. // ARROW_DCHECK(num_partition_bits <= target->log_blocks()); - int64_t target_max_block_id = + uint32_t target_max_block_id = ((partition_id + 1) << (target->log_blocks() - num_partition_bits)) - 1; overflow_group_ids->clear(); overflow_hashes->clear(); // For each source block... - int64_t source_blocks = 1LL << source->log_blocks(); - for (int64_t block_id = 0; block_id < source_blocks; ++block_id) { - uint8_t* block_bytes = source->blocks() + block_id * source_block_bytes; + uint32_t source_blocks = 1 << source->log_blocks(); + for (uint32_t block_id = 0; block_id < source_blocks; ++block_id) { + const uint8_t* block_bytes = source->block_data(block_id, source_block_bytes); uint64_t block = *reinterpret_cast(block_bytes); // For each non-empty source slot... constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL; - constexpr int kSlotsPerBlock = 8; - int num_full_slots = - kSlotsPerBlock - static_cast(ARROW_POPCOUNT64(block & kHighBitOfEachByte)); + int num_full_slots = SwissTable::kSlotsPerBlock - + static_cast(ARROW_POPCOUNT64(block & kHighBitOfEachByte)); for (int local_slot_id = 0; local_slot_id < num_full_slots; ++local_slot_id) { // Read group id and hash for this slot. 
// - uint64_t group_id = - source->extract_group_id(block_bytes, local_slot_id, source_group_id_mask); - int64_t global_slot_id = block_id * kSlotsPerBlock + local_slot_id; + uint32_t group_id = SwissTable::extract_group_id( + block_bytes, local_slot_id, source_group_id_bits, source_group_id_mask); + uint32_t global_slot_id = SwissTable::global_slot_id(block_id, local_slot_id); uint32_t hash = source->hashes()[global_slot_id]; // Insert partition id into the highest bits of hash, shifting the // remaining hash bits right. @@ -696,17 +697,18 @@ void SwissTableMerge::MergePartition(SwissTable* target, const SwissTable* sourc } } -inline bool SwissTableMerge::InsertNewGroup(SwissTable* target, uint64_t group_id, - uint32_t hash, int64_t max_block_id) { +inline bool SwissTableMerge::InsertNewGroup(SwissTable* target, uint32_t group_id, + uint32_t hash, uint32_t max_block_id) { // Load the first block to visit for this hash // - int64_t block_id = hash >> (SwissTable::bits_hash_ - target->log_blocks()); - int64_t block_id_mask = ((1LL << target->log_blocks()) - 1); + uint32_t block_id = SwissTable::block_id_from_hash(hash, target->log_blocks()); + uint32_t block_id_mask = (1 << target->log_blocks()) - 1; int num_group_id_bits = SwissTable::num_groupid_bits_from_log_blocks(target->log_blocks()); - int64_t num_block_bytes = num_group_id_bits + sizeof(uint64_t); + int num_block_bytes = + SwissTable::num_block_bytes_from_num_groupid_bits(num_group_id_bits); ARROW_DCHECK(num_block_bytes % sizeof(uint64_t) == 0); - uint8_t* block_bytes = target->blocks() + block_id * num_block_bytes; + const uint8_t* block_bytes = target->block_data(block_id, num_block_bytes); uint64_t block = *reinterpret_cast(block_bytes); // Search for the first block with empty slots. @@ -715,25 +717,23 @@ inline bool SwissTableMerge::InsertNewGroup(SwissTable* target, uint64_t group_i constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL; while ((block & kHighBitOfEachByte) == 0 && block_id < max_block_id) { block_id = (block_id + 1) & block_id_mask; - block_bytes = target->blocks() + block_id * num_block_bytes; + block_bytes = target->block_data(block_id, num_block_bytes); block = *reinterpret_cast(block_bytes); } if ((block & kHighBitOfEachByte) == 0) { return false; } - constexpr int kSlotsPerBlock = 8; - int local_slot_id = - kSlotsPerBlock - static_cast(ARROW_POPCOUNT64(block & kHighBitOfEachByte)); - int64_t global_slot_id = block_id * kSlotsPerBlock + local_slot_id; - target->insert_into_empty_slot(static_cast(global_slot_id), hash, - static_cast(group_id)); + int local_slot_id = SwissTable::kSlotsPerBlock - + static_cast(ARROW_POPCOUNT64(block & kHighBitOfEachByte)); + uint32_t global_slot_id = SwissTable::global_slot_id(block_id, local_slot_id); + target->insert_into_empty_slot(global_slot_id, hash, group_id); return true; } void SwissTableMerge::InsertNewGroups(SwissTable* target, const std::vector& group_ids, const std::vector& hashes) { - int64_t num_blocks = 1LL << target->log_blocks(); + uint32_t num_blocks = 1 << target->log_blocks(); for (size_t i = 0; i < group_ids.size(); ++i) { std::ignore = InsertNewGroup(target, group_ids[i], hashes[i], num_blocks); } @@ -1102,7 +1102,8 @@ uint32_t SwissTableForJoin::payload_id_to_key_id(uint32_t payload_id) const { } Status SwissTableForJoinBuild::Init(SwissTableForJoin* target, int dop, int64_t num_rows, - bool reject_duplicate_keys, bool no_payload, + int64_t num_batches, bool reject_duplicate_keys, + bool no_payload, const std::vector& key_types, const 
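The MergePartition and InsertNewGroup hunks replace inline bit twiddling with named SwissTable helpers (`num_block_bytes_from_num_groupid_bits`, `group_id_mask_from_num_groupid_bits`, `block_id_from_hash`, `global_slot_id`). A standalone sketch of the arithmetic those names stand for, reconstructed from the expressions removed above; the helper functions here are local to the example, not the actual SwissTable API:

```cpp
#include <cstdint>

// Each Swiss table block holds 8 slots: 8 status bytes followed by
// 8 packed group ids of `num_groupid_bits` bits each.
constexpr int kSlotsPerBlock = 8;

int num_block_bytes(int num_groupid_bits) {
  // Matches the removed expression: 8 status bytes plus 8 * bits / 8 group-id bytes.
  return num_groupid_bits + 8;
}

uint32_t group_id_mask(int num_groupid_bits) {
  return num_groupid_bits == 32 ? ~uint32_t{0} : (uint32_t{1} << num_groupid_bits) - 1;
}

// The table is addressed by the highest bits of the 32-bit hash.
uint32_t block_id_from_hash(uint32_t hash, int log_blocks) {
  constexpr int kBitsHash = 32;
  if (log_blocks == 0) return 0;
  return hash >> (kBitsHash - log_blocks);
}

uint32_t global_slot_id(uint32_t block_id, int local_slot_id) {
  return block_id * kSlotsPerBlock + local_slot_id;
}
```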
std::vector& payload_types, MemoryPool* pool, int64_t hardware_flags) { @@ -1112,7 +1113,7 @@ Status SwissTableForJoinBuild::Init(SwissTableForJoin* target, int dop, int64_t // Make sure that we do not use many partitions if there are not enough rows. // - constexpr int64_t min_num_rows_per_prtn = 1 << 18; + constexpr int64_t min_num_rows_per_prtn = 1 << 12; log_num_prtns_ = std::min(bit_util::Log2(dop_), bit_util::Log2(bit_util::CeilDiv(num_rows, min_num_rows_per_prtn))); @@ -1123,9 +1124,9 @@ Status SwissTableForJoinBuild::Init(SwissTableForJoin* target, int dop, int64_t pool_ = pool; hardware_flags_ = hardware_flags; + batch_states_.resize(num_batches); prtn_states_.resize(num_prtns_); thread_states_.resize(dop_); - prtn_locks_.Init(dop_, num_prtns_); RowTableMetadata key_row_metadata; key_row_metadata.FromColumnMetadataVector(key_types, @@ -1154,91 +1155,74 @@ Status SwissTableForJoinBuild::Init(SwissTableForJoin* target, int dop, int64_t return Status::OK(); } -Status SwissTableForJoinBuild::PushNextBatch(int64_t thread_id, - const ExecBatch& key_batch, - const ExecBatch* payload_batch_maybe_null, - arrow::util::TempVectorStack* temp_stack) { - ARROW_DCHECK(thread_id < dop_); +Status SwissTableForJoinBuild::PartitionBatch(size_t thread_id, int64_t batch_id, + const ExecBatch& key_batch, + arrow::util::TempVectorStack* temp_stack) { + DCHECK_LT(thread_id, thread_states_.size()); + DCHECK_LT(batch_id, static_cast(batch_states_.size())); ThreadState& locals = thread_states_[thread_id]; + BatchState& batch_state = batch_states_[batch_id]; + uint16_t num_rows = static_cast(key_batch.length); // Compute hash // - locals.batch_hashes.resize(key_batch.length); - RETURN_NOT_OK(Hashing32::HashBatch( - key_batch, locals.batch_hashes.data(), locals.temp_column_arrays, hardware_flags_, - temp_stack, /*start_row=*/0, static_cast(key_batch.length))); + batch_state.hashes.resize(num_rows); + RETURN_NOT_OK(Hashing32::HashBatch(key_batch, batch_state.hashes.data(), + locals.temp_column_arrays, hardware_flags_, + temp_stack, /*start_row=*/0, num_rows)); // Partition on hash // - locals.batch_prtn_row_ids.resize(locals.batch_hashes.size()); - locals.batch_prtn_ranges.resize(num_prtns_ + 1); - int num_rows = static_cast(locals.batch_hashes.size()); + batch_state.prtn_ranges.resize(num_prtns_ + 1); + batch_state.prtn_row_ids.resize(num_rows); if (num_prtns_ == 1) { // We treat single partition case separately to avoid extra checks in row // partitioning implementation for general case. // - locals.batch_prtn_ranges[0] = 0; - locals.batch_prtn_ranges[1] = num_rows; - for (int i = 0; i < num_rows; ++i) { - locals.batch_prtn_row_ids[i] = i; + batch_state.prtn_ranges[0] = 0; + batch_state.prtn_ranges[1] = num_rows; + for (uint16_t i = 0; i < num_rows; ++i) { + batch_state.prtn_row_ids[i] = i; } } else { PartitionSort::Eval( - static_cast(locals.batch_hashes.size()), num_prtns_, - locals.batch_prtn_ranges.data(), - [this, &locals](int64_t i) { + num_rows, num_prtns_, batch_state.prtn_ranges.data(), + [this, &batch_state](int64_t i) { // SwissTable uses the highest bits of the hash for block index. // We want each partition to correspond to a range of block indices, // so we also partition on the highest bits of the hash. 
// - return locals.batch_hashes[i] >> (31 - log_num_prtns_) >> 1; + return batch_state.hashes[i] >> (SwissTable::bits_hash_ - log_num_prtns_); }, - [&locals](int64_t i, int pos) { - locals.batch_prtn_row_ids[pos] = static_cast(i); + [&batch_state](int64_t i, int pos) { + batch_state.prtn_row_ids[pos] = static_cast(i); }); - } - // Update hashes, shifting left to get rid of the bits that were already used - // for partitioning. - // - for (size_t i = 0; i < locals.batch_hashes.size(); ++i) { - locals.batch_hashes[i] <<= log_num_prtns_; + // Update hashes, shifting left to get rid of the bits that were already used + // for partitioning. + // + for (size_t i = 0; i < batch_state.hashes.size(); ++i) { + batch_state.hashes[i] <<= log_num_prtns_; + } } - // For each partition: - // - map keys to unique integers using (this partition's) hash table - // - append payloads (if present) to (this partition's) row array - // - locals.temp_prtn_ids.resize(num_prtns_); - - RETURN_NOT_OK(prtn_locks_.ForEachPartition( - thread_id, locals.temp_prtn_ids.data(), - /*is_prtn_empty_fn=*/ - [&](int prtn_id) { - return locals.batch_prtn_ranges[prtn_id + 1] == locals.batch_prtn_ranges[prtn_id]; - }, - /*process_prtn_fn=*/ - [&](int prtn_id) { - return ProcessPartition(thread_id, key_batch, payload_batch_maybe_null, - temp_stack, prtn_id); - })); - return Status::OK(); } -Status SwissTableForJoinBuild::ProcessPartition(int64_t thread_id, - const ExecBatch& key_batch, - const ExecBatch* payload_batch_maybe_null, - arrow::util::TempVectorStack* temp_stack, - int prtn_id) { - ARROW_DCHECK(thread_id < dop_); +Status SwissTableForJoinBuild::ProcessPartition( + size_t thread_id, int64_t batch_id, int prtn_id, const ExecBatch& key_batch, + const ExecBatch* payload_batch_maybe_null, arrow::util::TempVectorStack* temp_stack) { + DCHECK_LT(thread_id, thread_states_.size()); + DCHECK_LT(batch_id, static_cast(batch_states_.size())); + DCHECK_LT(static_cast(prtn_id), prtn_states_.size()); ThreadState& locals = thread_states_[thread_id]; + BatchState& batch_state = batch_states_[batch_id]; + PartitionState& prtn_state = prtn_states_[prtn_id]; int num_rows_new = - locals.batch_prtn_ranges[prtn_id + 1] - locals.batch_prtn_ranges[prtn_id]; + batch_state.prtn_ranges[prtn_id + 1] - batch_state.prtn_ranges[prtn_id]; const uint16_t* row_ids = - locals.batch_prtn_row_ids.data() + locals.batch_prtn_ranges[prtn_id]; - PartitionState& prtn_state = prtn_states_[prtn_id]; + batch_state.prtn_row_ids.data() + batch_state.prtn_ranges[prtn_id]; size_t num_rows_before = prtn_state.key_ids.size(); // Insert new keys into hash table associated with the current partition // and map existing keys to integer ids. @@ -1247,7 +1231,7 @@ Status SwissTableForJoinBuild::ProcessPartition(int64_t thread_id, SwissTableWithKeys::Input input(&key_batch, num_rows_new, row_ids, temp_stack, &locals.temp_column_arrays, &locals.temp_group_ids); RETURN_NOT_OK(prtn_state.keys.MapWithInserts( - &input, locals.batch_hashes.data(), prtn_state.key_ids.data() + num_rows_before)); + &input, batch_state.hashes.data(), prtn_state.key_ids.data() + num_rows_before)); // Append input batch rows from current partition to an array of payload // rows for this partition. 
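In the new `PartitionBatch` above, each row's partition is taken from the highest `log_num_prtns_` bits of its 32-bit hash, so every partition corresponds to a contiguous range of Swiss table blocks, and those bits are then shifted out before the hash reaches the per-partition table. A self-contained sketch of that step (assumes `log_num_prtns >= 1`, matching the multi-partition branch above):

```cpp
#include <cstdint>
#include <vector>

// Illustration of the partitioning step: the top bits of each hash pick the
// partition, then the used bits are shifted away so the per-partition table
// never sees them.
void PartitionHashes(std::vector<uint32_t>* hashes, int log_num_prtns,
                     std::vector<int>* prtn_ids) {
  constexpr int kBitsHash = 32;
  prtn_ids->resize(hashes->size());
  for (size_t i = 0; i < hashes->size(); ++i) {
    (*prtn_ids)[i] = static_cast<int>((*hashes)[i] >> (kBitsHash - log_num_prtns));
    (*hashes)[i] <<= log_num_prtns;
  }
}
```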
// @@ -2504,6 +2488,13 @@ class SwissJoin : public HashJoinImpl { } void InitTaskGroups() { + task_group_partition_ = register_task_group_callback_( + [this](size_t thread_index, int64_t task_id) -> Status { + return PartitionTask(thread_index, task_id); + }, + [this](size_t thread_index) -> Status { + return PartitionFinished(thread_index); + }); task_group_build_ = register_task_group_callback_( [this](size_t thread_index, int64_t task_id) -> Status { return BuildTask(thread_index, task_id); @@ -2590,18 +2581,19 @@ class SwissJoin : public HashJoinImpl { ColumnMetadataFromDataType(schema->data_type(HashJoinProjection::PAYLOAD, i))); payload_types.push_back(metadata); } - RETURN_NOT_OK(CancelIfNotOK(hash_table_build_.Init( + hash_table_build_ = std::make_unique(); + RETURN_NOT_OK(CancelIfNotOK(hash_table_build_->Init( &hash_table_, num_threads_, build_side_batches_.row_count(), - reject_duplicate_keys, no_payload, key_types, payload_types, pool_, - hardware_flags_))); + build_side_batches_.batch_count(), reject_duplicate_keys, no_payload, key_types, + payload_types, pool_, hardware_flags_))); // Process all input batches // - return CancelIfNotOK( - start_task_group_callback_(task_group_build_, build_side_batches_.batch_count())); + return CancelIfNotOK(start_task_group_callback_(task_group_partition_, + build_side_batches_.batch_count())); } - Status BuildTask(size_t thread_id, int64_t batch_id) { + Status PartitionTask(size_t thread_id, int64_t batch_id) { if (IsCancelled()) { return Status::OK(); } @@ -2609,39 +2601,78 @@ class SwissJoin : public HashJoinImpl { DCHECK_GT(build_side_batches_[batch_id].length, 0); const HashJoinProjectionMaps* schema = schema_[1]; - bool no_payload = hash_table_build_.no_payload(); - ExecBatch input_batch; ARROW_ASSIGN_OR_RAISE( input_batch, KeyPayloadFromInput(/*side=*/1, &build_side_batches_[batch_id])); - // Split batch into key batch and optional payload batch - // - // Input batch is key-payload batch (key columns followed by payload - // columns). We split it into two separate batches. - // - // TODO: Change SwissTableForJoinBuild interface to use key-payload - // batch instead to avoid this operation, which involves increasing - // shared pointer ref counts. 
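`InitTaskGroups` now registers a separate partition task group whose finished-callback kicks off the build group (`PartitionFinished` starts `task_group_build_` with one task per partition). A toy illustration of that chaining pattern with plain `std::function` callbacks; the names and the serial execution are purely illustrative, not Acero's scheduler API:

```cpp
#include <cstdint>
#include <functional>
#include <iostream>

using Task = std::function<void(int64_t task_id)>;
using Continuation = std::function<void()>;

struct TaskGroup {
  Task task;
  Continuation on_finished;
};

// Runs every task in a group, then fires its continuation (serially, for clarity).
void StartTaskGroup(const TaskGroup& group, int64_t num_tasks) {
  for (int64_t i = 0; i < num_tasks; ++i) group.task(i);
  group.on_finished();
}

int main() {
  const int64_t num_batches = 4, num_prtns = 2;
  TaskGroup build{
      [](int64_t prtn_id) { std::cout << "build partition " << prtn_id << "\n"; },
      [] { std::cout << "build finished\n"; }};
  // The partition group's continuation starts the build group, one task per
  // partition, mirroring PartitionFinished() -> start_task_group_callback_(...).
  TaskGroup partition{
      [](int64_t batch_id) { std::cout << "partition batch " << batch_id << "\n"; },
      [&] { StartTaskGroup(build, num_prtns); }};
  StartTaskGroup(partition, num_batches);
  return 0;
}
```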
- // ExecBatch key_batch({}, input_batch.length); key_batch.values.resize(schema->num_cols(HashJoinProjection::KEY)); for (size_t icol = 0; icol < key_batch.values.size(); ++icol) { key_batch.values[icol] = input_batch.values[icol]; } - ExecBatch payload_batch({}, input_batch.length); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; + DCHECK_NE(hash_table_build_, nullptr); + return hash_table_build_->PartitionBatch(static_cast(thread_id), batch_id, + key_batch, temp_stack); + } + + Status PartitionFinished(size_t thread_id) { + RETURN_NOT_OK(status()); + + DCHECK_NE(hash_table_build_, nullptr); + return CancelIfNotOK( + start_task_group_callback_(task_group_build_, hash_table_build_->num_prtns())); + } + + Status BuildTask(size_t thread_id, int64_t prtn_id) { + if (IsCancelled()) { + return Status::OK(); + } + + const HashJoinProjectionMaps* schema = schema_[1]; + DCHECK_NE(hash_table_build_, nullptr); + bool no_payload = hash_table_build_->no_payload(); + ExecBatch key_batch, payload_batch; + auto num_keys = schema->num_cols(HashJoinProjection::KEY); + auto num_payloads = schema->num_cols(HashJoinProjection::PAYLOAD); + key_batch.values.resize(num_keys); if (!no_payload) { - payload_batch.values.resize(schema->num_cols(HashJoinProjection::PAYLOAD)); - for (size_t icol = 0; icol < payload_batch.values.size(); ++icol) { - payload_batch.values[icol] = - input_batch.values[schema->num_cols(HashJoinProjection::KEY) + icol]; - } + payload_batch.values.resize(num_payloads); } arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; - RETURN_NOT_OK(CancelIfNotOK(hash_table_build_.PushNextBatch( - static_cast(thread_id), key_batch, no_payload ? nullptr : &payload_batch, - temp_stack))); + + for (int64_t batch_id = 0; + batch_id < static_cast(build_side_batches_.batch_count()); ++batch_id) { + ExecBatch input_batch; + ARROW_ASSIGN_OR_RAISE( + input_batch, KeyPayloadFromInput(/*side=*/1, &build_side_batches_[batch_id])); + + // Split batch into key batch and optional payload batch + // + // Input batch is key-payload batch (key columns followed by payload + // columns). We split it into two separate batches. + // + // TODO: Change SwissTableForJoinBuild interface to use key-payload + // batch instead to avoid this operation, which involves increasing + // shared pointer ref counts. + // + key_batch.length = input_batch.length; + for (size_t icol = 0; icol < key_batch.values.size(); ++icol) { + key_batch.values[icol] = input_batch.values[icol]; + } + + if (!no_payload) { + payload_batch.length = input_batch.length; + for (size_t icol = 0; icol < payload_batch.values.size(); ++icol) { + payload_batch.values[icol] = input_batch.values[num_keys + icol]; + } + } + + RETURN_NOT_OK(CancelIfNotOK(hash_table_build_->ProcessPartition( + thread_id, batch_id, static_cast(prtn_id), key_batch, + no_payload ? nullptr : &payload_batch, temp_stack))); + } return Status::OK(); } @@ -2654,23 +2685,26 @@ class SwissJoin : public HashJoinImpl { // On a single thread prepare for merging partitions of the resulting hash // table. 
// - RETURN_NOT_OK(CancelIfNotOK(hash_table_build_.PreparePrtnMerge())); + DCHECK_NE(hash_table_build_, nullptr); + RETURN_NOT_OK(CancelIfNotOK(hash_table_build_->PreparePrtnMerge())); return CancelIfNotOK( - start_task_group_callback_(task_group_merge_, hash_table_build_.num_prtns())); + start_task_group_callback_(task_group_merge_, hash_table_build_->num_prtns())); } Status MergeTask(size_t /*thread_id*/, int64_t prtn_id) { if (IsCancelled()) { return Status::OK(); } - hash_table_build_.PrtnMerge(static_cast(prtn_id)); + DCHECK_NE(hash_table_build_, nullptr); + hash_table_build_->PrtnMerge(static_cast(prtn_id)); return Status::OK(); } Status MergeFinished(size_t thread_id) { RETURN_NOT_OK(status()); arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; - hash_table_build_.FinishPrtnMerge(temp_stack); + DCHECK_NE(hash_table_build_, nullptr); + hash_table_build_->FinishPrtnMerge(temp_stack); return CancelIfNotOK(OnBuildHashTableFinished(static_cast(thread_id))); } @@ -2679,6 +2713,9 @@ class SwissJoin : public HashJoinImpl { return status(); } + DCHECK_NE(hash_table_build_, nullptr); + hash_table_build_.reset(); + for (int i = 0; i < num_threads_; ++i) { local_states_[i].materialize.SetBuildSide(hash_table_.keys()->keys(), hash_table_.payloads(), @@ -2888,6 +2925,7 @@ class SwissJoin : public HashJoinImpl { const HashJoinProjectionMaps* schema_[2]; // Task scheduling + int task_group_partition_; int task_group_build_; int task_group_merge_; int task_group_scan_; @@ -2910,7 +2948,8 @@ class SwissJoin : public HashJoinImpl { SwissTableForJoin hash_table_; JoinProbeProcessor probe_processor_; JoinResidualFilter residual_filter_; - SwissTableForJoinBuild hash_table_build_; + // Temporarily used during build phase, and released afterward. + std::unique_ptr hash_table_build_; AccumulationQueue build_side_batches_; // Atomic state flags. diff --git a/cpp/src/arrow/acero/swiss_join_internal.h b/cpp/src/arrow/acero/swiss_join_internal.h index 85f443b0323c7..365f2917d8eff 100644 --- a/cpp/src/arrow/acero/swiss_join_internal.h +++ b/cpp/src/arrow/acero/swiss_join_internal.h @@ -175,7 +175,7 @@ class RowArrayAccessor { // Read operations (row comparison, column decoding) // can be called by multiple threads concurrently. // -struct RowArray { +struct ARROW_ACERO_EXPORT RowArray { RowArray() : is_initialized_(false), hardware_flags_(0) {} Status InitIfNeeded(MemoryPool* pool, int64_t hardware_flags, const ExecBatch& batch); @@ -380,8 +380,8 @@ class SwissTableMerge { // Max block id value greater or equal to the number of blocks guarantees that // the search will not be stopped. // - static inline bool InsertNewGroup(SwissTable* target, uint64_t group_id, uint32_t hash, - int64_t max_block_id); + static inline bool InsertNewGroup(SwissTable* target, uint32_t group_id, uint32_t hash, + uint32_t max_block_id); }; struct SwissTableWithKeys { @@ -523,19 +523,27 @@ class SwissTableForJoin { // class SwissTableForJoinBuild { public: - Status Init(SwissTableForJoin* target, int dop, int64_t num_rows, + Status Init(SwissTableForJoin* target, int dop, int64_t num_rows, int64_t num_batches, bool reject_duplicate_keys, bool no_payload, const std::vector& key_types, const std::vector& payload_types, MemoryPool* pool, int64_t hardware_flags); - // In the first phase of parallel hash table build, threads pick unprocessed - // exec batches, partition the rows based on hash, and update all of the - // partitions with information related to that batch of rows. 
+ // In the first phase of parallel hash table build, each thread picks unprocessed exec + // batches, hashes the batches and preserve the hashes, then partition the rows based on + // hashes. // - Status PushNextBatch(int64_t thread_id, const ExecBatch& key_batch, - const ExecBatch* payload_batch_maybe_null, - arrow::util::TempVectorStack* temp_stack); + Status PartitionBatch(size_t thread_id, int64_t batch_id, const ExecBatch& key_batch, + arrow::util::TempVectorStack* temp_stack); + + // In the second phase of parallel hash table build, each thread picks the given + // partition of all batches, and updates that particular partition with information + // related to that batch of rows. + // + Status ProcessPartition(size_t thread_id, int64_t batch_id, int prtn_id, + const ExecBatch& key_batch, + const ExecBatch* payload_batch_maybe_null, + arrow::util::TempVectorStack* temp_stack); // Allocate memory and initialize counters required for parallel merging of // hash table partitions. @@ -543,7 +551,7 @@ class SwissTableForJoinBuild { // Status PreparePrtnMerge(); - // Second phase of parallel hash table build. + // Third phase of parallel hash table build. // Each partition can be processed by a different thread. // Parallel step. // @@ -564,9 +572,6 @@ class SwissTableForJoinBuild { private: void InitRowArray(); - Status ProcessPartition(int64_t thread_id, const ExecBatch& key_batch, - const ExecBatch* payload_batch_maybe_null, - arrow::util::TempVectorStack* temp_stack, int prtn_id); SwissTableForJoin* target_; // DOP stands for Degree Of Parallelism - the maximum number of participating @@ -604,6 +609,22 @@ class SwissTableForJoinBuild { MemoryPool* pool_; int64_t hardware_flags_; + // One per batch. + // + // Informations like hashes and partitions of each batch gathered in the partition phase + // and used in the build phase. + // + struct BatchState { + // Hashes for the batch, preserved in the partition phase to avoid recomputation in + // the build phase. One element per row in the batch. + std::vector hashes; + // Accumulative number of rows in each partition for the batch. `num_prtns_` + 1 + // elements. + std::vector prtn_ranges; + // Row ids after partition sorting the batch. One element per row in the batch. + std::vector prtn_row_ids; + }; + // One per partition. // struct PartitionState { @@ -620,17 +641,13 @@ class SwissTableForJoinBuild { // batches. // struct ThreadState { - std::vector batch_hashes; - std::vector batch_prtn_ranges; - std::vector batch_prtn_row_ids; - std::vector temp_prtn_ids; std::vector temp_group_ids; std::vector temp_column_arrays; }; + std::vector batch_states_; std::vector prtn_states_; std::vector thread_states_; - PartitionLocks prtn_locks_; std::vector partition_keys_first_row_id_; std::vector partition_payloads_first_row_id_; diff --git a/cpp/src/arrow/acero/task_util.cc b/cpp/src/arrow/acero/task_util.cc index 85378eaeeb27c..082ec99946e9f 100644 --- a/cpp/src/arrow/acero/task_util.cc +++ b/cpp/src/arrow/acero/task_util.cc @@ -91,11 +91,11 @@ class TaskSchedulerImpl : public TaskScheduler { AbortContinuationImpl abort_cont_impl_; std::vector task_groups_; - bool aborted_; bool register_finished_; std::mutex mutex_; // Mutex protecting task_groups_ (state_ and num_tasks_present_ - // fields), aborted_ flag and register_finished_ flag + // fields) and register_finished_ flag + AtomicWithPadding aborted_; AtomicWithPadding num_tasks_to_schedule_; // If a task group adds tasks it's possible for a thread inside // ScheduleMore to miss this fact. 
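The new per-batch `BatchState` documented above keeps the hashes, a cumulative `prtn_ranges` prefix sum, and the permuted `prtn_row_ids` computed in the partition phase, so a build-phase thread working on one partition can pull its rows out of every batch without re-hashing. A small sketch of how `ProcessPartition`-style code slices that state (types simplified, names local to the example):

```cpp
#include <cstdint>
#include <vector>

struct BatchState {
  std::vector<uint32_t> hashes;        // one per row
  std::vector<uint16_t> prtn_ranges;   // num_prtns + 1 cumulative offsets
  std::vector<uint16_t> prtn_row_ids;  // row ids grouped by partition
};

// Visit the rows of `batch` that fall into partition `prtn_id`.
template <typename Visitor>
void ForEachRowInPartition(const BatchState& batch, int prtn_id, Visitor&& visit) {
  uint16_t begin = batch.prtn_ranges[prtn_id];
  uint16_t end = batch.prtn_ranges[prtn_id + 1];
  for (uint16_t pos = begin; pos < end; ++pos) {
    uint16_t row_id = batch.prtn_row_ids[pos];
    visit(row_id, batch.hashes[row_id]);
  }
}
```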
This serves as a flag to @@ -105,10 +105,8 @@ class TaskSchedulerImpl : public TaskScheduler { }; TaskSchedulerImpl::TaskSchedulerImpl() - : use_sync_execution_(false), - num_concurrent_tasks_(0), - aborted_(false), - register_finished_(false) { + : use_sync_execution_(false), num_concurrent_tasks_(0), register_finished_(false) { + aborted_.value.store(false); num_tasks_to_schedule_.value.store(0); tasks_added_recently_.value.store(false); } @@ -131,13 +129,11 @@ Status TaskSchedulerImpl::StartTaskGroup(size_t thread_id, int group_id, ARROW_DCHECK(group_id >= 0 && group_id < static_cast(task_groups_.size())); TaskGroup& task_group = task_groups_[group_id]; - bool aborted = false; + bool aborted = aborted_.value.load(); bool all_tasks_finished = false; { std::lock_guard lock(mutex_); - aborted = aborted_; - if (task_group.state_ == TaskGroupState::NOT_READY) { task_group.num_tasks_present_ = total_num_tasks; if (total_num_tasks == 0) { @@ -212,7 +208,7 @@ std::vector> TaskSchedulerImpl::PickTasks(int num_tasks, Status TaskSchedulerImpl::ExecuteTask(size_t thread_id, int group_id, int64_t task_id, bool* task_group_finished) { - if (!aborted_) { + if (!aborted_.value.load()) { RETURN_NOT_OK(task_groups_[group_id].task_impl_(thread_id, task_id)); } *task_group_finished = PostExecuteTask(thread_id, group_id); @@ -228,11 +224,10 @@ bool TaskSchedulerImpl::PostExecuteTask(size_t thread_id, int group_id) { Status TaskSchedulerImpl::OnTaskGroupFinished(size_t thread_id, int group_id, bool* all_task_groups_finished) { - bool aborted = false; + bool aborted = aborted_.value.load(); { std::lock_guard lock(mutex_); - aborted = aborted_; TaskGroup& task_group = task_groups_[group_id]; task_group.state_ = TaskGroupState::ALL_TASKS_FINISHED; *all_task_groups_finished = true; @@ -260,7 +255,7 @@ Status TaskSchedulerImpl::ExecuteMore(size_t thread_id, int num_tasks_to_execute int last_id = 0; for (;;) { - if (aborted_) { + if (aborted_.value.load()) { return Status::Cancelled("Scheduler cancelled"); } @@ -278,8 +273,8 @@ Status TaskSchedulerImpl::ExecuteMore(size_t thread_id, int num_tasks_to_execute bool task_group_finished = false; Status status = ExecuteTask(thread_id, group_id, task_id, &task_group_finished); if (!status.ok()) { - // Mark the remaining picked tasks as finished - for (size_t j = i + 1; j < tasks.size(); ++j) { + // Mark the current and remaining picked tasks as finished + for (size_t j = i; j < tasks.size(); ++j) { if (PostExecuteTask(thread_id, tasks[j].first)) { bool all_task_groups_finished = false; RETURN_NOT_OK( @@ -328,7 +323,7 @@ Status TaskSchedulerImpl::StartScheduling(size_t thread_id, ScheduleImpl schedul } Status TaskSchedulerImpl::ScheduleMore(size_t thread_id, int num_tasks_finished) { - if (aborted_) { + if (aborted_.value.load()) { return Status::Cancelled("Scheduler cancelled"); } @@ -369,17 +364,25 @@ Status TaskSchedulerImpl::ScheduleMore(size_t thread_id, int num_tasks_finished) int group_id = tasks[i].first; int64_t task_id = tasks[i].second; RETURN_NOT_OK(schedule_impl_([this, group_id, task_id](size_t thread_id) -> Status { - RETURN_NOT_OK(ScheduleMore(thread_id, 1)); - bool task_group_finished = false; - RETURN_NOT_OK(ExecuteTask(thread_id, group_id, task_id, &task_group_finished)); + // PostExecuteTask must be called later if any error ocurres during task execution + // (including ScheduleMore), so we preserve the status. 
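The task_util.cc hunk moves `aborted_` out of the mutex into a padded atomic so the hot paths (`ExecuteTask`, `ExecuteMore`, `ScheduleMore`) can check it without taking the lock. A minimal sketch of the idea, using `alignas` in place of Acero's `AtomicWithPadding` wrapper:

```cpp
#include <atomic>
#include <mutex>

class Scheduler {
 public:
  // Hot path: lock-free check, as in ExecuteTask()/ExecuteMore() above.
  bool IsAborted() const { return aborted_.value.load(); }

  void Abort() {
    aborted_.value.store(true);
    std::lock_guard<std::mutex> lock(mutex_);  // other state still needs the mutex
    // ... walk task groups, as in TaskSchedulerImpl::Abort() ...
  }

 private:
  // Stand-in for AtomicWithPadding: pad to a cache line to avoid false sharing.
  struct alignas(64) PaddedAtomicBool {
    std::atomic<bool> value{false};
  };

  PaddedAtomicBool aborted_;
  std::mutex mutex_;
};
```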
+ auto status = [&]() { + RETURN_NOT_OK(ScheduleMore(thread_id, 1)); + return ExecuteTask(thread_id, group_id, task_id, &task_group_finished); + }(); + + if (!status.ok()) { + task_group_finished = PostExecuteTask(thread_id, group_id); + } if (task_group_finished) { bool all_task_groups_finished = false; - return OnTaskGroupFinished(thread_id, group_id, &all_task_groups_finished); + RETURN_NOT_OK( + OnTaskGroupFinished(thread_id, group_id, &all_task_groups_finished)); } - return Status::OK(); + return status; })); } @@ -388,31 +391,43 @@ Status TaskSchedulerImpl::ScheduleMore(size_t thread_id, int num_tasks_finished) void TaskSchedulerImpl::Abort(AbortContinuationImpl impl) { bool all_finished = true; + DCHECK_EQ(aborted_.value.load(), false); + aborted_.value.store(true); { std::lock_guard lock(mutex_); - aborted_ = true; abort_cont_impl_ = std::move(impl); if (register_finished_) { for (size_t i = 0; i < task_groups_.size(); ++i) { TaskGroup& task_group = task_groups_[i]; - if (task_group.state_ == TaskGroupState::NOT_READY) { - task_group.state_ = TaskGroupState::ALL_TASKS_FINISHED; - } else if (task_group.state_ == TaskGroupState::READY) { - int64_t expected = task_group.num_tasks_started_.value.load(); - for (;;) { - if (task_group.num_tasks_started_.value.compare_exchange_strong( - expected, task_group.num_tasks_present_)) { - break; + switch (task_group.state_) { + case TaskGroupState::NOT_READY: { + task_group.state_ = TaskGroupState::ALL_TASKS_FINISHED; + break; + } + case TaskGroupState::READY: { + int64_t expected = task_group.num_tasks_started_.value.load(); + for (;;) { + if (task_group.num_tasks_started_.value.compare_exchange_strong( + expected, task_group.num_tasks_present_)) { + break; + } } + int64_t before_add = task_group.num_tasks_finished_.value.fetch_add( + task_group.num_tasks_present_ - expected); + if (before_add >= expected) { + task_group.state_ = TaskGroupState::ALL_TASKS_FINISHED; + } else { + all_finished = false; + task_group.state_ = TaskGroupState::ALL_TASKS_STARTED; + } + break; } - int64_t before_add = task_group.num_tasks_finished_.value.fetch_add( - task_group.num_tasks_present_ - expected); - if (before_add >= expected) { - task_group.state_ = TaskGroupState::ALL_TASKS_FINISHED; - } else { + case TaskGroupState::ALL_TASKS_STARTED: { all_finished = false; - task_group.state_ = TaskGroupState::ALL_TASKS_STARTED; + break; } + default: + break; } } } diff --git a/cpp/src/arrow/acero/task_util_test.cc b/cpp/src/arrow/acero/task_util_test.cc index d5196ad4e0a03..30f80012e5c40 100644 --- a/cpp/src/arrow/acero/task_util_test.cc +++ b/cpp/src/arrow/acero/task_util_test.cc @@ -231,5 +231,97 @@ TEST(TaskScheduler, StressTwo) { } } +TEST(TaskScheduler, AbortContOnTaskErrorSerial) { + constexpr int kNumTasks = 16; + + auto scheduler = TaskScheduler::Make(); + auto task = [&](std::size_t, int64_t task_id) { + if (task_id == kNumTasks / 2) { + return Status::Invalid("Task failed"); + } + return Status::OK(); + }; + + int task_group = + scheduler->RegisterTaskGroup(task, [](std::size_t) { return Status::OK(); }); + scheduler->RegisterEnd(); + + ASSERT_OK(scheduler->StartScheduling( + /*thread_id=*/0, + /*schedule_impl=*/ + [](TaskScheduler::TaskGroupContinuationImpl) { return Status::OK(); }, + /*num_concurrent_tasks=*/1, /*use_sync_execution=*/true)); + ASSERT_RAISES_WITH_MESSAGE( + Invalid, "Invalid: Task failed", + scheduler->StartTaskGroup(/*thread_id=*/0, task_group, kNumTasks)); + + int num_abort_cont_calls = 0; + auto abort_cont = [&]() { 
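In the READY case of the rewritten `Abort()` switch, the scheduler atomically raises `num_tasks_started_` to `num_tasks_present_` so no further tasks are handed out, then uses the finished counter to decide whether the group is already complete. A standalone sketch of that compare-exchange step (toy types; the real counters are `AtomicWithPadding` members):

```cpp
#include <atomic>
#include <cstdint>

struct ToyTaskGroup {
  int64_t num_tasks_present = 0;
  std::atomic<int64_t> num_tasks_started{0};
  std::atomic<int64_t> num_tasks_finished{0};
};

// Returns true if the group can be marked ALL_TASKS_FINISHED immediately.
bool CancelRemainingTasks(ToyTaskGroup* g) {
  int64_t expected = g->num_tasks_started.load();
  // Claim all not-yet-started tasks so no worker picks them up after abort.
  while (!g->num_tasks_started.compare_exchange_strong(expected, g->num_tasks_present)) {
  }
  // Account for the claimed tasks as finished; if everything that had actually
  // started has also finished, the group is done right now.
  int64_t before_add = g->num_tasks_finished.fetch_add(g->num_tasks_present - expected);
  return before_add >= expected;
}
```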
++num_abort_cont_calls; }; + + scheduler->Abort(abort_cont); + + ASSERT_EQ(num_abort_cont_calls, 1); +} + +TEST(TaskScheduler, AbortContOnTaskErrorParallel) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumThreads = 16; + + ThreadIndexer thread_indexer; + int num_threads = std::min(static_cast(thread_indexer.Capacity()), kNumThreads); + ASSERT_OK_AND_ASSIGN(std::shared_ptr thread_pool, + MakePrimedThreadPool(num_threads)); + TaskScheduler::ScheduleImpl schedule = + [&](TaskScheduler::TaskGroupContinuationImpl task) { + return thread_pool->Spawn([&, task] { + std::size_t thread_id = thread_indexer(); + auto status = task(thread_id); + ASSERT_TRUE(status.ok() || status.IsInvalid() || status.IsCancelled()) + << status; + }); + }; + + for (int num_tasks : + {2, num_threads - 1, num_threads, num_threads + 1, 2 * num_threads}) { + ARROW_SCOPED_TRACE("num_tasks = ", num_tasks); + for (int num_concurrent_tasks : + {1, num_tasks - 1, num_tasks, num_tasks + 1, 2 * num_tasks}) { + ARROW_SCOPED_TRACE("num_concurrent_tasks = ", num_concurrent_tasks); + for (int aborting_task_id = 0; aborting_task_id < num_tasks; ++aborting_task_id) { + ARROW_SCOPED_TRACE("aborting_task_id = ", aborting_task_id); + auto scheduler = TaskScheduler::Make(); + + int num_abort_cont_calls = 0; + auto abort_cont = [&]() { ++num_abort_cont_calls; }; + + auto task = [&](std::size_t, int64_t task_id) { + if (task_id == aborting_task_id) { + scheduler->Abort(abort_cont); + } + if (task_id % 2 == 0) { + return Status::Invalid("Task failed"); + } + return Status::OK(); + }; + + int task_group = + scheduler->RegisterTaskGroup(task, [](std::size_t) { return Status::OK(); }); + scheduler->RegisterEnd(); + + ASSERT_OK(scheduler->StartScheduling(/*thread_id=*/0, schedule, + num_concurrent_tasks, + /*use_sync_execution=*/false)); + ASSERT_OK(scheduler->StartTaskGroup(/*thread_id=*/0, task_group, num_tasks)); + + thread_pool->WaitForIdle(); + + ASSERT_EQ(num_abort_cont_calls, 1); + } + } + } +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/util_test.cc b/cpp/src/arrow/acero/util_test.cc index a291075a0a9a7..8273bae237eea 100644 --- a/cpp/src/arrow/acero/util_test.cc +++ b/cpp/src/arrow/acero/util_test.cc @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. 
+#include +#include "arrow/acero/concurrent_queue_internal.h" #include "arrow/acero/hash_join_node.h" #include "arrow/acero/schema_util.h" #include "arrow/testing/extension_type.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" - using testing::Eq; namespace arrow { @@ -184,5 +185,120 @@ TEST(FieldMap, ExtensionTypeHashJoin) { EXPECT_EQ(i.get(0), 0); } +template +void ConcurrentQueueBasicTest(Queue& queue) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading enabled"; +#endif + ASSERT_TRUE(queue.Empty()); + queue.Push(1); + ASSERT_FALSE(queue.Empty()); + ASSERT_EQ(queue.TryPop(), std::make_optional(1)); + ASSERT_TRUE(queue.Empty()); + + auto fut_pop = std::async(std::launch::async, [&]() { return queue.WaitAndPop(); }); + ASSERT_EQ(fut_pop.wait_for(std::chrono::milliseconds(10)), std::future_status::timeout); + queue.Push(2); + queue.Push(3); + queue.Push(4); + ASSERT_EQ(fut_pop.wait_for(std::chrono::milliseconds(10)), std::future_status::ready); + ASSERT_EQ(fut_pop.get(), 2); + fut_pop = std::async(std::launch::async, [&]() { return queue.WaitAndPop(); }); + ASSERT_EQ(fut_pop.wait_for(std::chrono::milliseconds(10)), std::future_status::ready); + ASSERT_EQ(fut_pop.get(), 3); + ASSERT_FALSE(queue.Empty()); + ASSERT_EQ(queue.TryPop(), std::make_optional(4)); + ASSERT_EQ(queue.TryPop(), std::nullopt); + queue.Push(5); + ASSERT_FALSE(queue.Empty()); + ASSERT_EQ(queue.Front(), 5); + ASSERT_FALSE(queue.Empty()); + queue.Clear(); + ASSERT_TRUE(queue.Empty()); +} + +TEST(ConcurrentQueue, BasicTest) { + ConcurrentQueue queue; + ConcurrentQueueBasicTest(queue); +} + +class BackpressureTestExecNode : public ExecNode { + public: + BackpressureTestExecNode() : ExecNode(nullptr, {}, {}, nullptr) {} + const char* kind_name() const override { return "BackpressureTestNode"; } + Status InputReceived(ExecNode* input, ExecBatch batch) override { + return Status::NotImplemented("Test only node"); + } + Status InputFinished(ExecNode* input, int total_batches) override { + return Status::NotImplemented("Test only node"); + } + Status StartProducing() override { return Status::NotImplemented("Test only node"); } + + protected: + Status StopProducingImpl() override { + stopped = true; + return Status::OK(); + } + + public: + void PauseProducing(ExecNode* output, int32_t counter) override { paused = true; } + void ResumeProducing(ExecNode* output, int32_t counter) override { paused = false; } + bool paused{false}; + bool stopped{false}; +}; + +class TestBackpressureControl : public BackpressureControl { + public: + explicit TestBackpressureControl(BackpressureTestExecNode* test_node) + : test_node(test_node) {} + virtual void Pause() { test_node->PauseProducing(nullptr, 0); } + virtual void Resume() { test_node->ResumeProducing(nullptr, 0); } + BackpressureTestExecNode* test_node; +}; + +TEST(BackpressureConcurrentQueue, BasicTest) { + BackpressureTestExecNode dummy_node; + auto ctrl = std::make_unique(&dummy_node); + ASSERT_OK_AND_ASSIGN(auto handler, + BackpressureHandler::Make(&dummy_node, 2, 4, std::move(ctrl))); + BackpressureConcurrentQueue queue(std::move(handler)); + + ConcurrentQueueBasicTest(queue); + ASSERT_FALSE(dummy_node.paused); + ASSERT_FALSE(dummy_node.stopped); +} + +TEST(BackpressureConcurrentQueue, BackpressureTest) { + BackpressureTestExecNode dummy_node; + auto ctrl = std::make_unique(&dummy_node); + ASSERT_OK_AND_ASSIGN(auto handler, + BackpressureHandler::Make(&dummy_node, 2, 4, std::move(ctrl))); + BackpressureConcurrentQueue 
queue(std::move(handler)); + + queue.Push(6); + queue.Push(7); + queue.Push(8); + ASSERT_FALSE(dummy_node.paused); + ASSERT_FALSE(dummy_node.stopped); + queue.Push(9); + ASSERT_TRUE(dummy_node.paused); + ASSERT_FALSE(dummy_node.stopped); + ASSERT_EQ(queue.TryPop(), std::make_optional(6)); + ASSERT_TRUE(dummy_node.paused); + ASSERT_FALSE(dummy_node.stopped); + ASSERT_EQ(queue.TryPop(), std::make_optional(7)); + ASSERT_FALSE(dummy_node.paused); + ASSERT_FALSE(dummy_node.stopped); + queue.Push(10); + ASSERT_FALSE(dummy_node.paused); + ASSERT_FALSE(dummy_node.stopped); + queue.Push(11); + ASSERT_TRUE(dummy_node.paused); + ASSERT_FALSE(dummy_node.stopped); + ASSERT_OK(queue.ForceShutdown()); + ASSERT_FALSE(dummy_node.paused); + ASSERT_TRUE(dummy_node.stopped); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 21faa3f4279ea..fa77f4ff4ed95 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -277,15 +277,15 @@ class ARROW_EXPORT FlatArray : public Array { /// Base class for arrays of fixed-size logical types class ARROW_EXPORT PrimitiveArray : public FlatArray { public: + /// Does not account for any slice offset + const std::shared_ptr& values() const { return data_->buffers[1]; } + + protected: PrimitiveArray(const std::shared_ptr& type, int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap = NULLPTR, int64_t null_count = kUnknownNullCount, int64_t offset = 0); - /// Does not account for any slice offset - const std::shared_ptr& values() const { return data_->buffers[1]; } - - protected: PrimitiveArray() : raw_values_(NULLPTR) {} void SetData(const std::shared_ptr& data) { diff --git a/cpp/src/arrow/array/array_union_test.cc b/cpp/src/arrow/array/array_union_test.cc index 545425c264619..77ba2477791bb 100644 --- a/cpp/src/arrow/array/array_union_test.cc +++ b/cpp/src/arrow/array/array_union_test.cc @@ -166,6 +166,36 @@ TEST(TestSparseUnionArray, Validate) { ASSERT_RAISES(Invalid, arr->ValidateFull()); } +TEST(TestSparseUnionArray, Comparison) { + auto ints1 = ArrayFromJSON(int32(), "[1, 2, 3, 4, 5, 6]"); + auto ints2 = ArrayFromJSON(int32(), "[1, 2, -3, 4, -5, 6]"); + auto strs1 = ArrayFromJSON(utf8(), R"(["a", "b", "c", "d", "e", "f"])"); + auto strs2 = ArrayFromJSON(utf8(), R"(["a", "*", "c", "d", "e", "*"])"); + std::vector type_codes{8, 42}; + + auto check_equality = [&](const std::string& type_ids_json1, + const std::string& type_ids_json2, bool expected_equals) { + auto type_ids1 = ArrayFromJSON(int8(), type_ids_json1); + auto type_ids2 = ArrayFromJSON(int8(), type_ids_json2); + ASSERT_OK_AND_ASSIGN(auto arr1, + SparseUnionArray::Make(*type_ids1, {ints1, strs1}, type_codes)); + ASSERT_OK_AND_ASSIGN(auto arr2, + SparseUnionArray::Make(*type_ids2, {ints2, strs2}, type_codes)); + ASSERT_EQ(arr1->Equals(arr2), expected_equals); + ASSERT_EQ(arr2->Equals(arr1), expected_equals); + }; + + // Same type ids + check_equality("[8, 8, 42, 42, 42, 8]", "[8, 8, 42, 42, 42, 8]", true); + check_equality("[8, 8, 42, 42, 42, 42]", "[8, 8, 42, 42, 42, 42]", false); + check_equality("[8, 8, 8, 42, 42, 8]", "[8, 8, 8, 42, 42, 8]", false); + check_equality("[8, 42, 42, 42, 42, 8]", "[8, 42, 42, 42, 42, 8]", false); + + // Different type ids + check_equality("[42, 8, 42, 42, 42, 8]", "[8, 8, 42, 42, 42, 8]", false); + check_equality("[8, 8, 42, 42, 42, 8]", "[8, 8, 42, 42, 42, 42]", false); +} + // 
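The `BackpressureConcurrentQueue` test above builds its handler with `BackpressureHandler::Make(&dummy_node, 2, 4, ...)` and then shows the source being paused once the queue holds 4 elements and resumed once it drains back to 2. A toy queue with the same watermark behaviour, standard library only; the real class routes these calls through `BackpressureHandler` and the owning `ExecNode`, and the 2/4 thresholds are simply the values the test uses:

```cpp
#include <cstddef>
#include <deque>
#include <functional>
#include <iostream>

class ToyBackpressureQueue {
 public:
  ToyBackpressureQueue(std::size_t resume_if_below, std::size_t pause_if_above,
                       std::function<void()> pause, std::function<void()> resume)
      : resume_if_below_(resume_if_below),
        pause_if_above_(pause_if_above),
        pause_(std::move(pause)),
        resume_(std::move(resume)) {}

  void Push(int v) {
    queue_.push_back(v);
    if (queue_.size() >= pause_if_above_ && !paused_) {
      paused_ = true;
      pause_();
    }
  }

  int Pop() {
    int v = queue_.front();
    queue_.pop_front();
    if (queue_.size() <= resume_if_below_ && paused_) {
      paused_ = false;
      resume_();
    }
    return v;
  }

 private:
  std::size_t resume_if_below_, pause_if_above_;
  std::function<void()> pause_, resume_;
  std::deque<int> queue_;
  bool paused_ = false;
};

int main() {
  ToyBackpressureQueue q(/*resume_if_below=*/2, /*pause_if_above=*/4,
                         [] { std::cout << "pause\n"; },
                         [] { std::cout << "resume\n"; });
  for (int i = 0; i < 4; ++i) q.Push(i);  // prints "pause" on the 4th push
  q.Pop();                                // size 3: still paused
  q.Pop();                                // size 2: prints "resume"
  return 0;
}
```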
------------------------------------------------------------------------- // Tests for MakeDense and MakeSparse diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index 0eb22a9d1553d..9667e1590ea16 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -63,28 +63,28 @@ Status CheckBufferSlice(const Buffer& buffer, int64_t offset) { } // namespace -Result> SliceBufferSafe(const std::shared_ptr& buffer, +Result> SliceBufferSafe(std::shared_ptr buffer, int64_t offset) { RETURN_NOT_OK(CheckBufferSlice(*buffer, offset)); - return SliceBuffer(buffer, offset); + return SliceBuffer(std::move(buffer), offset); } -Result> SliceBufferSafe(const std::shared_ptr& buffer, +Result> SliceBufferSafe(std::shared_ptr buffer, int64_t offset, int64_t length) { RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length)); - return SliceBuffer(buffer, offset, length); + return SliceBuffer(std::move(buffer), offset, length); } -Result> SliceMutableBufferSafe( - const std::shared_ptr& buffer, int64_t offset) { +Result> SliceMutableBufferSafe(std::shared_ptr buffer, + int64_t offset) { RETURN_NOT_OK(CheckBufferSlice(*buffer, offset)); - return SliceMutableBuffer(buffer, offset); + return SliceMutableBuffer(std::move(buffer), offset); } -Result> SliceMutableBufferSafe( - const std::shared_ptr& buffer, int64_t offset, int64_t length) { +Result> SliceMutableBufferSafe(std::shared_ptr buffer, + int64_t offset, int64_t length) { RETURN_NOT_OK(CheckBufferSlice(*buffer, offset, length)); - return SliceMutableBuffer(buffer, offset, length); + return SliceMutableBuffer(std::move(buffer), offset, length); } std::string Buffer::ToHexString() { @@ -167,9 +167,9 @@ std::shared_ptr Buffer::FromString(std::string data) { return std::make_shared(std::move(data)); } -std::shared_ptr SliceMutableBuffer(const std::shared_ptr& buffer, +std::shared_ptr SliceMutableBuffer(std::shared_ptr buffer, const int64_t offset, const int64_t length) { - return std::make_shared(buffer, offset, length); + return std::make_shared(std::move(buffer), offset, length); } MutableBuffer::MutableBuffer(const std::shared_ptr& parent, const int64_t offset, diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index fbf4a22e350ca..1b546a83ccc4d 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -396,33 +396,33 @@ class ARROW_EXPORT Buffer { /// \brief Construct a view on a buffer at the given offset and length. /// /// This function cannot fail and does not check for errors (except in debug builds) -static inline std::shared_ptr SliceBuffer(const std::shared_ptr& buffer, +static inline std::shared_ptr SliceBuffer(std::shared_ptr buffer, const int64_t offset, const int64_t length) { - return std::make_shared(buffer, offset, length); + return std::make_shared(std::move(buffer), offset, length); } /// \brief Construct a view on a buffer at the given offset, up to the buffer's end. /// /// This function cannot fail and does not check for errors (except in debug builds) -static inline std::shared_ptr SliceBuffer(const std::shared_ptr& buffer, +static inline std::shared_ptr SliceBuffer(std::shared_ptr buffer, const int64_t offset) { int64_t length = buffer->size() - offset; - return SliceBuffer(buffer, offset, length); + return SliceBuffer(std::move(buffer), offset, length); } /// \brief Input-checking version of SliceBuffer /// /// An Invalid Status is returned if the requested slice falls out of bounds. 
ARROW_EXPORT -Result> SliceBufferSafe(const std::shared_ptr& buffer, +Result> SliceBufferSafe(std::shared_ptr buffer, int64_t offset); /// \brief Input-checking version of SliceBuffer /// /// An Invalid Status is returned if the requested slice falls out of bounds. /// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size. ARROW_EXPORT -Result> SliceBufferSafe(const std::shared_ptr& buffer, +Result> SliceBufferSafe(std::shared_ptr buffer, int64_t offset, int64_t length); /// \brief Like SliceBuffer, but construct a mutable buffer slice. @@ -430,32 +430,32 @@ Result> SliceBufferSafe(const std::shared_ptr& b /// If the parent buffer is not mutable, behavior is undefined (it may abort /// in debug builds). ARROW_EXPORT -std::shared_ptr SliceMutableBuffer(const std::shared_ptr& buffer, +std::shared_ptr SliceMutableBuffer(std::shared_ptr buffer, const int64_t offset, const int64_t length); /// \brief Like SliceBuffer, but construct a mutable buffer slice. /// /// If the parent buffer is not mutable, behavior is undefined (it may abort /// in debug builds). -static inline std::shared_ptr SliceMutableBuffer( - const std::shared_ptr& buffer, const int64_t offset) { +static inline std::shared_ptr SliceMutableBuffer(std::shared_ptr buffer, + const int64_t offset) { int64_t length = buffer->size() - offset; - return SliceMutableBuffer(buffer, offset, length); + return SliceMutableBuffer(std::move(buffer), offset, length); } /// \brief Input-checking version of SliceMutableBuffer /// /// An Invalid Status is returned if the requested slice falls out of bounds. ARROW_EXPORT -Result> SliceMutableBufferSafe( - const std::shared_ptr& buffer, int64_t offset); +Result> SliceMutableBufferSafe(std::shared_ptr buffer, + int64_t offset); /// \brief Input-checking version of SliceMutableBuffer /// /// An Invalid Status is returned if the requested slice falls out of bounds. /// Note that unlike SliceBuffer, `length` isn't clamped to the available buffer size. ARROW_EXPORT -Result> SliceMutableBufferSafe( - const std::shared_ptr& buffer, int64_t offset, int64_t length); +Result> SliceMutableBufferSafe(std::shared_ptr buffer, + int64_t offset, int64_t length); /// @} diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 23a921cc5a0a4..e0e6d183393a7 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -381,21 +381,49 @@ class RangeDataEqualsImpl { const int8_t* right_codes = right_.GetValues(1); // Unions don't have a null bitmap + int64_t run_start = 0; // Start index of the current run + for (int64_t i = 0; i < range_length_; ++i) { - const auto type_id = left_codes[left_start_idx_ + i]; - if (type_id != right_codes[right_start_idx_ + i]) { + const auto current_type_id = left_codes[left_start_idx_ + i]; + + if (current_type_id != right_codes[right_start_idx_ + i]) { result_ = false; break; } - const auto child_num = child_ids[type_id]; - // XXX can we instead detect runs of same-child union values? 
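The buffer.h/buffer.cc hunks change the slice helpers to take `std::shared_ptr<Buffer>` by value and `std::move` it into the new slice, so a caller passing an rvalue avoids a ref-count increment/decrement pair while lvalue callers behave as before. A generic sketch of the idiom outside Arrow:

```cpp
#include <memory>
#include <utility>

struct Blob { /* payload */ };

struct BlobView {
  explicit BlobView(std::shared_ptr<Blob> parent) : parent_(std::move(parent)) {}
  std::shared_ptr<Blob> parent_;
};

// Take the shared_ptr by value: an lvalue argument is copied once (as before),
// but an rvalue argument is moved all the way through with no ref-count traffic.
BlobView Slice(std::shared_ptr<Blob> parent) { return BlobView(std::move(parent)); }

int main() {
  auto blob = std::make_shared<Blob>();
  BlobView kept = Slice(blob);              // one ref-count bump, blob still usable
  BlobView taken = Slice(std::move(blob));  // no ref-count bump at all
  (void)kept;
  (void)taken;
  return 0;
}
```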
+ // Check if the current element breaks the run + if (i > 0 && current_type_id != left_codes[left_start_idx_ + i - 1]) { + // Compare the previous run + const auto previous_child_num = child_ids[left_codes[left_start_idx_ + i - 1]]; + int64_t run_length = i - run_start; + + RangeDataEqualsImpl impl( + options_, floating_approximate_, *left_.child_data[previous_child_num], + *right_.child_data[previous_child_num], + left_start_idx_ + left_.offset + run_start, + right_start_idx_ + right_.offset + run_start, run_length); + + if (!impl.Compare()) { + result_ = false; + break; + } + + // Start a new run + run_start = i; + } + } + + // Handle the final run + if (result_) { + const auto final_child_num = child_ids[left_codes[left_start_idx_ + run_start]]; + int64_t final_run_length = range_length_ - run_start; + RangeDataEqualsImpl impl( - options_, floating_approximate_, *left_.child_data[child_num], - *right_.child_data[child_num], left_start_idx_ + left_.offset + i, - right_start_idx_ + right_.offset + i, 1); + options_, floating_approximate_, *left_.child_data[final_child_num], + *right_.child_data[final_child_num], left_start_idx_ + left_.offset + run_start, + right_start_idx_ + right_.offset + run_start, final_run_length); + if (!impl.Compare()) { result_ = false; - break; } } return Status::OK(); diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc index 49d8709660684..20d3ce2faf256 100644 --- a/cpp/src/arrow/compute/api_aggregate.cc +++ b/cpp/src/arrow/compute/api_aggregate.cc @@ -24,8 +24,8 @@ #include "arrow/util/logging.h" namespace arrow { - namespace internal { + template <> struct EnumTraits : BasicEnumTraits return ""; } }; + +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "PivotWiderOptions::UnexpectedKeyBehavior"; } + static std::string value_name(compute::PivotWiderOptions::UnexpectedKeyBehavior value) { + switch (value) { + case compute::PivotWiderOptions::kIgnore: + return "kIgnore"; + case compute::PivotWiderOptions::kRaise: + return "kRaise"; + } + return ""; + } +}; + } // namespace internal namespace compute { @@ -91,6 +109,9 @@ static auto kVarianceOptionsType = GetFunctionOptionsType( DataMember("ddof", &VarianceOptions::ddof), DataMember("skip_nulls", &VarianceOptions::skip_nulls), DataMember("min_count", &VarianceOptions::min_count)); +static auto kSkewOptionsType = GetFunctionOptionsType( + DataMember("skip_nulls", &SkewOptions::skip_nulls), + DataMember("min_count", &SkewOptions::min_count)); static auto kQuantileOptionsType = GetFunctionOptionsType( DataMember("q", &QuantileOptions::q), DataMember("interpolation", &QuantileOptions::interpolation), @@ -101,6 +122,9 @@ static auto kTDigestOptionsType = GetFunctionOptionsType( DataMember("buffer_size", &TDigestOptions::buffer_size), DataMember("skip_nulls", &TDigestOptions::skip_nulls), DataMember("min_count", &TDigestOptions::min_count)); +static auto kPivotOptionsType = GetFunctionOptionsType( + DataMember("key_names", &PivotWiderOptions::key_names), + DataMember("unexpected_key_behavior", &PivotWiderOptions::unexpected_key_behavior)); static auto kIndexOptionsType = GetFunctionOptionsType(DataMember("value", &IndexOptions::value)); } // namespace @@ -130,6 +154,11 @@ VarianceOptions::VarianceOptions(int ddof, bool skip_nulls, uint32_t min_count) min_count(min_count) {} constexpr char VarianceOptions::kTypeName[]; +SkewOptions::SkewOptions(bool skip_nulls, uint32_t min_count) + : FunctionOptions(internal::kSkewOptionsType), + 
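The rewritten union comparison above replaces the per-element recursion with run detection: consecutive elements that share a type code are compared as a single child-array slice. A standalone sketch of the run detection over type codes, with the per-run comparison left as a callback (a hypothetical helper, not the `RangeDataEqualsImpl` API):

```cpp
#include <cstdint>
#include <functional>
#include <vector>

// Calls `compare_run(child_code, start, length)` once per maximal run of equal
// consecutive type codes, mirroring the run detection in the hunk above.
// Returns false as soon as a run comparison fails.
bool CompareUnionByRuns(
    const std::vector<int8_t>& type_codes,
    const std::function<bool(int8_t, int64_t, int64_t)>& compare_run) {
  int64_t run_start = 0;
  const int64_t length = static_cast<int64_t>(type_codes.size());
  for (int64_t i = 1; i <= length; ++i) {
    bool run_ends = (i == length) || (type_codes[i] != type_codes[i - 1]);
    if (run_ends) {
      if (!compare_run(type_codes[run_start], run_start, i - run_start)) return false;
      run_start = i;
    }
  }
  return true;
}
```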
skip_nulls(skip_nulls), + min_count(min_count) {} + QuantileOptions::QuantileOptions(double q, enum Interpolation interpolation, bool skip_nulls, uint32_t min_count) : FunctionOptions(internal::kQuantileOptionsType), @@ -164,6 +193,13 @@ TDigestOptions::TDigestOptions(std::vector q, uint32_t delta, min_count{min_count} {} constexpr char TDigestOptions::kTypeName[]; +PivotWiderOptions::PivotWiderOptions(std::vector key_names, + UnexpectedKeyBehavior unexpected_key_behavior) + : FunctionOptions(internal::kPivotOptionsType), + key_names(std::move(key_names)), + unexpected_key_behavior(unexpected_key_behavior) {} +PivotWiderOptions::PivotWiderOptions() : FunctionOptions(internal::kPivotOptionsType) {} + IndexOptions::IndexOptions(std::shared_ptr value) : FunctionOptions(internal::kIndexOptionsType), value{std::move(value)} {} IndexOptions::IndexOptions() : IndexOptions(std::make_shared()) {} @@ -175,8 +211,10 @@ void RegisterAggregateOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kCountOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kModeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kVarianceOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kSkewOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kQuantileOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kTDigestOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kPivotOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kIndexOptionsType)); } } // namespace internal @@ -242,6 +280,14 @@ Result Variance(const Datum& value, const VarianceOptions& options, return CallFunction("variance", {value}, &options, ctx); } +Result Skew(const Datum& value, const SkewOptions& options, ExecContext* ctx) { + return CallFunction("skew", {value}, &options, ctx); +} + +Result Kurtosis(const Datum& value, const SkewOptions& options, ExecContext* ctx) { + return CallFunction("kurtosis", {value}, &options, ctx); +} + Result Quantile(const Datum& value, const QuantileOptions& options, ExecContext* ctx) { return CallFunction("quantile", {value}, &options, ctx); diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 2e5210b073ee4..61bab4cdb86f2 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -114,6 +114,20 @@ class ARROW_EXPORT VarianceOptions : public FunctionOptions { uint32_t min_count; }; +/// \brief Control Skew and Kurtosis kernel behavior +class ARROW_EXPORT SkewOptions : public FunctionOptions { + public: + explicit SkewOptions(bool skip_nulls = true, uint32_t min_count = 0); + static constexpr char const kTypeName[] = "SkewOptions"; + static SkewOptions Defaults() { return SkewOptions{}; } + + /// If true (the default), null values are ignored. Otherwise, if any value is null, + /// emit null. + bool skip_nulls; + /// If less than this many non-null values are observed, emit null. + uint32_t min_count; +}; + /// \brief Control Quantile kernel behavior /// /// By default, returns the median value. @@ -175,6 +189,88 @@ class ARROW_EXPORT TDigestOptions : public FunctionOptions { uint32_t min_count; }; +/// \brief Control Pivot kernel behavior +/// +/// These options apply to the "pivot_wider" and "hash_pivot_wider" functions. +/// +/// Constraints: +/// - The corresponding `Aggregate::target` must have two FieldRef elements; +/// the first one points to the pivot key column, the second points to the +/// pivoted data column. 
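For reference, a hypothetical end-to-end use of the aggregate additions in this hunk: the new `Skew`/`Kurtosis` convenience wrappers with `SkewOptions`, and the new `pivot_wider` kernel with `PivotWiderOptions` (documented in the header just below). This is a sketch that assumes the kernels introduced by this PR are registered; exact output formatting may differ:

```cpp
#include <iostream>
#include <string>
#include <vector>

#include "arrow/api.h"
#include "arrow/compute/api.h"

arrow::Status RunNewAggregates() {
  // Skew / Kurtosis over a small double array.
  arrow::DoubleBuilder dbl_builder;
  ARROW_RETURN_NOT_OK(dbl_builder.AppendValues({1.0, 2.0, 2.0, 3.0, 9.0}));
  ARROW_ASSIGN_OR_RAISE(auto doubles, dbl_builder.Finish());

  auto skew_options = arrow::compute::SkewOptions::Defaults();
  ARROW_ASSIGN_OR_RAISE(auto skew, arrow::compute::Skew(doubles, skew_options));
  ARROW_ASSIGN_OR_RAISE(auto kurtosis, arrow::compute::Kurtosis(doubles, skew_options));
  std::cout << "skew=" << skew.scalar()->ToString()
            << " kurtosis=" << kurtosis.scalar()->ToString() << std::endl;

  // pivot_wider: the first argument is the pivot key column, the second the values.
  arrow::StringBuilder key_builder;
  ARROW_RETURN_NOT_OK(
      key_builder.AppendValues(std::vector<std::string>{"width", "height"}));
  ARROW_ASSIGN_OR_RAISE(auto keys, key_builder.Finish());

  arrow::Int16Builder value_builder;
  ARROW_RETURN_NOT_OK(value_builder.AppendValues(std::vector<int16_t>{11, 13}));
  ARROW_ASSIGN_OR_RAISE(auto values, value_builder.Finish());

  arrow::compute::PivotWiderOptions pivot_options({"height", "width"});
  ARROW_ASSIGN_OR_RAISE(
      auto pivoted,
      arrow::compute::CallFunction("pivot_wider", {keys, values}, &pivot_options));
  // Expected per the header docs below: a struct scalar {"height": 13, "width": 11}.
  std::cout << pivoted.scalar()->ToString() << std::endl;
  return arrow::Status::OK();
}
```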
+/// - The pivot key column must be string-like; its values will be matched +/// against `key_names` in order to dispatch the pivoted data into the +/// output. +/// +/// "pivot_wider" example +/// --------------------- +/// +/// Assuming the following two input columns with types utf8 and int16 (respectively): +/// ``` +/// width | 11 +/// height | 13 +/// ``` +/// and the options `PivotWiderOptions(.key_names = {"height", "width"})` +/// +/// then the output will be a scalar with the type +/// `struct{"height": int16, "width": int16}` +/// and the value `{"height": 13, "width": 11}`. +/// +/// "hash_pivot_wider" example +/// -------------------------- +/// +/// Assuming the following input with schema +/// `{"group": int32, "key": utf8, "value": int16}`: +/// ``` +/// group | key | value +/// ----------------------------- +/// 1 | height | 11 +/// 1 | width | 12 +/// 2 | width | 13 +/// 3 | height | 14 +/// 3 | depth | 15 +/// ``` +/// and the following settings: +/// - a hash grouping key "group" +/// - Aggregate( +/// .function = "hash_pivot_wider", +/// .options = PivotWiderOptions(.key_names = {"height", "width"}), +/// .target = {"key", "value"}, +/// .name = {"properties"}) +/// +/// then the output will have the schema +/// `{"group": int32, "properties": struct{"height": int16, "width": int16}}` +/// and the following value: +/// ``` +/// group | properties +/// | height | width +/// ----------------------------- +/// 1 | 11 | 12 +/// 2 | null | 13 +/// 3 | 14 | null +/// ``` +class ARROW_EXPORT PivotWiderOptions : public FunctionOptions { + public: + /// Configure the behavior of pivot keys not in `key_names` + enum UnexpectedKeyBehavior { + /// Unexpected pivot keys are ignored silently + kIgnore, + /// Unexpected pivot keys return a KeyError + kRaise + }; + + explicit PivotWiderOptions(std::vector key_names, + UnexpectedKeyBehavior unexpected_key_behavior = kIgnore); + // Default constructor for serialization + PivotWiderOptions(); + static constexpr char const kTypeName[] = "PivotWiderOptions"; + static PivotWiderOptions Defaults() { return PivotWiderOptions{}; } + + /// The values expected in the pivot key column + std::vector key_names; + /// The behavior when pivot keys not in `key_names` are encountered + UnexpectedKeyBehavior unexpected_key_behavior = kIgnore; +}; + /// \brief Control Index kernel behavior class ARROW_EXPORT IndexOptions : public FunctionOptions { public: @@ -421,6 +517,34 @@ Result Variance(const Datum& value, const VarianceOptions& options = VarianceOptions::Defaults(), ExecContext* ctx = NULLPTR); +/// \brief Calculate the skewness of a numeric array +/// +/// \param[in] value input datum, expecting Array or ChunkedArray +/// \param[in] options see SkewOptions for more information +/// \param[in] ctx the function execution context, optional +/// \return datum of the computed skewness as a DoubleScalar +/// +/// \since 20.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Skew(const Datum& value, + const SkewOptions& options = SkewOptions::Defaults(), + ExecContext* ctx = NULLPTR); + +/// \brief Calculate the kurtosis of a numeric array +/// +/// \param[in] value input datum, expecting Array or ChunkedArray +/// \param[in] options see SkewOptions for more information +/// \param[in] ctx the function execution context, optional +/// \return datum of the computed kurtosis as a DoubleScalar +/// +/// \since 20.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Kurtosis(const Datum& value, + const SkewOptions& options = 
SkewOptions::Defaults(), + ExecContext* ctx = NULLPTR); + /// \brief Calculate the quantiles of a numeric array /// /// \param[in] value input datum, expecting Array or ChunkedArray diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 61335de6ac09a..53ceed1b0893e 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -270,6 +270,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kRankQuantileOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kPairwiseOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kListFlattenOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kInversePermutationOptionsType)); diff --git a/cpp/src/arrow/compute/expression.cc b/cpp/src/arrow/compute/expression.cc index e2f3195db5493..e970cd3175add 100644 --- a/cpp/src/arrow/compute/expression.cc +++ b/cpp/src/arrow/compute/expression.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include "arrow/util/config.h" + #include "arrow/compute/expression.h" #include @@ -30,8 +32,10 @@ #include "arrow/compute/function_internal.h" #include "arrow/compute/util.h" #include "arrow/io/memory.h" -#include "arrow/ipc/reader.h" -#include "arrow/ipc/writer.h" +#ifdef ARROW_IPC +# include "arrow/ipc/reader.h" +# include "arrow/ipc/writer.h" +#endif #include "arrow/util/hash_util.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" @@ -1492,6 +1496,7 @@ Result RemoveNamedRefs(Expression src) { // this in the schema of a RecordBatch. Embedded arrays and scalars are stored in its // columns. Finally, the RecordBatch is written to an IPC file. Result> Serialize(const Expression& expr) { +#ifdef ARROW_IPC struct { std::shared_ptr metadata_ = std::make_shared(); ArrayVector columns_; @@ -1567,9 +1572,13 @@ Result> Serialize(const Expression& expr) { RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); RETURN_NOT_OK(writer->Close()); return stream->Finish(); +#else + return Status::NotImplemented("IPC feature isn't enabled"); +#endif } Result Deserialize(std::shared_ptr buffer) { +#ifdef ARROW_IPC io::BufferReader stream(std::move(buffer)); ARROW_ASSIGN_OR_RAISE(auto reader, ipc::RecordBatchFileReader::Open(&stream)); ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(0)); @@ -1670,6 +1679,9 @@ Result Deserialize(std::shared_ptr buffer) { }; return FromRecordBatch{*batch, 0}.GetOne(); +#else + return Status::NotImplemented("IPC feature isn't enabled"); +#endif } Expression project(std::vector values, std::vector names) { diff --git a/cpp/src/arrow/compute/kernels/aggregate_internal.h b/cpp/src/arrow/compute/kernels/aggregate_internal.h index 9dab049821d5c..23aa20eddc397 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_internal.h @@ -17,6 +17,9 @@ #pragma once +#include +#include + #include "arrow/compute/kernels/util_internal.h" #include "arrow/type.h" #include "arrow/type_traits.h" @@ -24,9 +27,7 @@ #include "arrow/util/int128_internal.h" #include "arrow/util/logging.h" -namespace arrow { -namespace compute { -namespace internal { +namespace arrow::compute::internal { // Find the largest compatible primitive type for a primitive type. 
template @@ -254,6 +255,4 @@ SumType SumArray(const ArraySpan& data) { data, [](ValueType v) { return static_cast(v); }); } -} // namespace internal -} // namespace compute -} // namespace arrow +} // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/kernels/aggregate_pivot.cc b/cpp/src/arrow/compute/kernels/aggregate_pivot.cc new file mode 100644 index 0000000000000..bcc2f53ac1544 --- /dev/null +++ b/cpp/src/arrow/compute/kernels/aggregate_pivot.cc @@ -0,0 +1,188 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/api_aggregate.h" +#include "arrow/compute/kernels/aggregate_internal.h" +#include "arrow/compute/kernels/common_internal.h" +#include "arrow/compute/kernels/pivot_internal.h" +#include "arrow/scalar.h" +#include "arrow/util/bit_run_reader.h" +#include "arrow/util/logging.h" + +namespace arrow::compute::internal { +namespace { + +using arrow::internal::VisitSetBitRunsVoid; +using arrow::util::span; + +struct PivotImpl : public ScalarAggregator { + Status Init(const PivotWiderOptions& options, const std::vector& in_types) { + options_ = &options; + key_type_ = in_types[0].GetSharedPtr(); + auto value_type = in_types[1].GetSharedPtr(); + FieldVector fields; + fields.reserve(options_->key_names.size()); + values_.reserve(options_->key_names.size()); + for (const auto& key_name : options_->key_names) { + fields.push_back(field(key_name, value_type)); + values_.push_back(MakeNullScalar(value_type)); + } + out_type_ = struct_(std::move(fields)); + ARROW_ASSIGN_OR_RAISE(key_mapper_, PivotWiderKeyMapper::Make(*key_type_, options_)); + return Status::OK(); + } + + Status Consume(KernelContext*, const ExecSpan& batch) override { + DCHECK_EQ(batch.num_values(), 2); + if (batch[0].is_array()) { + ARROW_ASSIGN_OR_RAISE(span keys, + key_mapper_->MapKeys(batch[0].array)); + if (batch[1].is_array()) { + // Array keys, array values + auto values = batch[1].array.ToArray(); + for (int64_t i = 0; i < batch.length; ++i) { + PivotWiderKeyIndex key = keys[i]; + if (key != kNullPivotKey && !values->IsNull(i)) { + if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) { + return DuplicateValue(); + } + ARROW_ASSIGN_OR_RAISE(values_[key], values->GetScalar(i)); + DCHECK(values_[key]->is_valid); + } + } + } else { + // Array keys, scalar value + const Scalar* value = batch[1].scalar; + if (value->is_valid) { + for (int64_t i = 0; i < batch.length; ++i) { + PivotWiderKeyIndex key = keys[i]; + if (key != kNullPivotKey) { + if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) { + return DuplicateValue(); + } + values_[key] = value->GetSharedPtr(); + } + } + } + } + } else { + ARROW_ASSIGN_OR_RAISE(PivotWiderKeyIndex key, + key_mapper_->MapKey(*batch[0].scalar)); + if (key != kNullPivotKey) { + if (batch[1].is_array()) { + // 
Scalar key, array values + auto values = batch[1].array.ToArray(); + for (int64_t i = 0; i < batch.length; ++i) { + if (!values->IsNull(i)) { + if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) { + return DuplicateValue(); + } + ARROW_ASSIGN_OR_RAISE(values_[key], values->GetScalar(i)); + DCHECK(values_[key]->is_valid); + } + } + } else { + // Scalar key, scalar value + const Scalar* value = batch[1].scalar; + if (value->is_valid) { + if (batch.length > 1 || values_[key]->is_valid) { + return DuplicateValue(); + } + values_[key] = value->GetSharedPtr(); + } + } + } + } + return Status::OK(); + } + + Status MergeFrom(KernelContext*, KernelState&& src) override { + const auto& other_state = checked_cast(src); + for (int64_t key = 0; key < static_cast(values_.size()); ++key) { + if (other_state.values_[key]->is_valid) { + if (ARROW_PREDICT_FALSE(values_[key]->is_valid)) { + return DuplicateValue(); + } + values_[key] = other_state.values_[key]; + } + } + return Status::OK(); + } + + Status Finalize(KernelContext* ctx, Datum* out) override { + *out = std::make_shared(std::move(values_), out_type_); + return Status::OK(); + } + + Status DuplicateValue() { + return Status::Invalid( + "Encountered more than one non-null value for the same pivot key"); + } + + std::shared_ptr out_type() const { return out_type_; } + + std::shared_ptr key_type_; + std::shared_ptr out_type_; + const PivotWiderOptions* options_; + std::unique_ptr key_mapper_; + ScalarVector values_; +}; + +Result> PivotInit(KernelContext* ctx, + const KernelInitArgs& args) { + const auto& options = checked_cast(*args.options); + DCHECK_EQ(args.inputs.size(), 2); + DCHECK(is_base_binary_like(args.inputs[0].id())); + auto state = std::make_unique(); + RETURN_NOT_OK(state->Init(options, args.inputs)); + return state; +} + +Result ResolveOutputType(KernelContext* ctx, const std::vector&) { + return checked_cast(ctx->state())->out_type(); +} + +const FunctionDoc pivot_doc{ + "Pivot values according to a pivot key column", + ("Output is a struct with as many fields as `PivotWiderOptions.key_names`.\n" + "All output struct fields have the same type as `pivot_values`.\n" + "Each pivot key decides in which output field the corresponding pivot value\n" + "is emitted. 
If a pivot key doesn't appear, null is emitted.\n" + "If more than one non-null value is encountered for a given pivot key,\n" + "Invalid is raised.\n" + "Behavior of unexpected pivot keys is controlled by `unexpected_key_behavior`\n" + "in PivotWiderOptions."), + {"pivot_keys", "pivot_values"}, + "PivotWiderOptions"}; + +} // namespace + +void RegisterScalarAggregatePivot(FunctionRegistry* registry) { + static auto default_pivot_options = PivotWiderOptions::Defaults(); + + auto func = std::make_shared( + "pivot_wider", Arity::Binary(), pivot_doc, &default_pivot_options); + + for (auto key_type : BaseBinaryTypes()) { + auto sig = KernelSignature::Make({key_type->id(), InputType::Any()}, + OutputType(ResolveOutputType)); + AddAggKernel(std::move(sig), PivotInit, func.get()); + } + DCHECK_OK(registry->AddFunction(std::move(func))); +} + +} // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index e6ad915fd5667..d64c740a8e70d 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -42,6 +42,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" +#include "arrow/testing/math.h" #include "arrow/testing/random.h" #include "arrow/util/logging.h" @@ -3386,6 +3387,9 @@ TEST_F(TestVarStdKernelMergeStability, Basics) { #ifndef __MINGW32__ // MinGW has precision issues // XXX: The reference value from numpy is actually wrong due to floating // point limits. The correct result should equals variance(90, 0) = 4050. + // The problem is that the mean is not exactly representable as floating-point, + // and that small inaccuracy produces a large deviation when plugged into the M2 + // calculation. std::vector chunks = {"[40000008000000490]", "[40000008000000400]"}; this->AssertVarStdIs(chunks, options, 3904.0); #endif @@ -3430,12 +3434,21 @@ TEST_F(TestVarStdKernelUInt32, Basics) { this->AssertVarStdIs("[0, 0, 4294967295]", options, 6.148914688373205e+18); } -// https://en.wikipedia.org/wiki/Kahan_summation_algorithm void KahanSum(double& sum, double& adjust, double addend) { - double y = addend - adjust; - double t = sum + y; - adjust = (t - sum) - y; - sum = t; + // Backported enhancement from Neumaier's algorithm: consider case where + // sum is small compared to addend. 
+ // https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements + if (abs(sum) >= abs(addend)) { + double y = addend - adjust; + double t = sum + y; + adjust = (t - sum) - y; + sum = t; + } else { + double y = sum - adjust; + double t = addend + y; + adjust = (t - addend) - y; + sum = t; + } } // Calculate reference variance with Welford's online algorithm + Kahan summation @@ -3534,7 +3547,8 @@ TEST_F(TestVarStdKernelIntegerLength, Basics) { TEST(TestVarStdKernel, Decimal) { // Effectively treated as double, sanity check results here - for (const auto& ty : {decimal128(3, 2), decimal256(3, 2)}) { + for (const auto& ty : + {decimal32(3, 2), decimal64(3, 2), decimal128(3, 2), decimal256(3, 2)}) { CheckVarStd(ArrayFromJSON(ty, R"(["1.00"])"), VarianceOptions(), 0); CheckVarStd(ArrayFromJSON(ty, R"([null, "1.00", "2.00", "3.00"])"), VarianceOptions(), 0.6666666666666666); @@ -3544,6 +3558,154 @@ TEST(TestVarStdKernel, Decimal) { } } +// +// Skew and Kurtosis +// + +constexpr int kSkewUlps = 3; +constexpr int kKurtosisUlps = 6; + +void CheckSkewKurtosis(const Datum& array, const SkewOptions& options, + double expected_skew, double expected_kurtosis, int n_ulps = -1) { + ARROW_SCOPED_TRACE("type = ", *array.type()); + ASSERT_OK_AND_ASSIGN(Datum out_skew, Skew(array, options)); + ASSERT_OK_AND_ASSIGN(Datum out_kurtosis, Kurtosis(array, options)); + const auto& skew = checked_cast(*out_skew.scalar()); + const auto& kurtosis = checked_cast(*out_kurtosis.scalar()); + ASSERT_TRUE(skew.is_valid && kurtosis.is_valid); + AssertWithinUlp(expected_skew, skew.value, n_ulps >= 0 ? n_ulps : kSkewUlps); + AssertWithinUlp(expected_kurtosis, kurtosis.value, + n_ulps >= 0 ? n_ulps : kKurtosisUlps); +} + +class TestSkewKurtosis : public ::testing::Test { + public: + void AssertSkewKurtosisAre(const Array& array, const SkewOptions& options, + double expected_skew, double expected_kurtosis, + int n_ulps = -1) { + CheckSkewKurtosis(array, options, expected_skew, expected_kurtosis, n_ulps); + } + + void AssertSkewKurtosisAre(const std::shared_ptr& array, + const SkewOptions& options, double expected_skew, + double expected_kurtosis, int n_ulps = -1) { + CheckSkewKurtosis(array, options, expected_skew, expected_kurtosis, n_ulps); + } + + void AssertSkewKurtosisAre(const std::shared_ptr& type, std::string_view json, + const SkewOptions& options, double expected_skew, + double expected_kurtosis, int n_ulps = -1) { + auto array = ArrayFromJSON(type, json); + CheckSkewKurtosis(array, options, expected_skew, expected_kurtosis, n_ulps); + } + + void AssertSkewKurtosisAre(const std::shared_ptr& type, + const std::vector& json, + const SkewOptions& options, double expected_skew, + double expected_kurtosis, int n_ulps = -1) { + auto array = ChunkedArrayFromJSON(type, json); + CheckSkewKurtosis(array, options, expected_skew, expected_kurtosis, n_ulps); + } + + void AssertSkewKurtosisInvalid(const Array& array, const SkewOptions& options) { + AssertSkewKurtosisInvalidInternal(array, options); + } + + void AssertSkewKurtosisInvalid(const std::shared_ptr& array, + const SkewOptions& options) { + AssertSkewKurtosisInvalidInternal(array, options); + } + + void AssertSkewKurtosisInvalid(const std::shared_ptr& type, + std::string_view json, const SkewOptions& options) { + auto array = ArrayFromJSON(type, json); + AssertSkewKurtosisInvalidInternal(array, options); + } + + void AssertSkewKurtosisInvalid(const std::shared_ptr& type, + const std::vector& json, + const SkewOptions& options) { + auto array = 
ChunkedArrayFromJSON(type, json); + AssertSkewKurtosisInvalidInternal(array, options); + } + + private: + void AssertSkewKurtosisInvalidInternal(const Datum& array, const SkewOptions& options) { + ASSERT_OK_AND_ASSIGN(Datum out_skew, Skew(array, options)); + ASSERT_OK_AND_ASSIGN(Datum out_kurtosis, Kurtosis(array, options)); + const auto& skew = checked_cast(*out_skew.scalar()); + const auto& kurtosis = checked_cast(*out_kurtosis.scalar()); + ASSERT_FALSE(skew.is_valid || kurtosis.is_valid); + } +}; + +TEST_F(TestSkewKurtosis, Basics) { + // Test sample from SciPy, with results obtained using numpy.float128 + auto options = SkewOptions::Defaults(); + AssertSkewKurtosisAre(float64(), "[1.165, 0.6268, 0.0751, 0.3516, -0.6965]", options, + -0.29322304336607355496, -0.83411431970273759); + // Results are slightly different because the input doesn't losslessly convert + // to float32. + AssertSkewKurtosisAre(float32(), "[1.165, 0.6268, 0.0751, 0.3516, -0.6965]", options, + -0.2932230870440958164, -0.8341143229437093939); +} + +TEST_F(TestSkewKurtosis, Chunked) { + auto options = SkewOptions::Defaults(); + AssertSkewKurtosisAre(float64(), {"[1.165, 0.6268]", "[]", "[0.0751, 0.3516, -0.6965]"}, + options, -0.29322304336607355496, -0.83411431970273759); + AssertSkewKurtosisAre(float32(), {"[1.165, 0.6268]", "[]", "[0.0751, 0.3516, -0.6965]"}, + options, -0.2932230870440958164, -0.8341143229437093939); +} + +TEST_F(TestSkewKurtosis, Decimal) { + auto options = SkewOptions::Defaults(); + for (auto type : + {decimal32(5, 4), decimal64(5, 4), decimal128(5, 4), decimal256(5, 4)}) { + AssertSkewKurtosisAre(type, R"(["1.1650", "0.6268", "0.0751", "0.3516", "-0.6965"])", + options, -0.29322304336607355496, -0.83411431970273759); + } +} + +TEST_F(TestSkewKurtosis, Integral) { + auto options = SkewOptions::Defaults(); + for (auto type : IntTypes()) { + AssertSkewKurtosisAre(type, "[1, 2, 3, 5]", options, 0.4346507595746657, + -1.1542857142857144); + } +} + +TEST_F(TestSkewKurtosis, SpecialCases) { + auto options = SkewOptions::Defaults(); + for (auto type : {float64(), float32()}) { + AssertSkewKurtosisAre(type, "[0, 1, 2]", options, 0.0, -1.5, /*n_ulps=*/0); + AssertSkewKurtosisAre(type, "[1]", options, std::nan(""), std::nan("")); + AssertSkewKurtosisAre(type, "[1, 1, 1, 1, 1, 1]", options, std::nan(""), + std::nan("")); + } +} + +TEST_F(TestSkewKurtosis, Options) { + for (auto type : {float64(), float32()}) { + auto options = SkewOptions::Defaults(); + AssertSkewKurtosisInvalid(type, "[]", options); + AssertSkewKurtosisInvalid(type, std::vector{}, options); + AssertSkewKurtosisInvalid(type, {"[]", "[]", "[]"}, options); + AssertSkewKurtosisAre(type, "[0, 1, null, 2]", options, 0.0, -1.5); + AssertSkewKurtosisAre(type, {"[0, 1]", "[]", "[null, 2]"}, options, 0.0, -1.5); + options.min_count = 3; + AssertSkewKurtosisAre(type, "[0, 1, null, 2]", options, 0.0, -1.5); + AssertSkewKurtosisAre(type, {"[0, 1]", "[]", "[null, 2]"}, options, 0.0, -1.5); + options.skip_nulls = false; + AssertSkewKurtosisInvalid(type, "[0, 1, null, 2]", options); + AssertSkewKurtosisInvalid(type, {"[0, 1]", "[]", "[null, 2]"}, options); + options.skip_nulls = true; + options.min_count = 4; + AssertSkewKurtosisInvalid(type, "[0, 1, null, 2]", options); + AssertSkewKurtosisInvalid(type, {"[0, 1]", "[]", "[null, 2]"}, options); + } +} + // // Quantile // @@ -4307,5 +4469,294 @@ TEST(TestTDigestKernel, ApproximateMedian) { } } +// +// Pivot +// + +class TestPivotKernel : public ::testing::Test { + public: + void AssertPivot(const Datum& 
keys, const Datum& values, const Scalar& expected, + const PivotWiderOptions& options) { + SCOPED_TRACE(options.ToString()); + ASSERT_OK_AND_ASSIGN(Datum out, + CallFunction("pivot_wider", {keys, values}, &options)); + ValidateOutput(out); + ASSERT_TRUE(out.is_scalar()); + AssertScalarsEqual(expected, *out.scalar(), /*verbose=*/true); + } +}; + +TEST_F(TestPivotKernel, Basics) { + auto key_type = utf8(); + auto value_type = float32(); + + auto keys = ArrayFromJSON(key_type, R"(["width", "height"])"); + auto values = ArrayFromJSON(value_type, "[10.5, 11.5]"); + auto expected = ScalarFromJSON( + struct_({field("height", value_type), field("width", value_type)}), "[11.5, 10.5]"); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", "width"})); +} + +TEST_F(TestPivotKernel, AllKeyTypes) { + for (auto key_type : BaseBinaryTypes()) { + auto value_type = float32(); + + auto keys = ArrayFromJSON(key_type, R"(["width", "height"])"); + auto values = ArrayFromJSON(value_type, "[10.5, 11.5]"); + auto expected = + ScalarFromJSON(struct_({field("height", value_type), field("width", value_type)}), + "[11.5, 10.5]"); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", "width"})); + } +} + +TEST_F(TestPivotKernel, Numbers) { + auto key_type = utf8(); + for (auto value_type : NumericTypes()) { + auto keys = ArrayFromJSON(key_type, R"(["width", "height"])"); + auto values = ArrayFromJSON(value_type, "[10, 11]"); + auto expected = ScalarFromJSON( + struct_({field("height", value_type), field("width", value_type)}), "[11, 10]"); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", "width"})); + } +} + +TEST_F(TestPivotKernel, Binary) { + auto key_type = utf8(); + for (auto value_type : BaseBinaryTypes()) { + auto keys = ArrayFromJSON(key_type, R"(["abc", "def"])"); + auto values = ArrayFromJSON(value_type, R"(["foo", "bar"])"); + auto expected = + ScalarFromJSON(struct_({field("abc", value_type), field("def", value_type)}), + R"(["foo", "bar"])"); + AssertPivot(keys, values, *expected, PivotWiderOptions(/*key_names=*/{"abc", "def"})); + } +} + +TEST_F(TestPivotKernel, NullType) { + auto key_type = utf8(); + auto value_type = null(); + + auto keys = ArrayFromJSON(key_type, R"(["abc", "def"])"); + auto values = ArrayFromJSON(value_type, "[null, null]"); + auto expected = ScalarFromJSON( + struct_({field("abc", value_type), field("def", value_type)}), R"([null, null])"); + AssertPivot(keys, values, *expected, PivotWiderOptions(/*key_names=*/{"abc", "def"})); +} + +TEST_F(TestPivotKernel, NullValues) { + auto key_type = utf8(); + auto value_type = float32(); + + auto keys = ArrayFromJSON(key_type, R"(["width", "height", "height", "width"])"); + auto values = ArrayFromJSON(value_type, "[null, 10.5, null, 11.5]"); + auto expected = ScalarFromJSON( + struct_({field("height", value_type), field("width", value_type)}), "[10.5, 11.5]"); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", "width"})); +} + +TEST_F(TestPivotKernel, ChunkedInput) { + auto key_type = utf8(); + auto value_type = float32(); + + auto keys = ChunkedArrayFromJSON(key_type, + {R"(["width"])", R"(["height", "height", "width"])"}); + auto values = ChunkedArrayFromJSON(value_type, {"[null, 10.5]", "[null, 11.5]"}); + auto expected = ScalarFromJSON( + struct_({field("height", value_type), field("width", value_type)}), "[10.5, 11.5]"); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", 
"width"})); +} + +TEST_F(TestPivotKernel, AllInputKinds) { + auto key_type = utf8(); + auto value_type = float32(); + + DatumVector key_args = { + ScalarFromJSON(key_type, R"("width")"), + ArrayFromJSON(key_type, R"(["width"])"), + ChunkedArrayFromJSON(key_type, {R"(["width"])"}), + }; + DatumVector value_args = { + ScalarFromJSON(value_type, "11.5"), + ArrayFromJSON(value_type, "[11.5]"), + ChunkedArrayFromJSON(value_type, {"[11.5]"}), + }; + auto expected = ScalarFromJSON( + struct_({field("height", value_type), field("width", value_type)}), "[null, 11.5]"); + + for (const Datum& keys : key_args) { + ARROW_SCOPED_TRACE("keys = ", keys.ToString()); + for (const Datum& values : value_args) { + ARROW_SCOPED_TRACE("values = ", keys.ToString()); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", "width"})); + } + } +} + +TEST_F(TestPivotKernel, ScalarKey) { + auto key_type = utf8(); + auto value_type = float32(); + auto expected_type = struct_({field("height", value_type), field("width", value_type)}); + + auto keys = ScalarFromJSON(key_type, R"("width")"); + auto values = ArrayFromJSON(value_type, "[null, 11.5, null]"); + auto expected = ScalarFromJSON(expected_type, "[null, 11.5]"); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", "width"})); +} + +TEST_F(TestPivotKernel, ScalarValue) { + auto key_type = utf8(); + auto value_type = float32(); + auto expected_type = struct_({field("height", value_type), field("width", value_type)}); + + auto keys = ArrayFromJSON(key_type, R"(["width", "height"])"); + auto values = ScalarFromJSON(value_type, "11.5"); + auto expected = ScalarFromJSON(expected_type, "[11.5, 11.5]"); + AssertPivot(keys, values, *expected, + PivotWiderOptions(/*key_names=*/{"height", "width"})); +} + +TEST_F(TestPivotKernel, EmptyInput) { + auto key_type = utf8(); + auto value_type = float32(); + auto options = PivotWiderOptions(/*key_names=*/{"height", "width"}); + auto expected_type = struct_({field("height", value_type), field("width", value_type)}); + auto expected = ScalarFromJSON(expected_type, "[null, null]"); + + AssertPivot(ArrayFromJSON(key_type, "[]"), ArrayFromJSON(value_type, "[]"), *expected, + options); + AssertPivot(ChunkedArrayFromJSON(key_type, {}), ChunkedArrayFromJSON(value_type, {}), + *expected, options); +} + +TEST_F(TestPivotKernel, MissingKey) { + auto key_type = utf8(); + auto value_type = float32(); + + auto keys = ArrayFromJSON(key_type, R"(["width", "height"])"); + auto values = ArrayFromJSON(value_type, "[10.5, 11.5]"); + auto options = PivotWiderOptions(/*key_names=*/{"height", "width", "depth"}); + auto expected = + ScalarFromJSON(struct_({field("height", value_type), field("width", value_type), + field("depth", value_type)}), + "[11.5, 10.5, null]"); + AssertPivot(keys, values, *expected, options); +} + +TEST_F(TestPivotKernel, UnexpectedKey) { + auto key_type = utf8(); + auto value_type = float32(); + auto expected_type = struct_({field("height", value_type), field("width", value_type)}); + + auto options = PivotWiderOptions(/*key_names=*/{"height", "width"}); + auto options_raise = + PivotWiderOptions(/*key_names=*/{"height", "width"}, PivotWiderOptions::kRaise); + + { + auto keys = ArrayFromJSON(key_type, R"(["width", "height", "depth"])"); + auto values = ArrayFromJSON(value_type, "[10.5, 11.5, 12.5]"); + auto expected = ScalarFromJSON(expected_type, "[11.5, 10.5]"); + AssertPivot(keys, values, *expected, options); + EXPECT_RAISES_WITH_MESSAGE_THAT( + KeyError, 
::testing::HasSubstr("Unexpected pivot key: depth"), + CallFunction("pivot_wider", {keys, values}, &options_raise)); + } + { + // Scalar key + auto keys = ScalarFromJSON(key_type, R"("depth")"); + auto expected = ScalarFromJSON(expected_type, "[null, null]"); + for (const Datum& values : DatumVector{ArrayFromJSON(value_type, "[10.5]"), + ScalarFromJSON(value_type, "10.5")}) { + AssertPivot(keys, values, *expected, options); + EXPECT_RAISES_WITH_MESSAGE_THAT( + KeyError, ::testing::HasSubstr("Unexpected pivot key: depth"), + CallFunction("pivot_wider", {keys, values}, &options_raise)); + } + } + { + // Scalar value + auto values = ScalarFromJSON(value_type, "10.5"); + auto expected = ScalarFromJSON(expected_type, "[null, null]"); + for (const Datum& keys : DatumVector{ArrayFromJSON(key_type, R"(["depth"])"), + ScalarFromJSON(key_type, R"("depth")")}) { + AssertPivot(keys, values, *expected, options); + EXPECT_RAISES_WITH_MESSAGE_THAT( + KeyError, ::testing::HasSubstr("Unexpected pivot key: depth"), + CallFunction("pivot_wider", {keys, values}, &options_raise)); + } + } +} + +TEST_F(TestPivotKernel, NullKey) { + auto key_type = utf8(); + auto value_type = float32(); + + auto keys = ArrayFromJSON(key_type, R"(["width", null])"); + auto values = ArrayFromJSON(value_type, "[10.5, 11.5]"); + auto options = PivotWiderOptions(/*key_names=*/{"height", "width"}); + EXPECT_RAISES_WITH_MESSAGE_THAT(KeyError, + ::testing::HasSubstr("pivot key name cannot be null"), + CallFunction("pivot_wider", {keys, values}, &options)); +} + +TEST_F(TestPivotKernel, DuplicateKeyNames) { + auto key_type = utf8(); + auto value_type = float32(); + + auto keys = ArrayFromJSON(key_type, "[]"); + auto values = ArrayFromJSON(value_type, "[]"); + auto options = PivotWiderOptions(/*key_names=*/{"height", "height", "width"}); + EXPECT_RAISES_WITH_MESSAGE_THAT( + KeyError, ::testing::HasSubstr("Duplicate key name 'height' in PivotWiderOptions"), + CallFunction("pivot_wider", {keys, values}, &options)); +} + +TEST_F(TestPivotKernel, DuplicateValues) { + auto key_type = utf8(); + auto value_type = float32(); + auto options = PivotWiderOptions(/*key_names=*/{"height", "width"}); + + { + // Duplicate values in the same chunk + auto keys = ArrayFromJSON(key_type, R"(["width", "height", "height"])"); + auto values = ArrayFromJSON(value_type, "[10.5, 11.5, 12.5]"); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("Encountered more than one non-null value"), + CallFunction("pivot_wider", {keys, values}, &options)); + } + { + // Duplicate values in different chunks + auto keys = + ChunkedArrayFromJSON(key_type, {R"(["width", "height"])", R"(["height"])"}); + auto values = ChunkedArrayFromJSON(value_type, {"[10.5, 11.5]", "[12.5]"}); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("Encountered more than one non-null value"), + CallFunction("pivot_wider", {keys, values}, &options)); + } + { + // Duplicate values with scalar key + auto keys = ScalarFromJSON(key_type, R"("width")"); + auto values = ArrayFromJSON(value_type, "[10.5, 11.5]"); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("Encountered more than one non-null value"), + CallFunction("pivot_wider", {keys, values}, &options)); + } + { + // Duplicate values with scalar value + auto keys = ArrayFromJSON(key_type, R"(["width", "height", "height"])"); + auto values = ScalarFromJSON(value_type, "10.5"); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("Encountered more than one non-null value"), + 
CallFunction("pivot_wider", {keys, values}, &options)); + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc index e4189f9b62b17..8d2da195b09a4 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_var_std.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_var_std.cc @@ -15,18 +15,21 @@ // specific language governing permissions and limitations // under the License. +#include #include +#include #include "arrow/compute/api_aggregate.h" #include "arrow/compute/kernels/aggregate_internal.h" #include "arrow/compute/kernels/aggregate_var_std_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/util/bit_run_reader.h" +#include "arrow/util/checked_cast.h" #include "arrow/util/int128_internal.h" -namespace arrow { -namespace compute { -namespace internal { +namespace arrow::compute::internal { + +using ::arrow::internal::checked_cast; namespace { @@ -34,13 +37,14 @@ using arrow::internal::int128_t; using arrow::internal::VisitSetBitRunsVoid; template -struct VarStdState { +struct MomentsState { using ArrayType = typename TypeTraits::ArrayType; using CType = typename TypeTraits::CType; - using ThisType = VarStdState; + using SumType = typename internal::GetSumType::SumType; + using ThisType = MomentsState; - explicit VarStdState(int32_t decimal_scale, VarianceOptions options) - : decimal_scale(decimal_scale), options(options) {} + MomentsState(int level, int32_t decimal_scale, bool skip_nulls) + : level(level), decimal_scale(decimal_scale), skip_nulls(skip_nulls) {} template double ToDouble(T value) const { @@ -51,89 +55,90 @@ struct VarStdState { double ToDouble(const Decimal128& value) const { return value.ToDouble(decimal_scale); } double ToDouble(const Decimal256& value) const { return value.ToDouble(decimal_scale); } - // float/double/int64/decimal: calculate `m2` (sum((X-mean)^2)) with `two pass - // algorithm` - // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm - template - enable_if_t::value || (sizeof(CType) > 4) || - (!is_integer_type::value && sizeof(CType) == 4)> - Consume(const ArraySpan& array) { + int64_t count() const { return moments.count; } + + void Consume(const ArraySpan& array) { + constexpr bool kCanUseIntArithmetic = std::is_integral_v && sizeof(CType) <= 4; + this->all_valid = array.GetNullCount() == 0; - int64_t count = array.length - array.GetNullCount(); - if (count == 0 || (!this->all_valid && !options.skip_nulls)) { + int64_t valid_count = array.length - array.GetNullCount(); + if (valid_count == 0 || (!this->all_valid && !this->skip_nulls)) { return; } - using SumType = typename internal::GetSumType::SumType; + if constexpr (kCanUseIntArithmetic) { + if (level == 2) { + // int32/16/8: textbook one pass algorithm for M2 with integer arithmetic + + // max number of elements that sum will not overflow int64 (2Gi int32 elements) + // for uint32: 0 <= sum < 2^63 (int64 >= 0) + // for int32: -2^62 <= sum < 2^62 + constexpr int64_t kMaxChunkLength = 1ULL << (63 - sizeof(CType) * 8); + int64_t start_index = 0; + + ArraySpan slice = array; + while (valid_count > 0) { + // process in chunks that overflow will never happen + slice.SetSlice(start_index + array.offset, + std::min(kMaxChunkLength, array.length - start_index)); + const int64_t count = slice.length - slice.GetNullCount(); + start_index += slice.length; + valid_count -= count; + + if (count > 0) { + IntegerVarStd var_std; + const CType* values 
= slice.GetValues(1); + VisitSetBitRunsVoid(slice.buffers[0].data, slice.offset, slice.length, + [&](int64_t pos, int64_t len) { + for (int64_t i = 0; i < len; ++i) { + const auto value = values[pos + i]; + var_std.ConsumeOne(value); + } + }); + + // merge variance + auto slice_moments = Moments(var_std.count, var_std.mean(), var_std.m2()); + this->moments.MergeFrom(level, slice_moments); + } + } + return; + } + } + + // float/double/int64/decimal: calculate each moment in a separate pass. + // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm SumType sum = internal::SumArray(array); - const double mean = ToDouble(sum) / count; + const double mean = ToDouble(sum) / valid_count; const double m2 = internal::SumArray( array, [this, mean](CType value) { const double v = ToDouble(value); return (v - mean) * (v - mean); }); - - ThisType state(decimal_scale, options); - state.count = count; - state.mean = mean; - state.m2 = m2; - this->MergeFrom(state); - } - - // int32/16/8: textbook one pass algorithm with integer arithmetic - template - enable_if_t::value && (sizeof(CType) <= 4)> Consume( - const ArraySpan& array) { - // max number of elements that sum will not overflow int64 (2Gi int32 elements) - // for uint32: 0 <= sum < 2^63 (int64 >= 0) - // for int32: -2^62 <= sum < 2^62 - constexpr int64_t max_length = 1ULL << (63 - sizeof(CType) * 8); - - this->all_valid = array.GetNullCount() == 0; - if (!this->all_valid && !options.skip_nulls) return; - int64_t start_index = 0; - int64_t valid_count = array.length - array.GetNullCount(); - - ArraySpan slice = array; - while (valid_count > 0) { - // process in chunks that overflow will never happen - slice.SetSlice(start_index + array.offset, - std::min(max_length, array.length - start_index)); - const int64_t count = slice.length - slice.GetNullCount(); - start_index += slice.length; - valid_count -= count; - - if (count > 0) { - IntegerVarStd var_std; - const CType* values = slice.GetValues(1); - VisitSetBitRunsVoid(slice.buffers[0].data, slice.offset, slice.length, - [&](int64_t pos, int64_t len) { - for (int64_t i = 0; i < len; ++i) { - const auto value = values[pos + i]; - var_std.ConsumeOne(value); - } - }); - - // merge variance - ThisType state(decimal_scale, options); - state.count = var_std.count; - state.mean = var_std.mean(); - state.m2 = var_std.m2(); - this->MergeFrom(state); + double m3 = 0, m4 = 0; + if (level >= 3) { + m3 = internal::SumArray( + array, [this, mean](CType value) { + const double v = ToDouble(value); + return (v - mean) * (v - mean) * (v - mean); + }); + if (level >= 4) { + m4 = internal::SumArray( + array, [this, mean](CType value) { + const double v = ToDouble(value); + return (v - mean) * (v - mean) * (v - mean) * (v - mean); + }); } } + this->moments.MergeFrom(level, Moments(valid_count, mean, m2, m3, m4)); } - // Scalar: textbook algorithm void Consume(const Scalar& scalar, const int64_t count) { - this->m2 = 0; if (scalar.is_valid) { - this->count = count; - this->mean = ToDouble(UnboxScalar::Unbox(scalar)); + double value = ToDouble(UnboxScalar::Unbox(scalar)); + this->moments = Moments::FromScalar(level, value, count); } else { - this->count = 0; - this->mean = 0; + this->moments = Moments(); this->all_valid = false; } } @@ -142,35 +147,38 @@ struct VarStdState { // https://www.emathzone.com/tutorials/basic-statistics/combined-variance.html void MergeFrom(const ThisType& state) { this->all_valid = this->all_valid && state.all_valid; - if (state.count == 0) { - return; - } - if 
(this->count == 0) { - this->count = state.count; - this->mean = state.mean; - this->m2 = state.m2; - return; - } - MergeVarStd(this->count, this->mean, state.count, state.mean, state.m2, &this->count, - &this->mean, &this->m2); + this->moments.MergeFrom(level, state.moments); } + const int level; const int32_t decimal_scale; - const VarianceOptions options; - int64_t count = 0; - double mean = 0; - double m2 = 0; // m2 = count*s2 = sum((X-mean)^2) + const bool skip_nulls; + Moments moments; bool all_valid = true; }; template -struct VarStdImpl : public ScalarAggregator { - using ThisType = VarStdImpl; +struct StatisticImpl : public ScalarAggregator { + using ThisType = StatisticImpl; using ArrayType = typename TypeTraits::ArrayType; - explicit VarStdImpl(int32_t decimal_scale, const std::shared_ptr& out_type, - const VarianceOptions& options, VarOrStd return_type) - : out_type(out_type), state(decimal_scale, options), return_type(return_type) {} + StatisticImpl(StatisticType stat_type, int32_t decimal_scale, + const std::shared_ptr& out_type, const VarianceOptions& options) + : out_type(out_type), + stat_type(stat_type), + skip_nulls(options.skip_nulls), + min_count(options.min_count), + ddof(options.ddof), + state(moments_level_for_statistic(stat_type), decimal_scale, skip_nulls) {} + + StatisticImpl(StatisticType stat_type, int32_t decimal_scale, + const std::shared_ptr& out_type, const SkewOptions& options) + : out_type(out_type), + stat_type(stat_type), + skip_nulls(options.skip_nulls), + min_count(options.min_count), + ddof(0), + state(moments_level_for_statistic(stat_type), decimal_scale, skip_nulls) {} Status Consume(KernelContext*, const ExecSpan& batch) override { if (batch[0].is_array()) { @@ -188,92 +196,90 @@ struct VarStdImpl : public ScalarAggregator { } Status Finalize(KernelContext*, Datum* out) override { - if (state.count <= state.options.ddof || state.count < state.options.min_count || - (!state.all_valid && !state.options.skip_nulls)) { + if (state.count() <= ddof || state.count() < min_count || + (!state.all_valid && !skip_nulls)) { out->value = std::make_shared(); } else { - double var = state.m2 / (state.count - state.options.ddof); - out->value = - std::make_shared(return_type == VarOrStd::Var ? 
var : sqrt(var)); + switch (stat_type) { + case StatisticType::Std: + out->value = std::make_shared(state.moments.Stddev(ddof)); + break; + case StatisticType::Var: + out->value = std::make_shared(state.moments.Variance(ddof)); + break; + case StatisticType::Skew: + out->value = std::make_shared(state.moments.Skew()); + break; + case StatisticType::Kurtosis: + out->value = std::make_shared(state.moments.Kurtosis()); + break; + default: + return Status::NotImplemented("Unsupported statistic type ", + static_cast(stat_type)); + } } return Status::OK(); } std::shared_ptr out_type; - VarStdState state; - VarOrStd return_type; -}; - -struct VarStdInitState { - std::unique_ptr state; - KernelContext* ctx; - const DataType& in_type; - const std::shared_ptr& out_type; - const VarianceOptions& options; - VarOrStd return_type; - - VarStdInitState(KernelContext* ctx, const DataType& in_type, - const std::shared_ptr& out_type, - const VarianceOptions& options, VarOrStd return_type) - : ctx(ctx), - in_type(in_type), - out_type(out_type), - options(options), - return_type(return_type) {} - - Status Visit(const DataType&) { - return Status::NotImplemented("No variance/stddev implemented"); - } - - Status Visit(const HalfFloatType&) { - return Status::NotImplemented("No variance/stddev implemented"); - } - - template - enable_if_number Visit(const Type&) { - state.reset( - new VarStdImpl(/*decimal_scale=*/0, out_type, options, return_type)); - return Status::OK(); - } - - template - enable_if_decimal Visit(const Type&) { - state.reset(new VarStdImpl(checked_cast(in_type).scale(), - out_type, options, return_type)); - return Status::OK(); - } - - Result> Create() { - RETURN_NOT_OK(VisitTypeInline(in_type, this)); - return std::move(state); - } + StatisticType stat_type; + bool skip_nulls; + uint32_t min_count; + int ddof = 0; + MomentsState state; }; -Result> StddevInit(KernelContext* ctx, - const KernelInitArgs& args) { - VarStdInitState visitor( - ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), - static_cast(*args.options), VarOrStd::Std); - return visitor.Create(); +template +Result> StatisticInit( + StatisticType stat_type, const DataType& in_type, + const std::shared_ptr& out_type, const OptionsType& options) { + auto make_kernel_state = [&](auto&& type, int32_t decimal_scale = 0) { + using Type = std::decay_t; + return std::unique_ptr( + new StatisticImpl(stat_type, decimal_scale, out_type, options)); + }; + + auto visit = [&](auto&& type) -> Result> { + using Type = std::decay_t; + // Decimals + if constexpr (is_decimal_type::value) { + return make_kernel_state(type, type.scale()); + } + // Numbers (except half-float) + if constexpr (is_number_type::value && !is_half_float_type::value) { + return make_kernel_state(type); + } + return Status::NotImplemented("No variance/stddev implemented for ", + in_type.ToString()); + }; + return VisitType(in_type, visit); } -Result> VarianceInit(KernelContext* ctx, - const KernelInitArgs& args) { - VarStdInitState visitor( - ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), - static_cast(*args.options), VarOrStd::Var); - return visitor.Create(); +template +Result> StatisticInit(KernelContext* ctx, + const KernelInitArgs& args) { + const DataType& in_type = *args.inputs[0].type; + const std::shared_ptr& out_type = args.kernel->signature->out_type().type(); + const OptionsType& options = checked_cast(*args.options); + + return StatisticInit(kStatType, in_type, out_type, options); } -void AddVarStdKernels(KernelInit init, - 
const std::vector>& types, - ScalarAggregateFunction* func) { +void AddStatisticKernels(KernelInit init, + const std::vector>& types, + ScalarAggregateFunction* func) { for (const auto& ty : types) { auto sig = KernelSignature::Make({InputType(ty->id())}, float64()); AddAggKernel(std::move(sig), init, func); } } +void AddStatisticKernels(KernelInit init, ScalarAggregateFunction* func) { + AddStatisticKernels(init, NumericTypes(), func); + AddStatisticKernels( + init, {decimal32(1, 1), decimal64(1, 1), decimal128(1, 1), decimal256(1, 1)}, func); +} + const FunctionDoc stddev_doc{ "Calculate the standard deviation of a numeric array", ("The number of degrees of freedom can be controlled using VarianceOptions.\n" @@ -292,21 +298,53 @@ const FunctionDoc variance_doc{ {"array"}, "VarianceOptions"}; +const FunctionDoc skew_doc{ + "Calculate the skewness of a numeric array", + ("Nulls are ignored by default. If there are not enough non-null values\n" + "in the array to satisfy `min_count`, null is returned.\n" + "The behavior of nulls and the `min_count` parameter can be changed\n" + "in SkewOptions."), + {"array"}, + "SkewOptions"}; + +const FunctionDoc kurtosis_doc{ + "Calculate the kurtosis of a numeric array", + ("Nulls are ignored by default. If there are not enough non-null values\n" + "in the array to satisfy `min_count`, null is returned.\n" + "The behavior of nulls and the `min_count` parameter can be changed\n" + "in SkewOptions."), + {"array"}, + "SkewOptions"}; + std::shared_ptr AddStddevAggKernels() { - static auto default_std_options = VarianceOptions::Defaults(); + static const auto default_std_options = VarianceOptions::Defaults(); auto func = std::make_shared("stddev", Arity::Unary(), stddev_doc, &default_std_options); - AddVarStdKernels(StddevInit, NumericTypes(), func.get()); - AddVarStdKernels(StddevInit, {decimal128(1, 1), decimal256(1, 1)}, func.get()); + AddStatisticKernels(StatisticInit, func.get()); return func; } std::shared_ptr AddVarianceAggKernels() { - static auto default_var_options = VarianceOptions::Defaults(); + static const auto default_var_options = VarianceOptions::Defaults(); auto func = std::make_shared( "variance", Arity::Unary(), variance_doc, &default_var_options); - AddVarStdKernels(VarianceInit, NumericTypes(), func.get()); - AddVarStdKernels(VarianceInit, {decimal128(1, 1), decimal256(1, 1)}, func.get()); + AddStatisticKernels(StatisticInit, func.get()); + return func; +} + +std::shared_ptr AddSkewAggKernels() { + static const auto default_options = SkewOptions::Defaults(); + auto func = std::make_shared("skew", Arity::Unary(), skew_doc, + &default_options); + AddStatisticKernels(StatisticInit, func.get()); + return func; +} + +std::shared_ptr AddKurtosisAggKernels() { + static const auto default_options = SkewOptions::Defaults(); + auto func = std::make_shared("kurtosis", Arity::Unary(), + kurtosis_doc, &default_options); + AddStatisticKernels(StatisticInit, func.get()); return func; } @@ -315,8 +353,8 @@ std::shared_ptr AddVarianceAggKernels() { void RegisterScalarAggregateVariance(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(AddVarianceAggKernels())); DCHECK_OK(registry->AddFunction(AddStddevAggKernels())); + DCHECK_OK(registry->AddFunction(AddSkewAggKernels())); + DCHECK_OK(registry->AddFunction(AddKurtosisAggKernels())); } -} // namespace internal -} // namespace compute -} // namespace arrow +} // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h 
b/cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h index 675ebfd91d308..f7c35bea96783 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_var_std_internal.h @@ -18,24 +18,27 @@ #pragma once #include "arrow/util/int128_internal.h" +#include "arrow/util/logging.h" +#include "arrow/util/math_internal.h" -namespace arrow { -namespace compute { -namespace internal { +#include +#include + +namespace arrow::compute::internal { using arrow::internal::int128_t; // Accumulate sum/squared sum (using naive summation) // Shared implementation between scalar/hash aggregate variance/stddev kernels -template struct IntegerVarStd { - using c_type = typename ArrowType::c_type; - int64_t count = 0; int64_t sum = 0; int128_t square_sum = 0; - void ConsumeOne(const c_type value) { + template + void ConsumeOne(Integer value) { + static_assert(std::is_integral_v); + static_assert(sizeof(Integer) <= 4); sum += value; square_sum += static_cast(value) * value; count++; @@ -53,16 +56,87 @@ struct IntegerVarStd { } }; -static inline void MergeVarStd(int64_t count1, double mean1, int64_t count2, double mean2, - double m22, int64_t* out_count, double* out_mean, - double* out_m2) { - double mean = (mean1 * count1 + mean2 * count2) / (count1 + count2); - *out_m2 += m22 + count1 * (mean1 - mean) * (mean1 - mean) + - count2 * (mean2 - mean) * (mean2 - mean); - *out_count += count2; - *out_mean = mean; +enum class StatisticType { Var, Std, Skew, Kurtosis }; + +constexpr int moments_level_for_statistic(StatisticType s) { + switch (s) { + case StatisticType::Skew: + return 3; + case StatisticType::Kurtosis: + return 4; + default: + return 2; + } } -} // namespace internal -} // namespace compute -} // namespace arrow +struct Moments { + int64_t count = 0; + double mean = 0; + double m2 = 0; // m2 = sum((X-mean)^2) + double m3 = 0; // m3 = sum((X-mean)^3) + double m4 = 0; // m4 = sum((X-mean)^4) + + Moments() = default; + Moments(int64_t count, double mean, double m2, double m3 = 0, double m4 = 0) + : count(count), mean(mean), m2(m2), m3(m3), m4(m4) {} + + double Variance(int ddof) const { return m2 / (count - ddof); } + + double Stddev(int ddof) const { return sqrt(Variance(ddof)); } + + double Skew() const { + // This may return NaN for m2 == 0 and m3 == 0, which is expected + return sqrt(count) * m3 / sqrt(m2 * m2 * m2); + } + + double Kurtosis() const { + // This may return NaN for m2 == 0 and m4 == 0, which is expected + return count * m4 / (m2 * m2) - 3; + } + + void MergeFrom(int level, const Moments& other) { *this = Merge(level, *this, other); } + + static Moments Merge(int level, const Moments& a, const Moments& b) { + using ::arrow::internal::NeumaierSum; + + if (a.count == 0) { + return b; + } else if (b.count == 0) { + return a; + } + + // Shorter aliases for readability + const int64_t na = a.count, nb = b.count; + const int64_t n = na + nb; + const double mean = (a.mean * na + b.mean * nb) / n; + // NOTE: there is a more common formula: + // double delta = b.mean - a.mean; + // double m2 = a.m2 + b.m2 + delta * delta * na * nb / n; + // but it gives worse results in TestVarStdKernelMergeStability. 
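+    // Spelled out, with na = a.count, nb = b.count and n = na + nb:
+    //   mean = (na * a.mean + nb * b.mean) / n
+    //   m2   = a.m2 + b.m2 + na * (a.mean - mean)^2 + nb * (b.mean - mean)^2
+    // i.e. each input's m2 is re-centered on the pooled mean, and the four
+    // terms are accumulated with Neumaier summation to limit cancellation.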
+ const double m2 = NeumaierSum({a.m2, b.m2, na * (a.mean - mean) * (a.mean - mean), + nb * (b.mean - mean) * (b.mean - mean)}); + double m3 = 0; + double m4 = 0; + // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics + if (level >= 3) { + double delta = b.mean - a.mean; + double delta2 = delta * delta; + m3 = NeumaierSum({a.m3, b.m3, delta2 * delta * na * nb * (na - nb) / (n * n), + 3 * delta * (na * b.m2 - nb * a.m2) / n}); + if (level >= 4) { + m4 = NeumaierSum( + {a.m4, b.m4, + (delta2 * delta2) * na * nb * (na * na - na * nb + nb * nb) / (n * n * n), + 6 * delta2 * (na * na * b.m2 + nb * nb * a.m2) / (n * n), + 4 * delta * (na * b.m3 - nb * a.m3) / n}); + } + } + return Moments(n, mean, m2, m3, m4); + } + + static Moments FromScalar(int level, double value, int64_t count) { + return Moments(count, /*mean=*/value, /*m2=*/0, /*m3=*/0, /*m4=*/0); + } +}; + +} // namespace arrow::compute::internal diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 21b7bd9bf6632..e84c3c2dc164d 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -26,6 +26,7 @@ #include "arrow/array/builder_nested.h" #include "arrow/array/builder_primitive.h" +#include "arrow/array/concatenate.h" #include "arrow/buffer_builder.h" #include "arrow/compute/api_aggregate.h" #include "arrow/compute/api_vector.h" @@ -33,6 +34,7 @@ #include "arrow/compute/kernels/aggregate_internal.h" #include "arrow/compute/kernels/aggregate_var_std_internal.h" #include "arrow/compute/kernels/common_internal.h" +#include "arrow/compute/kernels/pivot_internal.h" #include "arrow/compute/kernels/util_internal.h" #include "arrow/compute/row/grouper.h" #include "arrow/compute/row/row_encoder_internal.h" @@ -40,6 +42,7 @@ #include "arrow/stl_allocator.h" #include "arrow/type_traits.h" #include "arrow/util/bit_run_reader.h" +#include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/bitmap_writer.h" #include "arrow/util/checked_cast.h" @@ -47,6 +50,7 @@ #include "arrow/util/int128_internal.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/ree_util.h" +#include "arrow/util/span.h" #include "arrow/util/task_group.h" #include "arrow/util/tdigest.h" #include "arrow/util/thread_pool.h" @@ -56,6 +60,7 @@ namespace arrow { using internal::checked_cast; using internal::FirstTimeBitmapWriter; +using util::span; namespace compute { namespace internal { @@ -129,10 +134,12 @@ HashAggregateKernel MakeUnaryKernel(KernelInit init) { std::move(init)); } -Status AddHashAggKernels( - const std::vector>& types, - Result make_kernel(const std::shared_ptr&), - HashAggregateFunction* function) { +using HashAggregateKernelFactory = + std::function(const std::shared_ptr&)>; + +Status AddHashAggKernels(const std::vector>& types, + HashAggregateKernelFactory make_kernel, + HashAggregateFunction* function) { for (const auto& ty : types) { ARROW_ASSIGN_OR_RAISE(auto kernel, make_kernel(ty)); RETURN_NOT_OK(function->AddKernel(std::move(kernel))); @@ -840,28 +847,58 @@ using GroupedMeanFactory = using arrow::internal::int128_t; template -struct GroupedVarStdImpl : public GroupedAggregator { +struct GroupedStatisticImpl : public GroupedAggregator { using CType = typename TypeTraits::CType; + using SumType = typename internal::GetSumType::SumType; + // This method is defined solely to make GroupedStatisticImpl instantiable + // in ConsumeImpl below. 
It will be redefined in subclasses. Status Init(ExecContext* ctx, const KernelInitArgs& args) override { - options_ = *checked_cast(args.options); - if (is_decimal_type::value) { - const int32_t scale = + return Status::NotImplemented(""); + } + + // Init helper for hash_variance and hash_stddev + Status InitInternal(ExecContext* ctx, const KernelInitArgs& args, + StatisticType stat_type, const VarianceOptions& options) { + return InitInternal(ctx, args, stat_type, options.ddof, options.skip_nulls, + options.min_count); + } + + // Init helper for hash_skew and hash_kurtosis + Status InitInternal(ExecContext* ctx, const KernelInitArgs& args, + StatisticType stat_type, const SkewOptions& options) { + return InitInternal(ctx, args, stat_type, /*ddof=*/0, options.skip_nulls, + options.min_count); + } + + Status InitInternal(ExecContext* ctx, const KernelInitArgs& args, + StatisticType stat_type, int ddof, bool skip_nulls, + uint32_t min_count) { + if constexpr (is_decimal_type::value) { + int32_t decimal_scale = checked_cast(*args.inputs[0].type).scale(); - return InitInternal(ctx, scale, args.options); + return InitInternal(ctx, stat_type, decimal_scale, ddof, skip_nulls, min_count); + } else { + return InitInternal(ctx, stat_type, /*decimal_scale=*/0, ddof, skip_nulls, + min_count); } - return InitInternal(ctx, 0, args.options); } - Status InitInternal(ExecContext* ctx, int32_t decimal_scale, - const FunctionOptions* options) { - options_ = *checked_cast(options); + Status InitInternal(ExecContext* ctx, StatisticType stat_type, int32_t decimal_scale, + int ddof, bool skip_nulls, uint32_t min_count) { + stat_type_ = stat_type; + moments_level_ = moments_level_for_statistic(stat_type_); decimal_scale_ = decimal_scale; + skip_nulls_ = skip_nulls; + min_count_ = min_count; + ddof_ = ddof; ctx_ = ctx; pool_ = ctx->memory_pool(); counts_ = TypedBufferBuilder(pool_); means_ = TypedBufferBuilder(pool_); m2s_ = TypedBufferBuilder(pool_); + m3s_ = TypedBufferBuilder(pool_); + m4s_ = TypedBufferBuilder(pool_); no_nulls_ = TypedBufferBuilder(pool_); return Status::OK(); } @@ -872,6 +909,12 @@ struct GroupedVarStdImpl : public GroupedAggregator { RETURN_NOT_OK(counts_.Append(added_groups, 0)); RETURN_NOT_OK(means_.Append(added_groups, 0)); RETURN_NOT_OK(m2s_.Append(added_groups, 0)); + if (moments_level_ >= 3) { + RETURN_NOT_OK(m3s_.Append(added_groups, 0)); + if (moments_level_ >= 4) { + RETURN_NOT_OK(m4s_.Append(added_groups, 0)); + } + } RETURN_NOT_OK(no_nulls_.Append(added_groups, true)); return Status::OK(); } @@ -889,27 +932,30 @@ struct GroupedVarStdImpl : public GroupedAggregator { return value.ToDouble(decimal_scale_); } - Status Consume(const ExecSpan& batch) override { return ConsumeImpl(batch); } + Status Consume(const ExecSpan& batch) override { + constexpr bool kCanUseIntArithmetic = std::is_integral_v && sizeof(CType) <= 4; + + if constexpr (kCanUseIntArithmetic) { + if (moments_level_ == 2) { + return ConsumeIntegral(batch); + } + } + return ConsumeGeneric(batch); + } // float/double/int64/decimal: calculate `m2` (sum((X-mean)^2)) with - // `two pass algorithm` (see aggregate_var_std.cc) - template - enable_if_t::value || (sizeof(CType) > 4) || - std::is_same_v, - Status> - ConsumeImpl(const ExecSpan& batch) { - using SumType = typename internal::GetSumType::SumType; - - GroupedVarStdImpl state; - RETURN_NOT_OK(state.InitInternal(ctx_, decimal_scale_, &options_)); + // two pass algorithm (see aggregate_var_std.cc) + Status ConsumeGeneric(const ExecSpan& batch) { + GroupedStatisticImpl 
state; + RETURN_NOT_OK(state.InitInternal(ctx_, stat_type_, decimal_scale_, ddof_, skip_nulls_, + min_count_)); RETURN_NOT_OK(state.Resize(num_groups_)); int64_t* counts = state.counts_.mutable_data(); double* means = state.means_.mutable_data(); - double* m2s = state.m2s_.mutable_data(); uint8_t* no_nulls = state.no_nulls_.mutable_data(); - // XXX this uses naive summation; we should switch to pairwise summation as was - // done for the scalar aggregate kernel in ARROW-11567 + // XXX this uses naive summation; we should switch to pairwise summation + // (as the scalar aggregate kernel does) or Kahan summation. std::vector sums(num_groups_); VisitGroupedValues( batch, @@ -923,27 +969,34 @@ struct GroupedVarStdImpl : public GroupedAggregator { means[i] = ToDouble(sums[i]) / counts[i]; } + double* m2s = state.m2s_mutable_data(); + double* m3s = state.m3s_mutable_data(); + double* m4s = state.m4s_mutable_data(); + // Having distinct VisitGroupedValuesNonNull calls based on moments_level_ + // would increase code generation for relatively little benefit. VisitGroupedValuesNonNull( batch, [&](uint32_t g, typename TypeTraits::CType value) { - const double v = ToDouble(value); - m2s[g] += (v - means[g]) * (v - means[g]); + const double d = ToDouble(value) - means[g]; + const double d2 = d * d; + switch (moments_level_) { + case 4: + m4s[g] += d2 * d2; + [[fallthrough]]; + case 3: + m3s[g] += d2 * d; + [[fallthrough]]; + default: + m2s[g] += d2; + break; + } }); - ARROW_ASSIGN_OR_RAISE(auto mapping, - AllocateBuffer(num_groups_ * sizeof(uint32_t), pool_)); - for (uint32_t i = 0; static_cast(i) < num_groups_; i++) { - mapping->template mutable_data_as()[i] = i; - } - ArrayData group_id_mapping(uint32(), num_groups_, {nullptr, std::move(mapping)}, - /*null_count=*/0); - return this->Merge(std::move(state), group_id_mapping); + return MergeSameGroups(std::move(state)); } - // int32/16/8: textbook one pass algorithm with integer arithmetic (see - // aggregate_var_std.cc) - template - enable_if_t::value && (sizeof(CType) <= 4), Status> ConsumeImpl( - const ExecSpan& batch) { + // int32/16/8: textbook one pass algorithm to compute `m2` with integer arithmetic + // (see aggregate_var_std.cc) + Status ConsumeIntegral(const ExecSpan& batch) { // max number of elements that sum will not overflow int64 (2Gi int32 elements) // for uint32: 0 <= sum < 2^63 (int64 >= 0) // for int32: -2^62 <= sum < 2^62 @@ -958,15 +1011,7 @@ struct GroupedVarStdImpl : public GroupedAggregator { return Status::OK(); } - std::vector> var_std(num_groups_); - - ARROW_ASSIGN_OR_RAISE(auto mapping, - AllocateBuffer(num_groups_ * sizeof(uint32_t), pool_)); - for (uint32_t i = 0; static_cast(i) < num_groups_; i++) { - mapping->template mutable_data_as()[i] = i; - } - ArrayData group_id_mapping(uint32(), num_groups_, {nullptr, std::move(mapping)}, - /*null_count=*/0); + std::vector var_std(num_groups_); for (int64_t start_index = 0; start_index < batch.length; start_index += max_length) { // process in chunks that overflow will never happen @@ -974,12 +1019,13 @@ struct GroupedVarStdImpl : public GroupedAggregator { // reset state var_std.clear(); var_std.resize(num_groups_); - GroupedVarStdImpl state; - RETURN_NOT_OK(state.InitInternal(ctx_, decimal_scale_, &options_)); + GroupedStatisticImpl state; + RETURN_NOT_OK(state.InitInternal(ctx_, stat_type_, decimal_scale_, ddof_, + skip_nulls_, min_count_)); RETURN_NOT_OK(state.Resize(num_groups_)); int64_t* other_counts = state.counts_.mutable_data(); double* other_means = 
state.means_.mutable_data(); - double* other_m2s = state.m2s_.mutable_data(); + double* other_m2s = state.m2s_mutable_data(); uint8_t* other_no_nulls = state.no_nulls_.mutable_data(); if (batch[0].is_array()) { @@ -1028,34 +1074,63 @@ struct GroupedVarStdImpl : public GroupedAggregator { other_means[i] = var_std[i].mean(); other_m2s[i] = var_std[i].m2(); } - RETURN_NOT_OK(this->Merge(std::move(state), group_id_mapping)); + RETURN_NOT_OK(MergeSameGroups(std::move(state))); } return Status::OK(); } Status Merge(GroupedAggregator&& raw_other, const ArrayData& group_id_mapping) override { - // Combine m2 from two chunks (see aggregate_var_std.cc) - auto other = checked_cast(&raw_other); + DCHECK_EQ(group_id_mapping.length, + checked_cast(&raw_other)->num_groups_); + const uint32_t* g = group_id_mapping.GetValues(1); + return MergeInternal(std::move(raw_other), + [g](int64_t other_g) { return g[other_g]; }); + } + + Status MergeSameGroups(GroupedAggregator&& raw_other) { + return MergeInternal(std::move(raw_other), [](int64_t other_g) { return other_g; }); + } + + template + Status MergeInternal(GroupedAggregator&& raw_other, GroupIdMapper&& group_id_mapper) { + // Combine moments from two chunks + auto other = checked_cast(&raw_other); + DCHECK_EQ(moments_level_, other->moments_level_); int64_t* counts = counts_.mutable_data(); double* means = means_.mutable_data(); - double* m2s = m2s_.mutable_data(); + double* m2s = m2s_mutable_data(); + // Moments above the current level will just be ignored. + double* m3s = m3s_mutable_data(); + double* m4s = m4s_mutable_data(); uint8_t* no_nulls = no_nulls_.mutable_data(); const int64_t* other_counts = other->counts_.data(); const double* other_means = other->means_.data(); - const double* other_m2s = other->m2s_.data(); + const double* other_m2s = other->m2s_data(); + const double* other_m3s = other->m3s_data(); + const double* other_m4s = other->m4s_data(); const uint8_t* other_no_nulls = other->no_nulls_.data(); - auto g = group_id_mapping.GetValues(1); - for (int64_t other_g = 0; other_g < group_id_mapping.length; ++other_g, ++g) { + const int64_t num_other_groups = other->num_groups_; + + for (int64_t other_g = 0; other_g < num_other_groups; ++other_g) { + const auto g = group_id_mapper(other_g); if (!bit_util::GetBit(other_no_nulls, other_g)) { - bit_util::ClearBit(no_nulls, *g); + bit_util::ClearBit(no_nulls, g); } if (other_counts[other_g] == 0) continue; - MergeVarStd(counts[*g], means[*g], other_counts[other_g], other_means[other_g], - other_m2s[other_g], &counts[*g], &means[*g], &m2s[*g]); + auto moments = Moments::Merge( + moments_level_, Moments(counts[g], means[g], m2s[g], m3s[g], m4s[g]), + Moments(other_counts[other_g], other_means[other_g], other_m2s[other_g], + other_m3s[other_g], other_m4s[other_g])); + counts[g] = moments.count; + means[g] = moments.mean; + // Fill moments in reverse order, in case m3s or m4s is the same as m2s. 
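+      // (When moments_level_ < 3 or < 4, m3s/m4s alias the m2s_ buffer, as
+      // m3s_mutable_data()/m4s_mutable_data() fall back to m2s_; writing m2
+      // last ensures the genuine m2 value is the one that survives.)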
@@ -1068,11 +1143,30 @@ struct GroupedVarStdImpl : public GroupedAggregator {
     auto* results = values->mutable_data_as<double>();
 
     const int64_t* counts = counts_.data();
-    const double* m2s = m2s_.data();
+    const double* means = means_.data();
+    const double* m2s = m2s_data();
+    const double* m3s = m3s_data();
+    const double* m4s = m4s_data();
     for (int64_t i = 0; i < num_groups_; ++i) {
-      if (counts[i] > options_.ddof && counts[i] >= options_.min_count) {
-        const double variance = m2s[i] / (counts[i] - options_.ddof);
-        results[i] = result_type_ == VarOrStd::Var ? variance : std::sqrt(variance);
+      if (counts[i] > ddof_ && counts[i] >= min_count_) {
+        const auto moments = Moments(counts[i], means[i], m2s[i], m3s[i], m4s[i]);
+        switch (stat_type_) {
+          case StatisticType::Var:
+            results[i] = moments.Variance(ddof_);
+            break;
+          case StatisticType::Std:
+            results[i] = moments.Stddev(ddof_);
+            break;
+          case StatisticType::Skew:
+            results[i] = moments.Skew();
+            break;
+          case StatisticType::Kurtosis:
+            results[i] = moments.Kurtosis();
+            break;
+          default:
+            return Status::NotImplemented("Statistic type ",
                                           static_cast<int>(stat_type_));
+        }
        continue;
      }
 
@@ -1085,7 +1179,7 @@ struct GroupedVarStdImpl : public GroupedAggregator {
        null_count += 1;
        bit_util::SetBitTo(null_bitmap->mutable_data(), i, false);
      }
-    if (!options_.skip_nulls) {
+    if (!skip_nulls_) {
      if (null_bitmap) {
        arrow::internal::BitmapAnd(null_bitmap->data(), 0, no_nulls_.data(), 0,
                                   num_groups_, 0, null_bitmap->mutable_data());
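// [Illustrative sketch, not part of the patch] Finalize above maps the accumulated
// (count, mean, m2, m3, m4) to the requested statistic via the Moments helper. The
// functions below show one common convention: variance with a delta-degrees-of-freedom
// (ddof) correction, and moment-based skewness and excess kurtosis. The exact
// conventions (e.g. bias correction, whether kurtosis is reported as excess) are
// defined by the kernel's Moments implementation, so treat this only as an
// approximation of that logic; no edge-case handling (count <= ddof, m2 == 0) is shown.
#include <cmath>
#include <cstdio>

double Variance(long long count, double m2, int ddof) { return m2 / (count - ddof); }
double Stddev(long long count, double m2, int ddof) {
  return std::sqrt(Variance(count, m2, ddof));
}
double Skew(long long count, double m2, double m3) {
  // g1 = (m3 / n) / (m2 / n)^{3/2}
  return (m3 / count) / std::pow(m2 / count, 1.5);
}
double Kurtosis(long long count, double m2, double m4) {
  // g2 = (m4 / n) / (m2 / n)^2 - 3  (excess kurtosis)
  return (m4 / count) / ((m2 / count) * (m2 / count)) - 3.0;
}

int main() {
  // Central moments of {1, 2, 4, 8}: count=4, m2=28.75, m3=50.625, m4=392.828125
  std::printf("var=%.4f std=%.4f skew=%.4f kurt=%.4f\n",
              Variance(4, 28.75, /*ddof=*/1), Stddev(4, 28.75, 1),
              Skew(4, 28.75, 50.625), Kurtosis(4, 28.75, 392.828125));
}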
@@ -1101,56 +1195,91 @@ struct GroupedVarStdImpl : public GroupedAggregator {
   std::shared_ptr<DataType> out_type() const override { return float64(); }
 
-  VarOrStd result_type_;
+  const double* m2s_data() const { return m2s_.data(); }
+  // If moments_level_ < 3, the values read from m3s_data() will be ignored,
+  // but we still need to point to a valid buffer of the appropriate size.
+  // The trick is to reuse m2s_, which simplifies the code.
+  const double* m3s_data() const {
+    return (moments_level_ >= 3) ? m3s_.data() : m2s_.data();
+  }
+  const double* m4s_data() const {
+    return (moments_level_ >= 4) ? m4s_.data() : m2s_.data();
+  }
+
+  double* m2s_mutable_data() { return m2s_.mutable_data(); }
+  double* m3s_mutable_data() {
+    return (moments_level_ >= 3) ? m3s_.mutable_data() : m2s_.mutable_data();
+  }
+  double* m4s_mutable_data() {
+    return (moments_level_ >= 4) ? m4s_.mutable_data() : m2s_.mutable_data();
+  }
+
+  StatisticType stat_type_;
+  int moments_level_;
   int32_t decimal_scale_;
-  VarianceOptions options_;
+  bool skip_nulls_;
+  uint32_t min_count_;
+  int ddof_;
   int64_t num_groups_ = 0;
   // m2 = count * s2 = sum((X-mean)^2)
   TypedBufferBuilder<int64_t> counts_;
-  TypedBufferBuilder<double> means_, m2s_;
+  TypedBufferBuilder<double> means_, m2s_, m3s_, m4s_;
   TypedBufferBuilder<bool> no_nulls_;
   ExecContext* ctx_;
   MemoryPool* pool_;
 };
 
-template <typename Type, VarOrStd result_type>
-Result<std::unique_ptr<KernelState>> VarStdInit(KernelContext* ctx,
-                                                const KernelInitArgs& args) {
-  auto impl = std::make_unique<GroupedVarStdImpl<Type>>();
-  impl->result_type_ = result_type;
-  RETURN_NOT_OK(impl->Init(ctx->exec_context(), args));
-  // R build with openSUSE155 requires an explicit unique_ptr construction
-  return std::unique_ptr<KernelState>(std::move(impl));
-}
-
-template <VarOrStd result_type>
-struct GroupedVarStdFactory {
-  template <typename T, typename Enable = enable_if_t<is_integer_type<T>::value ||
-                                                      is_floating_type<T>::value ||
-                                                      is_decimal_type<T>::value>>
-  Status Visit(const T&) {
-    kernel = MakeKernel(std::move(argument_type), VarStdInit<T, result_type>);
-    return Status::OK();
-  }
+template
+struct ConcreteGroupedStatisticImpl : public GroupedStatisticImpl {
+  using GroupedStatisticImpl::InitInternal;
 
-  Status Visit(const HalfFloatType& type) {
-    return Status::NotImplemented("Computing variance/stddev of data of type ", type);
+  Status Init(ExecContext* ctx, const KernelInitArgs& args) override {
+    const auto& options = checked_cast(*args.options);
+    return InitInternal(ctx, args, kStatType, options);
   }
+};
 
-  Status Visit(const DataType& type) {
-    return Status::NotImplemented("Computing variance/stddev of data of type ", type);
-  }
+template
+using GroupedVarianceImpl =
+    ConcreteGroupedStatisticImpl;
+template
+using GroupedStddevImpl =
+    ConcreteGroupedStatisticImpl;
+template
+using GroupedSkewImpl =
+    ConcreteGroupedStatisticImpl;
+template
+using GroupedKurtosisImpl =
+    ConcreteGroupedStatisticImpl;
+
+template