diff --git a/.github/actions/cmake-build/action.yml b/.github/actions/cmake-build/action.yml new file mode 100644 index 00000000..dab19520 --- /dev/null +++ b/.github/actions/cmake-build/action.yml @@ -0,0 +1,45 @@ +name: CMake Build +description: Build CMake Project + +inputs: + cmake: + description: Path to CMake executable + required: True + ninja: + description: Path to ninja executable + required: True + source: + description: Path to source directory + required: True + build: + description: Path to build directory + required: True + jobs: + description: Number of jobs to use + default: 1 + config: + description: CMake configuration to build + default: RelWithDebInfo + args: + description: Extra arguments to pass CMake + +runs: + using: composite + steps: + - shell: pwsh + run: | + function Invoke-NativeCommand { + $command = $args[0] + $arguments = $args[1..($args.Length)] + & $command @arguments + if ($LastExitCode -ne 0) { + Write-Error "Exit code $LastExitCode while running $command $arguments" + } + } + if ($IsWindows) { + $vsPath = &"${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -Property InstallationPath + Import-Module (Get-ChildItem $vsPath -Recurse -File -Filter Microsoft.VisualStudio.DevShell.dll).FullName + Enter-VsDevShell -VsInstallPath $vsPath -SkipAutomaticLocation -DevCmdArguments '-arch=x64' + } + Invoke-NativeCommand '${{ inputs.cmake }}' '-S${{ inputs.source }}' '-B${{ inputs.build }}' '-GNinja Multi-Config' '-DCMAKE_MAKE_PROGRAM=${{ inputs.ninja }}' '-DCMAKE_INSTALL_PREFIX=${{ inputs.build }}/prefix' ${{ inputs.args }} + Invoke-NativeCommand '${{ inputs.cmake }}' --build '${{ inputs.build }}' --config '${{ inputs.config }}' -j${{ inputs.jobs }} '--' -k0 diff --git a/.github/actions/fetch-clang/action.yml b/.github/actions/fetch-clang/action.yml new file mode 100644 index 00000000..730e4b84 --- /dev/null +++ 
b/.github/actions/fetch-clang/action.yml @@ -0,0 +1,66 @@ +name: Fetch Clang +description: Puts clang's path into the output + +inputs: + version: + description: Version of Clang to fetch + required: true + base-directory: + description: Directory in which to install clang +outputs: + clang: + description: Path of clang executable + value: ${{ steps.script.outputs.clang }} + clangxx: + description: Path of clang++ executable + value: ${{ steps.script.outputs.clangxx }} + +runs: + using: composite + steps: + - id: script + shell: pwsh + working-directory: ${{ inputs.base-directory }} + run: | + $version = ${{ inputs.version }} + function Invoke-NativeCommand { + $command = $args[0] + $arguments = $args[1..($args.Length)] + & $command @arguments + if ($LastExitCode -ne 0) { + Write-Error "Exit code $LastExitCode while running $command $arguments" + } + } + if ($IsMacOs) { + } elseif ($IsLinux) { + $tmp = New-TemporaryFile + Invoke-WebRequest -Uri 'https://apt.llvm.org/llvm-snapshot.gpg.key' -OutFile $tmp + Invoke-NativeCommand sudo apt-key add $tmp + $tmp | Remove-Item + Invoke-NativeCommand sudo add-apt-repository -y "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-${version} main" + Invoke-NativeCommand sudo apt-get update + $pkgs = @("clang-${version}", "libc++-${version}-dev", "libc++abi-${version}-dev") + if (${version} -eq 12) { + $pkgs += "libunwind-${version}-dev" + } + if (${version} -ge 14) { + $pkgs += "libclang-rt-${version}-dev" + } + Invoke-NativeCommand sudo apt-get install -y $pkgs + Add-Content "${env:GITHUB_OUTPUT}" "clang=$((Get-Command clang-${version}).Source)" + Add-Content "${env:GITHUB_OUTPUT}" "clangxx=$((Get-Command clang++-${version}).Source)" + } elseif ($IsWindows) { + $release = Invoke-WebRequest -Uri 'https://api.github.com/repos/llvm/llvm-project/releases' -UseBasicParsing | + ConvertFrom-Json | + Select-Object -Property @{Name = 'version'; Expression = 
{[System.Management.Automation.SemanticVersion]$_.tag_name.Substring('llvmorg-'.Length)}},assets | + Where-Object {$_.version.Major -eq $version -and ($_.assets | Where-Object {$_.name -like "LLVM-*-win64.exe"})} | + Sort-Object -Property version -Descending | + Select-Object -First 1 + $uri = ($release.assets | Where-Object {$_.name -eq "LLVM-$($release.version)-win64.exe"}).browser_download_url + $tmp = New-TemporaryFile | Rename-Item -NewName { $_ -replace 'tmp$', 'exe' } -PassThru + Invoke-WebRequest -Uri $uri -OutFile $tmp + Start-Process "$tmp" -Wait -NoNewWindow -ArgumentList /S,"/D=$(Join-Path (Get-Location) LLVM)" + $tmp | Remove-Item + Add-Content "${env:GITHUB_OUTPUT}" "clang=$(Join-Path (Get-Location) LLVM bin clang)" + Add-Content "${env:GITHUB_OUTPUT}" "clangxx=$(Join-Path (Get-Location) LLVM bin clang++)" + } diff --git a/.github/actions/fetch-cmake/action.yml b/.github/actions/fetch-cmake/action.yml new file mode 100644 index 00000000..aab3b06a --- /dev/null +++ b/.github/actions/fetch-cmake/action.yml @@ -0,0 +1,60 @@ +name: Fetch CMake +description: Puts CMake's path into the output + +inputs: + version: + description: Version of CMake to fetch + default: 3.24.2 + base-directory: + description: Directory in which to install CMake +outputs: + cmake: + description: Path of CMake executable + value: ${{ steps.script.outputs.cmake }} + ctest: + description: Path of CTest executable + value: ${{ steps.script.outputs.ctest }} + +runs: + using: composite + steps: + - id: script + shell: pwsh + working-directory: ${{ inputs.base-directory }} + run: | + $version = '${{ inputs.version }}' + $oldVersion = [System.Version]$version -lt [System.Version]'3.20.0' + $arch = 'x86_64' + $ext = 'tar.gz' + $binDir = 'bin' + if ($IsMacOs) { + if ($oldVersion) { + $os = 'Darwin' + } else { + $os = 'macos' + $arch = 'universal' + } + $binDir = 'CMake.app/Contents/bin' + } elseif ($IsLinux) { + if ($oldVersion) { + $os = 'Linux' + } else { + $os = 'linux' + } + } elseif ($IsWindows) { + if 
($oldVersion) { + $os = 'win64' + $arch = 'x64' + } else { + $os = 'windows' + } + $ext = 'zip' + } + $base = "cmake-${version}-${os}-${arch}" + $uri = "https://github.com/Kitware/CMake/releases/download/v${version}/${base}.${ext}" + $tmp = New-TemporaryFile + Invoke-WebRequest -Uri $uri -OutFile $tmp + cmake -E tar xf $tmp + $tmp | Remove-Item + Add-Content "${env:GITHUB_OUTPUT}" "cmake=$(Join-Path (Get-Location) $base $binDir cmake)" + Add-Content "${env:GITHUB_OUTPUT}" "ctest=$(Join-Path (Get-Location) $base $binDir ctest)" diff --git a/.github/actions/fetch-libstdc++/action.yml b/.github/actions/fetch-libstdc++/action.yml new file mode 100644 index 00000000..22c5b23f --- /dev/null +++ b/.github/actions/fetch-libstdc++/action.yml @@ -0,0 +1,23 @@ +name: Fetch libstdc++ +description: Fetches libstdc++ + +inputs: + version: + description: Version of libstdc++ to fetch + required: true + +runs: + using: composite + steps: + - shell: pwsh + run: | + function Invoke-NativeCommand { + $command = $args[0] + $arguments = $args[1..($args.Length)] + & $command @arguments + if ($LastExitCode -ne 0) { + Write-Error "Exit code $LastExitCode while running $command $arguments" + } + } + Invoke-NativeCommand sudo apt-get update + Invoke-NativeCommand sudo apt-get install -y libstdc++-${{ inputs.version }}-dev diff --git a/.github/actions/fetch-ninja/action.yml b/.github/actions/fetch-ninja/action.yml new file mode 100644 index 00000000..af43be50 --- /dev/null +++ b/.github/actions/fetch-ninja/action.yml @@ -0,0 +1,35 @@ +name: Fetch Ninja +description: Puts ninja's path into the output + +inputs: + version: + description: Version of Ninja to fetch + default: 1.11.1 + base-directory: + description: Directory in which to install Ninja +outputs: + ninja: + description: Path of ninja executable + value: ${{ steps.script.outputs.ninja }} + +runs: + using: composite + steps: + - id: script + shell: pwsh + working-directory: ${{ inputs.base-directory }} + run: | + $version = '${{ 
inputs.version }}' + if ($IsMacOs) { + $os = 'mac' + } elseif ($IsLinux) { + $os = 'linux' + } elseif ($IsWindows) { + $os = 'win' + } + $uri = "https://github.com/ninja-build/ninja/releases/download/v${version}/ninja-${os}.zip" + $tmp = New-TemporaryFile + Invoke-WebRequest -Uri $uri -OutFile $tmp + cmake -E tar xf $tmp + $tmp | Remove-Item + Add-Content "${env:GITHUB_OUTPUT}" "ninja=$(Join-Path (Get-Location) ninja)" diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml new file mode 100644 index 00000000..f9490bd5 --- /dev/null +++ b/.github/actions/run-tests/action.yml @@ -0,0 +1,26 @@ +name: Run Tests +description: Run Tests + +inputs: + ctest: + description: Path to CTest executable + required: True + test-dir: + description: Path to test directory + required: True + attempts: + description: Number of attempts to run per test + default: 3 + jobs: + description: Number of jobs to use + default: 1 + config: + description: CTest configuration to test + default: RelWithDebInfo + +runs: + using: composite + steps: + - shell: pwsh + run: | + & '${{ inputs.ctest }}' --test-dir '${{ inputs.test-dir }}' -C ${{ inputs.config }} -V -j${{ inputs.jobs }} --repeat until-pass:${{ inputs.attempts }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7eccc31..fe8c5bf0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,97 +14,108 @@ on: workflow_dispatch: ~ env: - CMAKE_VERSION: 3.18.4 - NINJA_VERSION: 1.10.1 CTEST_OUTPUT_ON_FAILURE: 1 NINJA_STATUS: '[%f/%t %o/sec] ' jobs: + build-matrix: + runs-on: ubuntu-latest + outputs: + tests-matrix: ${{ steps.script.outputs.matrix }} + steps: + - uses: actions/checkout@v2 + - id: script + shell: pwsh + run: | + $json = Get-Content -Raw .github/workflows/test_matrix.json | ConvertFrom-Json + Add-Content "${env:GITHUB_OUTPUT}" "matrix=$(ConvertTo-Json $json -Compress)" tests: + needs: build-matrix strategy: + fail-fast: false matrix: - conf: - - name: Ubuntu 
(Clang 12 - TSAN) - os: ubuntu-20.04 - cc: clang-12 - cxx: clang++-12 - tsan: YES - - - name: Ubuntu (Clang 12 - no TSAN) - os: ubuntu-20.04 - cc: clang-12 - cxx: clang++-12 - tsan: NO - - - name: macOS (Clang 11 - no TSAN) - os: macos-latest - cc: clang - cxx: clang++ - tsan: NO - - - name: Windows (Visual Studio Enterprise 2022) - os: windows-2022 - cc: cl - cxx: cl - tsan: NO - - name: ${{ matrix.conf.name }} - - runs-on: ${{ matrix.conf.os }} - - env: - CC: ${{ matrix.conf.cc }} - CXX: ${{ matrix.conf.cxx }} - + include: + ${{ fromJSON(needs.build-matrix.outputs.tests-matrix) }} + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 - uses: friendlyanon/fetch-core-count@v1 id: cores - - run: cmake -E make_directory build/tools - - - name: Install CMake and Ninja - id: tools - working-directory: build/tools - run: cmake -D RUNNER_OS=${{ runner.os }} - -P ../../cmake/ciToolsUpdate.cmake + - shell: pwsh + run: New-Item build/tools -ItemType Directory -ErrorAction SilentlyContinue - - name: Combine CI variables - id: args - shell: cmake -P {0} - run: > - message([==[::set-output name=args::${{ matrix.conf.os }} - "${{ steps.tools.outputs.cmake }}" - "${{ steps.tools.outputs.ninja }}" - ${{ steps.cores.outputs.plus_one }}]==]) - - - name: Install clang 12 - working-directory: ${{ env.HOME }} + - shell: sudo pwsh -File {0} run: | - sudo apt-get update - sudo apt-get install clang-12 libc++-12-dev libc++abi-12-dev - if: ${{ startsWith(matrix.conf.os, 'ubuntu') }} - - - name: Build examples - run: cmake -P cmake/ciBuild.cmake -- example build/example - ${{ steps.args.outputs.args }} - continue-on-error: ${{ startsWith(matrix.conf.os, 'macos') }} - - - name: Build tests + Add-Content -Value 'Acquire::Retries "100";' -Path /etc/apt/apt.conf.d/99-custom + Add-Content -Value 'Acquire::https::Timeout "240";' -Path /etc/apt/apt.conf.d/99-custom + Add-Content -Value 'Acquire::http::Timeout "240";' -Path 
/etc/apt/apt.conf.d/99-custom + Add-Content -Value 'APT::Get::Assume-Yes "true";' -Path /etc/apt/apt.conf.d/99-custom + Add-Content -Value 'APT::Install-Recommends "false";' -Path /etc/apt/apt.conf.d/99-custom + Add-Content -Value 'APT::Install-Suggests "false";' -Path /etc/apt/apt.conf.d/99-custom + if: ${{ startsWith(matrix.os, 'ubuntu') }} + + - uses: ./.github/actions/fetch-cmake + id: cmake + with: + base-directory: build/tools + + - uses: ./.github/actions/fetch-ninja + id: ninja + with: + base-directory: build/tools + + - uses: ./.github/actions/fetch-libstdc++ + id: libstdcxx + with: + version: ${{ matrix.libstdcxx-version }} + if: ${{ matrix.stdlib == 'libstdc++' }} + + - uses: ./.github/actions/fetch-clang + id: clang + with: + version: ${{ matrix.clang-version }} + base-directory: build/tools + if: ${{ matrix.clang-version }} + + - name: Build Examples + uses: ./.github/actions/cmake-build + continue-on-error: ${{ matrix.os == 'macos-11' }} + env: + CXX: ${{ steps.clang.outputs.clangxx }} + with: + cmake: ${{ steps.cmake.outputs.cmake }} + ninja: ${{ steps.ninja.outputs.ninja }} + jobs: ${{ steps.cores.outputs.plus_one }} + source: example + build: build/example + args: > + -DBUILD_SHARED_LIBS=${{ matrix.shared }} + ${{ ( matrix.stdlib == 'libc++' && '-DCMAKE_CXX_FLAGS=-stdlib=libc++' ) || '' }} + + - name: Build Tests id: build_tests - continue-on-error: ${{ startsWith(matrix.conf.os, 'macos') }} - run: cmake -P cmake/ciBuild.cmake -- test build/test - ${{ steps.args.outputs.args }} - -D ENABLE_THREAD_SANITIZER:BOOL=${{ matrix.conf.tsan }} - - - name: Run tests - continue-on-error: ${{ startsWith(matrix.conf.os, 'macos') }} + uses: ./.github/actions/cmake-build + continue-on-error: ${{ matrix.os == 'macos-11' }} + env: + CXX: ${{ steps.clang.outputs.clangxx }} + with: + cmake: ${{ steps.cmake.outputs.cmake }} + ninja: ${{ steps.ninja.outputs.ninja }} + jobs: ${{ steps.cores.outputs.plus_one }} + source: test + build: build/test + args: > + 
-DBUILD_SHARED_LIBS=${{ matrix.shared }} + -DENABLE_THREAD_SANITIZER=${{ matrix.tsan }} + ${{ ( matrix.stdlib == 'libc++' && '-DCMAKE_CXX_FLAGS=-stdlib=libc++' ) || '' }} + + - uses: ./.github/actions/run-tests + continue-on-error: ${{ startsWith(matrix.os, 'macos') }} if: steps.build_tests.outcome == 'success' - working-directory: build/test - shell: cmake -P {0} - run: > - include(../../cmake/exec.cmake) - - exec("${{ steps.tools.outputs.ctest }}" -C Release -V - -j ${{ steps.cores.outputs.plus_one }}) + with: + ctest: ${{ steps.cmake.outputs.ctest }} + jobs: ${{ steps.cores.outputs.plus_one }} + test-dir: build/test diff --git a/.github/workflows/test_matrix.json b/.github/workflows/test_matrix.json new file mode 100644 index 00000000..108ac187 --- /dev/null +++ b/.github/workflows/test_matrix.json @@ -0,0 +1,265 @@ +[ + { + "os": "ubuntu-20.04", + "clang-version": 11, + "stdlib": "libc++", + "tsan": false, + "shared": false, + "name" : "ubuntu-20.04, clang-11, libc++, shared=false, tsan=false" + }, + { + "os": "ubuntu-20.04", + "clang-version": 11, + "stdlib": "libc++", + "tsan": false, + "shared": true, + "name" : "ubuntu-20.04, clang-11, libc++, shared=true, tsan=false" + }, + { + "os": "ubuntu-20.04", + "clang-version": 12, + "stdlib": "libc++", + "tsan": false, + "shared": false, + "name" : "ubuntu-20.04, clang-12, libc++, shared=false, tsan=false" + }, + { + "os": "ubuntu-20.04", + "clang-version": 12, + "stdlib": "libc++", + "tsan": false, + "shared": true, + "name" : "ubuntu-20.04, clang-12, libc++, shared=true, tsan=false" + }, + { + "os": "ubuntu-20.04", + "clang-version": 13, + "stdlib": "libc++", + "tsan": false, + "shared": false, + "name" : "ubuntu-20.04, clang-13, libc++, shared=false, tsan=false" + }, + { + "os": "ubuntu-20.04", + "clang-version": 13, + "stdlib": "libc++", + "tsan": false, + "shared": true, + "name" : "ubuntu-20.04, clang-13, libc++, shared=true, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 14, + "stdlib": 
"libc++", + "tsan": false, + "shared": false, + "name" : "ubuntu-22.04, clang-14, libc++, shared=false, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 14, + "stdlib": "libc++", + "tsan": false, + "shared": true, + "name" : "ubuntu-22.04, clang-14, libc++, shared=true, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libc++", + "tsan": false, + "shared": false, + "name" : "ubuntu-22.04, clang-15, libc++, shared=false, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libc++", + "tsan": true, + "shared": false, + "name" : "ubuntu-22.04, clang-15, libc++, shared=false, tsan=true" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libc++", + "tsan": false, + "shared": true, + "name" : "ubuntu-22.04, clang-15, libc++, shared=true, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 14, + "stdlib": "libstdc++", + "libstdcxx-version": 11, + "tsan": false, + "shared": false, + "name" : "ubuntu-22.04, clang-14, libstdc++11, shared=false, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 14, + "stdlib": "libstdc++", + "libstdcxx-version": 11, + "tsan": false, + "shared": true, + "name" : "ubuntu-22.04, clang-14, libstdc++11, shared=true, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 14, + "stdlib": "libstdc++", + "libstdcxx-version": 12, + "tsan": false, + "shared": false, + "name" : "ubuntu-22.04, clang-14, libstdc++12, shared=false, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 14, + "stdlib": "libstdc++", + "libstdcxx-version": 12, + "tsan": false, + "shared": true, + "name" : "ubuntu-22.04, clang-14, libstdc++12, shared=true, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libstdc++", + "libstdcxx-version": 11, + "tsan": false, + "shared": false, + "name" : "ubuntu-22.04, clang-15, libstdc++11, shared=false, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + 
"stdlib": "libstdc++", + "libstdcxx-version": 11, + "tsan": false, + "shared": true, + "name" : "ubuntu-22.04, clang-15, libstdc++11, shared=true, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libstdc++", + "libstdcxx-version": 11, + "tsan": true, + "shared": false, + "name" : "ubuntu-22.04, clang-15, libstdc++11, shared=false, tsan=true" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libstdc++", + "libstdcxx-version": 12, + "tsan": false, + "shared": false, + "name" : "ubuntu-22.04, clang-15, libstdcxx-12, shared=false, tsan=false" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libstdc++", + "libstdcxx-version": 12, + "tsan": true, + "shared": false, + "name" : "ubuntu-22.04, clang-15, libstdc++12, shared=false, tsan=true" + }, + { + "os": "ubuntu-22.04", + "clang-version": 15, + "stdlib": "libstdc++", + "libstdcxx-version": 12, + "tsan": false, + "shared": true, + "name" : "ubuntu-22.04, clang-15, libstdcxx-12, shared=true, tsan=false" + }, + { + "os": "windows-2019", + "msvc-version": 2019, + "stdlib": "msvc-stl", + "tsan": false, + "shared": false, + "name" : "windows-2019, msvc-2019, msvc-stl, shared=false, tsan=false" + }, + { + "os": "windows-2019", + "msvc-version": 2019, + "stdlib": "msvc-stl", + "tsan": false, + "shared": true, + "name" : "windows-2019, msvc-2019, msvc-stl, shared=true, tsan=false" + }, + { + "os": "windows-2022", + "msvc-version": 2022, + "stdlib": "msvc-stl", + "tsan": false, + "shared": false, + "name" : "windows-2022, msvc-2022, msvc-stl, shared=false, tsan=false" + }, + { + "os": "windows-2022", + "msvc-version": 2022, + "stdlib": "msvc-stl", + "tsan": false, + "shared": true, + "name" : "windows-2022, msvc-2022, msvc-stl, shared=true, tsan=false" + }, + { + "os": "windows-2022", + "clang-version": 14, + "stdlib": "msvc-stl", + "tsan": false, + "shared": false, + "name" : "windows-2022, clang-14, msvc-stl, shared=false, tsan=false" + }, + { + "os": 
"windows-2022", + "clang-version": 14, + "stdlib": "msvc-stl", + "tsan": false, + "shared": true, + "name" : "windows-2022, clang-14, msvc-stl, shared=true, tsan=false" + }, + { + "os": "windows-2022", + "clang-version": 15, + "stdlib": "msvc-stl", + "tsan": false, + "shared": false, + "name" : "windows-2022, clang-15, msvc-stl, shared=false, tsan=false" + }, + { + "os": "windows-2022", + "clang-version": 15, + "stdlib": "msvc-stl", + "tsan": false, + "shared": true, + "name" : "windows-2022, clang-15, msvc-stl, shared=true, tsan=false" + }, + { + "os": "macos-12", + "stdlib": "libc++", + "tsan": false, + "shared": false, + "name" : "macos-12, libc++, shared=false, tsan=false" + }, + { + "os": "macos-12", + "stdlib": "libc++", + "tsan": false, + "shared": true, + "name" : "macos-12, libc++, shared=true, tsan=false" + }, + { + "os": "macos-12", + "stdlib": "libc++", + "tsan": true, + "shared": false, + "name" : "macos-12, libc++, shared=false, tsan=true" + } +] \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 22dc9c68..cb8837a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.16) project(concurrencpp - VERSION 0.1.5 + VERSION 0.1.6 LANGUAGES CXX) include(cmake/coroutineOptions.cmake) @@ -15,7 +15,6 @@ if(concurrencpp_INCLUDE_WITHOUT_SYSTEM) endif() # ---- Declare library ---- -set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE) set(concurrencpp_sources source/task.cpp @@ -30,7 +29,7 @@ set(concurrencpp_sources source/results/promises.cpp source/runtime/runtime.cpp source/threads/async_lock.cpp - source/threads/binary_semaphore.cpp + source/threads/async_condition_variable.cpp source/threads/thread.cpp source/timers/timer.cpp source/timers/timer_queue.cpp) @@ -73,13 +72,14 @@ set(concurrencpp_headers include/concurrencpp/runtime/constants.h include/concurrencpp/runtime/runtime.h include/concurrencpp/threads/async_lock.h - include/concurrencpp/threads/binary_semaphore.h + 
include/concurrencpp/threads/async_condition_variable.h include/concurrencpp/threads/thread.h include/concurrencpp/threads/cache_line.h include/concurrencpp/timers/constants.h include/concurrencpp/timers/timer.h include/concurrencpp/timers/timer_queue.h - include/concurrencpp/utils/bind.h) + include/concurrencpp/utils/bind.h + include/concurrencpp/utils/slist.h) add_library(concurrencpp ${concurrencpp_headers} ${concurrencpp_sources}) add_library(concurrencpp::concurrencpp ALIAS concurrencpp) @@ -91,8 +91,20 @@ target_include_directories(concurrencpp target_compile_features(concurrencpp PUBLIC cxx_std_20) +set_target_properties(concurrencpp PROPERTIES + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON + VERSION "${PROJECT_VERSION}" + SOVERSION "${PROJECT_VERSION_MAJOR}" +) + target_coroutine_options(concurrencpp) +target_compile_definitions(concurrencpp + PRIVATE $<$,SHARED_LIBRARY>:CRCPP_EXPORT_API> + INTERFACE $<$,SHARED_LIBRARY>:CRCPP_IMPORT_API> +) + find_package(Threads REQUIRED) target_link_libraries(concurrencpp PUBLIC Threads::Threads) @@ -107,11 +119,18 @@ include(GNUInstallDirs) set(concurrencpp_directory "concurrencpp-${PROJECT_VERSION}") set(concurrencpp_include_directory "${CMAKE_INSTALL_INCLUDEDIR}/${concurrencpp_directory}") -install(TARGETS concurrencpp - EXPORT concurrencppTargets - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT concurrencpp_Development - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT concurrencpp_Development - INCLUDES DESTINATION "${concurrencpp_include_directory}") +install( + TARGETS concurrencpp + EXPORT concurrencppTargets + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + COMPONENT concurrencpp_Development + INCLUDES + DESTINATION "${concurrencpp_include_directory}" + COMPONENT concurrencpp_Development + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + COMPONENT concurrencpp_Runtime + NAMELINK_COMPONENT concurrencpp_Development + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT 
concurrencpp_Runtime) set(concurrencpp_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/${concurrencpp_directory}") @@ -122,14 +141,19 @@ write_basic_package_version_file( COMPATIBILITY SameMinorVersion ARCH_INDEPENDENT) -install(EXPORT concurrencppTargets - NAMESPACE concurrencpp:: - DESTINATION "${concurrencpp_install_cmakedir}") +install( + EXPORT concurrencppTargets + NAMESPACE concurrencpp:: + DESTINATION "${concurrencpp_install_cmakedir}" + COMPONENT concurrencpp_Development) -install(FILES - "${PROJECT_SOURCE_DIR}/cmake/concurrencppConfig.cmake" +install( + FILES "${PROJECT_SOURCE_DIR}/cmake/concurrencppConfig.cmake" "${PROJECT_BINARY_DIR}/concurrencppConfigVersion.cmake" - DESTINATION "${concurrencpp_install_cmakedir}") + DESTINATION "${concurrencpp_install_cmakedir}" + COMPONENT concurrencpp_Development) -install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/" - DESTINATION "${concurrencpp_include_directory}") +install( + DIRECTORY "${PROJECT_SOURCE_DIR}/include/" + DESTINATION "${concurrencpp_include_directory}" + COMPONENT concurrencpp_Development) diff --git a/README.md b/README.md index 15cfd482..002b9c76 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,19 @@ ![Latest Release](https://img.shields.io/github/v/release/David-Haim/concurrencpp.svg) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -concurrencpp is a tasking library for C++ allowing developers to write highly concurrent applications easily and safely by using tasks, executors and coroutines. +concurrencpp brings the power of concurrent tasks to the C++ world, allowing developers to write highly concurrent applications easily and safely by using tasks, executors and coroutines. By using concurrencpp applications can break down big procedures that need to be processed asynchronously into smaller tasks that run concurrently and work in a co-operative manner to achieve the wanted result. 
concurrencpp also allows applications to write parallel algorithms easily by using parallel coroutines. concurrencpp main advantages are: -* Being able to write modern concurrency code without having to rely on low-level concurrency primitives like locks and condition variables. -* Being able to write highly concurrent and parallel applications that scale automatically to use all hardware resources, as needed. -* Being able to write non-blocking, synchronous-like code easily by using C++20 coroutines and the `co_await` keyword. +* Writing modern concurrency code using higher level tasks instead of low level primitives like `std::thread` and `std::mutex`. +* Writing highly concurrent and parallel applications that scale automatically to use all hardware resources, as needed. +* Achieving non-blocking, synchronous-like code easily by using C++20 coroutines and the `co_await` keyword. * Reducing the possibility of race conditions, data races and deadlocks by using high-level objects with built-in synchronization. * concurrencpp provides various types of commonly used executors with a complete coroutine integration. * Applications can extend the library by implementing their own provided executors. +* concurrencpp is mature and well tested on various platforms and operating systems. 
+ ---- ### Table of contents * [concurrencpp overview](#concurrencpp-overview) @@ -33,7 +35,7 @@ concurrencpp main advantages are: * [Parallel Fibonacci example](#parallel-fibonacci-example) * [Result-promises](#result-promises) * [`result_promise` API](#result_promise-api) - * [`result_promise` example](#example-marshaling-asynchronous-result-using-result_promise) + * [`result_promise` example](#result_promise-example) * [Shared result objects](#shared-result-objects) * [`shared_result` API](#shared_result-api) * [`shared_result` example](#shared_result-example) @@ -55,15 +57,20 @@ concurrencpp main advantages are: * [Delay object example](#delay-object-example) * [Generators](#generators) * [`generator` API](#generator-api) + * [`generator` example](#generator-example) * [Asynchronous locks](#asynchronous-locks) * [`async_lock` API](#async_lock-api) * [`scoped_async_lock` API](#scoped_async_lock-api) + * [`async_lock` example](#async_lock-example) +* [Asynchronous condition variable](#asynchronous-condition-variables) + * [`async_condition_variable` API](#async_condition_variable-api) + * [`async_condition_variable` example](#async_condition_variable-example) * [The runtime object](#the-runtime-object) * [`runtime` API](#runtime-api) * [Creating user-defined executors](#creating-user-defined-executors) * [`task` objects](#task-objects) * [`task` API](#task-api) - * [Using a user-defined executor example](#example-using-a-user-defined-executor) + * [Writing a user-defined executor example](#example-writing-a-user-defined-executor) * [Supported platforms and tools](#supported-platforms-and-tools) * [Building, installing and testing](#building-installing-and-testing) @@ -71,15 +78,15 @@ concurrencpp main advantages are: ### concurrencpp overview -concurrencpp is a task-centric library. A task is an asynchronous operation. Tasks offer a higher level of abstraction for concurrent code than traditional thread-centric approaches. 
Tasks can be chained together, meaning that tasks pass their asynchronous result from one to another, where the result of one task is used as if it were a parameter or an intermediate value of another ongoing task. Tasks allow applications to utilize available hardware resources better and scale much more than using raw threads, since tasks can be suspended, waiting for another task to produce a result, without blocking underlying OS-threads. Tasks bring much more productivity to developers by allowing them to focus more on business-logic and less on low-level concepts like thread management and inter-thread synchronization. +concurrencpp is built around the concept of concurrent tasks. A task is an asynchronous operation. Tasks offer a higher level of abstraction for concurrent code than traditional thread-centric approaches. Tasks can be chained together, meaning that tasks pass their asynchronous result from one to another, where the result of one task is used as if it were a parameter or an intermediate value of another ongoing task. Tasks allow applications to utilize available hardware resources better and scale much more than using raw threads, since tasks can be suspended, awaiting another task to produce a result, without blocking underlying OS-threads. Tasks bring much more productivity to developers by allowing them to focus more on business-logic and less on low-level concepts like thread management and inter-thread synchronization. -While tasks specify *what* actions have to be executed, *executors* are worker-objects that specify *where and how* to execute tasks. Executors spare applications the managing of thread pools and task queues themselves. Executors also decouple those concepts away from application code, by providing a unified API for creating and scheduling tasks. +While tasks specify *what* actions have to be executed, *executors* are worker-objects that specify *where and how* to execute tasks. 
Executors spare applications the tedious management of thread pools and task queues. Executors also decouple those concepts away from application code, by providing a unified API for creating and scheduling tasks. Tasks communicate with each other using *result objects*. A result object is an asynchronous pipe that pass the asynchronous result of one task to another ongoing-task. Results can be awaited and resolved in a non-blocking manner. -These three concepts - the task, the executor and the associated result are the building blocks of concurrencpp. Executors run tasks that communicate with each-other by sending results through result-objects. Tasks, executors and result objects work together symbiotically to produce concurrent code which is fast and clean. +These three concepts - the task, the executor and the associated result are the building blocks of concurrencpp. Executors run tasks that communicate with each other by sending results through result-objects. Tasks, executors and result objects work together symbiotically to produce concurrent code which is fast and clean. -concurrencpp is built around the RAII concept. In order to use tasks and executors, applications create a `runtime` instance in the beginning of the `main` function. The runtime is then used to acquire existing executors and register new user-defined executors. Executors are used to create and schedule tasks to run, and they might return a `result` object that can be used to marshal the asynchronous result to another task that acts as its consumer. +concurrencpp is built around the RAII concept. In order to use tasks and executors, applications create a `runtime` instance in the beginning of the `main` function. The runtime is then used to acquire existing executors and register new user-defined executors. 
Executors are used to create and schedule tasks to run, and they might return a `result` object that can be used to pass the asynchronous result to another task that acts as its consumer. When the runtime is destroyed, it iterates over every stored executor and calls its `shutdown` method. Every executor then exits gracefully. Unscheduled tasks are destroyed, and attempts to create new tasks will throw an exception. #### *"Hello world" program using concurrencpp:* @@ -99,7 +106,7 @@ int main() { } ``` -In this basic example, we created a runtime object, then we acquired the thread executor from the runtime. We used `submit` to pass a lambda as our given callable. This lambda returns `void`, hence, the executor returns a `result` object that marshals the asynchronous result back to the caller. `main` calls `get` which blocks the main thread until the result becomes ready. If no exception was thrown, `get` returns `void`. If an exception was thrown, `get` re-throws it. Asynchronously, `thread_executor` launches a new thread of execution and runs the given lambda. It implicitly `co_return void` and the task is finished. `main` is then unblocked. +In this basic example, we created a runtime object, then we acquired the thread executor from the runtime. We used `submit` to pass a lambda as our given callable. This lambda returns `void`, hence, the executor returns a `result` object that passes the asynchronous result back to the caller. `main` calls `get` which blocks the main thread until the result becomes ready. If no exception was thrown, `get` returns `void`. If an exception was thrown, `get` re-throws it. Asynchronously, `thread_executor` launches a new thread of execution and runs the given lambda. It implicitly `co_return void` and the task is finished. `main` is then unblocked. 
#### *Concurrent even-number counting:* @@ -182,12 +189,12 @@ concurrencpp allows applications to produce and consume coroutines as the main w Eager tasks start to run the moment they are invoked. This type of execution is recommended when applications need to fire an asynchronous action and consume its result later on (fire and consume later), or completely ignore the asynchronous result (fire and forget). -Eager tasks can return `result` or `null_result`. `result` return type tells the coroutine to marshal the returned value or the thrown exception (fire and consume later) while `null_result` return type tells the coroutine to drop and ignore any of them (fire and forget). +Eager tasks can return `result` or `null_result`. `result` return type tells the coroutine to pass the returned value or the thrown exception (fire and consume later) while `null_result` return type tells the coroutine to drop and ignore any of them (fire and forget). Eager coroutines can start to run synchronously, in the caller thread. This kind of coroutines is called "regular coroutines". Concurrencpp eager coroutines can also start to run in parallel, inside a given executor, this kind of coroutines is called "parallel coroutines". - - Lazy tasks, on the other hand, start to run only when `co_await`ed. This type of tasks is recommended when the result of the task is meant to be consumed immediately after creating the task. Lazy tasks, being deferred, are a bit more optimized for the case of immediate-consumption, as they do not need special thread-synchronization in order to marshal the asynchronous result back to its consumer. The compiler might also optimize away some memory allocations needed to form the underlying coroutine promise. It is not possible to fire a lazy task and execute something else meanwhile - the firing of a lazy-callee coroutine necessarily means the suspension of the caller-coroutine. 
The caller coroutine will only be resumed when the lazy-callee coroutine completes. Lazy tasks can only return `lazy_result`. + +Lazy tasks, on the other hand, start to run only when `co_await`ed. This type of tasks is recommended when the result of the task is meant to be consumed immediately after creating the task. Lazy tasks, being deferred, are a bit more optimized for the case of immediate-consumption, as they do not need special thread-synchronization in order to pass the asynchronous result back to its consumer. The compiler might also optimize away some memory allocations needed to form the underlying coroutine promise. It is not possible to fire a lazy task and execute something else meanwhile - the firing of a lazy-callee coroutine necessarily means the suspension of the caller-coroutine. The caller coroutine will only be resumed when the lazy-callee coroutine completes. Lazy tasks can only return `lazy_result`. Lazy tasks can be converted to eager tasks by calling `lazy_result::run`. This method runs the lazy task inline and returns a `result` object that monitors the newly started task. If developers are unsure which result type to use, they are encouraged to use lazy results, as they can be converted to regular (eager) results if needed. @@ -262,7 +269,7 @@ class executor { void post(callable_type&& callable, argument_types&& ... arguments); /* - Like post, but returns a result object that marshals the asynchronous result. + Like post, but returns a result object that passes the asynchronous result. Throws errors::runtime_shutdown exception if shutdown has been called before. */ template @@ -277,7 +284,7 @@ class executor { void bulk_post(std::span callable_list); /* - Like bulk_post, but returns an array of result objects that marshal the asynchronous results. + Like bulk_post, but returns an array of result objects that passes the asynchronous results. Throws errors::runtime_shutdown exception if shutdown has been called before. 
*/ template @@ -321,9 +328,9 @@ One overload receives a single task object as an argument, and another that rece The second overload is used to enqueue a batch of tasks. This allows better scheduling heuristics and decreased contention. Applications don't have to rely on `enqueue` alone, `concurrencpp::executor` provides an API for scheduling user callables by converting them to task objects behind the scenes. -Applications can request executors to return a result object that marshals the asynchronous result of the provided callable. This is done by calling `executor::submit` and `executor::bulk_submit`. +Applications can request executors to return a result object that passes the asynchronous result of the provided callable. This is done by calling `executor::submit` and `executor::bulk_submit`. `submit` gets a callable, and returns a result object. `executor::bulk_submit` gets a `span` of callables and returns a `vector` of result objects in a similar way `submit` works. -In many cases, applications are not interested in the asynchronous value or exception. In this case, applications can use `executor:::post` and `executor::bulk_post` to schedule a callable or a `span` of callables to be executed, but also tells the task to drop any returned value or thrown exception. Not marshaling the asynchronous result is faster than marshaling, but then we have no way of knowing the status or the result of the ongoing task. +In many cases, applications are not interested in the asynchronous value or exception. In this case, applications can use `executor::post` and `executor::bulk_post` to schedule a callable or a `span` of callables to be executed, but also tells the task to drop any returned value or thrown exception. Not passing the asynchronous result is faster than passing, but then we have no way of knowing the status or the result of the ongoing task. 
`post`, `bulk_post`, `submit` and `bulk_submit` use `enqueue` behind the scenes for the underlying scheduling mechanism. @@ -552,13 +559,13 @@ class manual_executor { Asynchronous values and exceptions can be consumed using concurrencpp result objects. The `result` type represents the asynchronous result of an eager task while `lazy_result` represents the deferred result of a lazy task. -When a task (eager or lazy) completes, it either returns a valid value or throws an exception. In either case, this asynchronous result is marshaled to the consumer of the result object. +When a task (eager or lazy) completes, it either returns a valid value or throws an exception. In either case, this asynchronous result is passed to the consumer of the result object. `result` objects form asymmetric coroutines - the execution of a caller-coroutine is not affected by the execution of a callee-coroutine, both coroutines can run independently. Only when consuming the result of the callee-coroutine, the caller-coroutine might be suspended awaiting the callee to complete. Up until that point both coroutines run independently. The callee-coroutine runs whether its result is consumed or not. `lazy_result` objects form symmetric coroutines - execution of a callee-coroutine happens only after the suspension of the caller-coroutine. When awaiting a lazy result, the current coroutine is suspended and the lazy task associated with the lazy result starts to run. After the callee-coroutine completes and yields a result, the caller-coroutine is resumed. If a lazy result is not consumed, its associated lazy task never starts to run. -All result objects are a move-only type, and as such, they cannot be used after their content was moved to another result object. In this case, the result object is considered to be empty and attempts to call any method other than `operator bool` and `operator = ` will throw. 
+All result objects are a move-only type, and as such, they cannot be used after their content was moved to another result object. In this case, the result object is considered to be empty and attempts to call any method other than `operator bool` and `operator = ` will throw an exception. After the asynchronous result has been pulled out of the result object (for example, by calling `get` or `operator co_await`), the result object becomes empty. Emptiness can be tested with `operator bool`. @@ -681,8 +688,8 @@ class result{ A lazy result object represents the result of a deferred lazy task. -`lazy_result` has the responsibility of both starting the associated lazy task and marshaling its deferred result back to its consumer. -When awaited or resolved, the lazy result suspends the current coroutine and starts the associated lazy task. when the associated task completes, its asynchronous value is marshaled to the caller task, which is then resumed. +`lazy_result` has the responsibility of both starting the associated lazy task and passing its deferred result back to its consumer. +When awaited or resolved, the lazy result suspends the current coroutine and starts the associated lazy task. When the associated task completes, its asynchronous value is passed to the caller task, which is then resumed. Sometimes, an API might return a lazy result, but applications need its associated task to run eagerly (without suspending the caller task). In this case, lazy tasks can be converted to eager tasks by calling `run` on its associated lazy result. In this case, the associated task will start to run inline, without suspending the caller task. The original lazy result is emptied and a valid `result` object that monitors the newly started task will be returned instead. @@ -761,7 +768,7 @@ class lazy_result { ### Parallel coroutines -Regular eager coroutines start to run synchronously in the calling thread of execution. 
Execution might shift to another thread of execution if the coroutine undergoes a rescheduling, for example by awaiting an unready result object inside it. +Regular eager coroutines start to run synchronously in the calling thread of execution. Execution might shift to another thread of execution if a coroutine undergoes a rescheduling, for example by awaiting an unready result object inside it. concurrencpp also provides parallel coroutines, which start to run inside a given executor, not in the invoking thread of execution. This style of scheduling coroutines is especially helpful when writing parallel algorithms, recursive algorithms and concurrent algorithms that use the fork-join model. Every parallel coroutine must meet the following preconditions: @@ -778,7 +785,13 @@ concurrencpp will start the coroutine suspended and immediately reschedule it to If the executor passed to the parallel coroutine is null, the coroutine will not start to run and an `std::invalid_argument` exception will be thrown synchronously. If all preconditions are met, Applications can consume the result of the parallel coroutine by using the returned result object. -#### *Parallel Fibonacci example:* +#### Parallel Fibonacci example: + +In this example, we calculate the 30-th member of the Fibonacci sequence in a parallel manner. +We start launching each Fibonacci step in its own parallel coroutine. The first argument is a dummy `executor_tag` and the second argument is the threadpool executor. +Every recursive step invokes a new parallel coroutine that runs in parallel. Each result is `co_return`ed to its parent task and acquired by using `co_await`. +When we deem the input to be small enough to be calculated synchronously (when `curr <= 10`), we stop executing each recursive step in its own task and just solve the algorithm synchronously. 
+ ```cpp #include "concurrencpp/concurrencpp.h" #include @@ -816,11 +829,6 @@ int main() { } ``` -In this example, we calculate the 30-th member of the Fibonacci sequence in a parallel manner. -We start launching each Fibonacci step in its own parallel coroutine. The first argument is a dummy `executor_tag` and the second argument is the threadpool executor. -Every recursive step invokes a new parallel coroutine that runs in parallel. Each result is `co_return`ed to its parent task and acquired by using `co_await`. -When we deem the input to be small enough to be calculated synchronously (when `curr <= 10`), we stop executing each recursive step in its own task and just solve the algorithm synchronously. - To compare, this is how the same code is written without using parallel coroutines, and relying on `executor::submit` alone. Since `fibonacci` returns a `result`, submitting it recursively via `executor::submit` will result a `result>`. @@ -872,7 +880,7 @@ Just like result objects, result-promises are a move only type that becomes empt If a result-promise gets out of scope and no result/exception has been set, the result-promise destructor sets a `concurrencpp::errors::broken_task` exception using the `set_exception` method. Suspended and blocked tasks waiting for the associated result object are resumed/unblocked. -Result promises can convert callback style of code into `async/await` style of code: whenever a component requires a callback to marshal the asynchronous result, we can pass a callback that calls `set_result` or `set_exception` (depending on the asynchronous result itself) on the passed result promise, and return the associated result. 
+Result promises can convert callback style of code into `async/await` style of code: whenever a component requires a callback to pass the asynchronous result, we can pass a callback that calls `set_result` or `set_exception` (depending on the asynchronous result itself) on the passed result promise, and return the associated result. #### `result_promise` API @@ -952,7 +960,10 @@ class result_promise { }; ``` -#### *Example: Marshaling asynchronous result using* `result_promise`: +#### `result_promise` example: + +In this example, `result_promise` is used to push data from one thread, and it can be pulled from its associated `result` object from another thread. + ```cpp #include "concurrencpp/concurrencpp.h" @@ -973,17 +984,18 @@ int main() { my_3_party_executor.join(); } ``` + In this example, We use `std::thread` as a third-party executor. This represents a scenario when a non-concurrencpp executor is used as part of the application life-cycle. We extract the result object before we pass the promise and block the main thread until the result becomes ready. In `my_3_party_executor`, we set a result as if we `co_return`ed it. ### Shared result objects Shared results are a special kind of result objects that allow multiple consumers to access the asynchronous result, similar to `std::shared_future`. Different consumers from different threads can call functions like `await`, `get` and `resolve` in a thread safe manner. -Shared results are built from regular result objects and unlike regular result objects, they are both copyable and movable. As such, `shared_result` behaves like an `std::shared_ptr` object. If the shared result was moved to another instance, the shared result is empty, and trying to access it will throw an exception. +Shared results are built from regular result objects and unlike regular result objects, they are both copyable and movable. As such, `shared_result` behaves like `std::shared_ptr` type. 
If a shared result instance is moved to another instance, the instance becomes empty, and trying to access it will throw an exception. -In order to support multiple consumers, the shared-result object will return a *reference* to asynchronous value instead of moving it (like a regular result object). For example, a `shared_result`will return an `int&` when `get`,`await` etc. are called. If the underlying type of the `shared_result` is `void` or a reference type (like `int&`), they are returned as usual. If the asynchronous result is a thrown-exception, it is re-thrown. +In order to support multiple consumers, shared results return a *reference* to the asynchronous value instead of moving it (like a regular results). For example, a `shared_result` returns an `int&` when `get`,`await` etc. are called. If the underlying type of the `shared_result` is `void` or a reference type (like `int&`), they are returned as usual. If the asynchronous result is a thrown-exception, it is re-thrown. -Do note that while acquiring the asynchronous result using `shared_result` from multiple threads is thread-safe, the actual value might not be. For example, multiple threads can acquire an asynchronous integer by receiving its reference (`int&`). It *does not* make the integer itself thread safe. It is alright to mutate the asynchronous value if the asynchronous value is already thread safe. Alternatively, applications are encouraged to use `const` types to begin with (like `const int`), and acquire constant-references (like `const int&`) that prevent mutation. +Do note that while acquiring the asynchronous result using `shared_result` from multiple threads is thread-safe, the actual value might not be thread safe. For example, multiple threads can acquire an asynchronous integer by receiving its reference (`int&`). It *does not* make the integer itself thread safe. It is alright to mutate the asynchronous value if the asynchronous value is already thread safe. 
Alternatively, applications are encouraged to use `const` types to begin with (like `const int`), and acquire constant-references (like `const int&`) that prevent mutation. #### `shared_result` API ```cpp @@ -1097,7 +1109,10 @@ class share_result { }; ``` -#### `shared_result` example +#### `shared_result` example: + +In this example, a `result` object is converted to a `shared_result` object and a reference to an asynchronous `int` result is acquired by many tasks spawned with `thread_executor`. + ```cpp #include "concurrencpp/concurrencpp.h" @@ -1140,13 +1155,13 @@ int main() { ``` ### Termination in concurrencpp -When the runtime object gets out of scope of `main`, the application terminates. -The runtime iterates each stored executor and calls its `shutdown` method. Trying to access either the timer-queue or any executor throws `errors::runtime_shutdown` exception. When an executor shuts down, it clears its inner task queues, destroying un-executed `task` objects. If a task object stores a concurrencpp-coroutine, that coroutine is resumed inline and an `errors::broken_task` exception is thrown. +When the runtime object gets out of scope of `main`, it iterates each stored executor and calls its `shutdown` method. Trying to access the timer-queue or any executor will throw an `errors::runtime_shutdown` exception. When an executor shuts down, it clears its inner task queues, destroying un-executed `task` objects. If a task object stores a concurrencpp-coroutine, that coroutine is resumed inline and an `errors::broken_task` exception is thrown inside it. In any case where a `runtime_shutdown` or a `broken_task` exception is thrown, applications should terminate their current code-flow gracefully as soon as possible. Those exceptions should not be ignored. +Both `runtime_shutdown` and `broken_task` inherit from `errors::interrupted_task` base class, and this type can also be used in a `catch` clause to handle termination in a unified way. 
### Resume executors -Many concurrencpp asynchronous actions will require an executor as their resume executor. When an asynchronous action (implemented as a coroutine) can finish synchronously, it resumes immediately in the calling thread of execution. If the asynchronous action can't finish synchronously, it will be resumed when it finishes, inside the given resume-executor. -For example, `when_any` utility function requires a resume-executor as its first argument. `when_any` returns a `lazy_result` which becomes ready when at least one given result becomes ready. If one of the results is already ready at the moment of calling `when_any`, the calling coroutine is resumed synchronously in the calling thread of execution. If not, the calling coroutine will be resumed when at least of result is finished, inside the given resume-executor. +Many concurrencpp asynchronous actions require an instance of an executor as their *resume executor*. When an asynchronous action (implemented as a coroutine) can finish synchronously, it resumes immediately in the calling thread of execution. If the asynchronous action can't finish synchronously, it will be resumed when it finishes, inside the given resume-executor. +For example, `when_any` utility function requires an instance of a resume-executor as its first argument. `when_any` returns a `lazy_result` which becomes ready when at least one given result becomes ready. If one of the results is already ready at the moment of calling `when_any`, the calling coroutine is resumed synchronously in the calling thread of execution. If not, the calling coroutine will be resumed when at least one result is finished, inside the given resume-executor. 
Resume executors are important because they mandate where coroutines are resumed in cases where it's not clear where a coroutine is supposed to be resumed (for example, in the case of `when_any` and `when_all`), or in cases where the asynchronous action is processed inside one of the concurrencpp workers, which are only used to process that specific action, and not application code. ### Utility functions @@ -1281,7 +1296,7 @@ lazy_result sequence() { - int i = 1; - int sum = 0; - while (i <= 100) { - sum += i; - ++i; - co_yield sum; - } -} -int main() { - for (auto value : sequence()) { - std::cout << value << std::end; - } - return 0; -} -``` Generators are meant to be used synchronously - they can only use the `co_yield` keyword and **must not** use the `co_await` keyword. A generator will continue to produce values as long as the `co_yield` keyword is called. If the `co_return` keyword is called (explicitly or implicitly), then the generator will stop producing values. Similarly, if an exception is thrown then the generator will stop producing values and the thrown exception will be re-thrown to the consumer of the generator. @@ -1575,7 +1574,7 @@ Generators are meant to be used in a `range-for` loop: Generators implicitly pro When a generator is created, it starts as a lazy task. When its `begin` method is called, the generator is resumed for the first time and an iterator is returned. The lazy task is resumed repeatedly by calling `operator++` on the returned iterator. The returned iterator will be equal to `end` iterator when the generator finishes execution either by exiting gracefully or throwing an exception. As mentioned earlier, this happens behind the scenes by the inner mechanism of the loop and the generator, and should not be called directly. -Like other objects in concurrencpp, Generators are a move-only type. 
After a generator was moved, it is considered empty and trying to access its inner methods (other than `operator bool`) will throw an exception. The emptiness of a generator should not generally occur - it is advised to consume generators upon their creation in a `for` loop and not to try to call its methods individually. +Like other objects in concurrencpp, Generators are a move-only type. After a generator was moved, it is considered empty and trying to access its inner methods (other than `operator bool`) will throw an exception. The emptiness of a generator should not generally occur - it is advised to consume generators upon their creation in a `for` loop and not to try to call their methods individually. #### `generator` API ```cpp @@ -1652,80 +1651,49 @@ class generator_iterator { friend bool operator!=(generator_end_iterator end_it, const generator_iterator& it) noexcept; }; ``` +#### `generator` example: + +In this example, we will write a generator that yields the n-th member of the sequence `S(n) = 1 + 2 + 3 + ... + n` where `n <= 100`: + +```cpp +concurrencpp::generator sequence() { + int i = 1; + int sum = 0; + while (i <= 100) { + sum += i; + ++i; + co_yield sum; + } +} + +int main() { + for (auto value : sequence()) { + std::cout << value << std::endl; + } + return 0; +} +``` ### Asynchronous locks -Regular synchronous locks cannot be used safely inside coroutines for a number of reasons: +Regular synchronous locks cannot be used safely inside tasks for a number of reasons: - Synchronous locks, such as `std::mutex`, are expected to be locked and unlocked in the same thread of execution. Unlocking a synchronous lock in a thread which had not locked it is undefined behavior. Since tasks can be suspended and resumed in any thread of execution, synchronous locks will break when used inside tasks. - Synchronous locks were created to work with *threads* and not with *coroutines*. 
If a synchronous lock is already locked by one thread, then when another thread tries to lock it, the entire thread of execution will be blocked and will be unblocked when the lock is released. This mechanism works well for traditional multi-threading paradigms but not for coroutines: with coroutines, we want *tasks* to be *suspended and resumed* without blocking or interfering with the execution of underlying threads and executors. `concurrencpp::async_lock` solves those issues by providing a similar API to `std::mutex`, with the main difference that calls to `concurrencpp::async_lock` will return a lazy-result that can be `co_awaited` safely inside tasks. If one task tries to lock an async-lock and fails, the task will be suspended, and will be resumed when the lock is unlocked and acquired by the suspended task. This allows executors to process a huge amount of tasks waiting to acquire a lock without expensive context-switching and expensive kernel calls. -Similar to how `std::mutex` works, only one task can acquire `async_lock` at any given time, and a *read barrier* is place at the moment of acquiring. Releasing an async lock places a *write barrier* and allows the next task to acquire it, creating a chain of one-modifier at a time who sees the changes other modifiers had done and posts its modifications for the next modifiers to see. +Similar to how `std::mutex` works, only one task can acquire `async_lock` at any given time, and a *read barrier* is placed at the moment of acquiring. Releasing an async lock places a *write barrier* and allows the next task to acquire it, creating a chain of one-modifier at a time which sees the changes other modifiers had done and posts its modifications for the next modifiers to see. Like `std::mutex`, `concurrencpp::async_lock` ***is not recursive***. Extra attention must be given when acquiring such lock - A lock must not be acquired again in a task that has been spawned by another task which had already acquired the lock. 
In such case, an unavoidable dead-lock will occur. Unlike other objects in concurrencpp, `async_lock` is neither copiable nor movable. Like standard locks, `concurrencpp::async_lock` is meant to be used with scoped wrappers which leverage C++ RAII idiom to ensure locks are always unlocked upon function return or thrown exception. `async_lock::lock` returns a lazy-result of a scoped wrapper that calls `async_lock::unlock` on destruction. Raw uses of `async_lock::unlock` are discouraged. `concurrencpp::scoped_async_lock` acts as the scoped wrapper and provides an API which is almost identical to `std::unique_lock`. `concurrencpp::scoped_async_lock` is movable, but not copiable. -`async_lock::lock` and `scoped_async_lock::lock` require a resume-executor as their parameter. Upon calling those methods, if the lock is available for locking, then it is locked and the current task is resumed immediately. If not, then the current task is suspended, and will be resumed inside the given resume-executor when the lock is finally acquired by it. +`async_lock::lock` and `scoped_async_lock::lock` require a resume-executor as their parameter. Upon calling those methods, if the lock is available for locking, then it is locked and the current task is resumed immediately. If not, then the current task is suspended, and will be resumed inside the given resume-executor when the lock is finally acquired. `concurrencpp::scoped_async_lock` wraps an `async_lock` and ensures it's properly unlocked. Like `std::unique_lock`, there are cases it does not wrap any lock, and in this case it's considered to be empty. An empty `scoped_async_lock` can happen when it's default-constructed, moved, or `scoped_async_lock::release` method is called. An empty scoped-async-lock will not unlock any lock on destruction. Even if the scoped-async-lock is not empty, it does not mean that it owns the underlying async-lock and it will unlock it on destruction. 
Non-empty and non-owning scoped-async locks can happen if `scoped_async_lock::unlock` was called or the scoped-async-lock was constructed using `scoped_async_lock(async_lock&, std::defer_lock_t)` constructor. -#### `async_lock` *example:* - -```cpp -#include "concurrencpp/concurrencpp.h" - -#include -#include - -std::vector numbers; -concurrencpp::async_lock lock; - -concurrencpp::result add_numbers(concurrencpp::executor_tag, - std::shared_ptr executor, - size_t begin, - size_t end) { - for (auto i = begin; i < end; i++) { - concurrencpp::scoped_async_lock raii_wrapper = co_await lock.lock(executor); - numbers.push_back(i); - } -} - -int main() { - concurrencpp::runtime runtime; - constexpr size_t range = 10'000'000; - constexpr size_t sections = 4; - concurrencpp::result results[sections]; - - for (size_t i = 0; i < 4; i++) { - const auto range_start = i * range / sections; - const auto range_end = (i + 1) * range / sections; - - results[i] = add_numbers({}, runtime.thread_pool_executor(), range_start, range_end); - } - - for (auto& result : results) { - result.get(); - } - - std::cout << "vector size is " << numbers.size() << std::endl; - - // make sure the vector state has not been corrupted by unprotected concurrent accesses - std::sort(numbers.begin(), numbers.end()); - for (size_t i = 0; i < range; i++) { - if (numbers[i] != i) { - std::cerr << "vector state is corrupted." << std::endl; - return -1; - } - } - - std::cout << "succeeded pushing range [0 - 10,000,000] concurrently to the vector!" << std::endl; - return 0; -} -``` #### `async_lock` API ```cpp class async_lock { @@ -1854,15 +1822,242 @@ class scoped_async_lock { */ async_lock* mutex() const noexcept; }; +``` +#### `async_lock` example: +In this example we push 10,000,000 integers to an `std::vector` object from different tasks concurrently, while using `async_lock` to make sure no data race occurs and the correctness of the internal state of that vector object is preserved. 
+ +```cpp +#include "concurrencpp/concurrencpp.h" + +#include +#include + +std::vector numbers; +concurrencpp::async_lock lock; + +concurrencpp::result add_numbers(concurrencpp::executor_tag, + std::shared_ptr executor, + size_t begin, + size_t end) { + for (auto i = begin; i < end; i++) { + concurrencpp::scoped_async_lock raii_wrapper = co_await lock.lock(executor); + numbers.push_back(i); + } +} + +int main() { + concurrencpp::runtime runtime; + constexpr size_t range = 10'000'000; + constexpr size_t sections = 4; + concurrencpp::result results[sections]; + + for (size_t i = 0; i < 4; i++) { + const auto range_start = i * range / sections; + const auto range_end = (i + 1) * range / sections; + + results[i] = add_numbers({}, runtime.thread_pool_executor(), range_start, range_end); + } + + for (auto& result : results) { + result.get(); + } + + std::cout << "vector size is " << numbers.size() << std::endl; + + // make sure the vector state has not been corrupted by unprotected concurrent accesses + std::sort(numbers.begin(), numbers.end()); + for (size_t i = 0; i < range; i++) { + if (numbers[i] != i) { + std::cerr << "vector state is corrupted." << std::endl; + return -1; + } + } + + std::cout << "succeeded pushing range [0 - 10,000,000] concurrently to the vector!" << std::endl; + return 0; +} +``` + +### Asynchronous condition variables + +`async_condition_variable` imitates the standard `condition_variable` and can be used safely with tasks alongside `async_lock`. `async_condition_variable` works with `async_lock` to suspend a task until some shared memory (protected by the lock) has changed. Tasks that want to monitor shared memory changes will lock an instance of `async_lock`, and call `async_condition_variable::await`. This will atomically unlock the lock and suspend the current task until some modifier task notifies the condition variable. 
A modifier task acquires the lock, modifies the shared memory, unlocks the lock and calls either `notify_one` or `notify_all`. +When a suspended task is resumed (using the resume executor that was given to `await`), it locks the lock again, allowing the task to continue from the point of suspension seamlessly. +Like `async_lock`, `async_condition_variable` is neither movable nor copiable - it is meant to be created in one place and accessed by multiple tasks. + +`async_condition_variable::await` overloads require a resume-executor, which will be used to resume the task, and a locked `scoped_async_lock`. `async_condition_variable::await` comes with two overloads - one that accepts a predicate and one that doesn't. The overload which does not accept a predicate will suspend the calling task immediately upon invocation until it's resumed by a call to `notify_*`. The overload which does accept a predicate works by letting the predicate inspect the shared memory and suspend the task repeatedly until the shared memory has reached its wanted state. Schematically, it works like calling + +```cpp +while (!pred()) { // pred() inspects the shared memory and returns true or false + co_await await(resume_executor, lock); // suspend the current task until another task calls `notify_xxx` +} +``` +Just like the standard condition variable, applications are encouraged to use the predicate-overload, as it allows more fine-grained control over suspensions and resumptions. +`async_condition_variable` can be used to write concurrent collections and data-structures like concurrent queues and channels. + +Internally, `async_condition_variable` holds a suspension-queue, in which tasks enqueue themselves when they await the condition variable to be notified. When any of the `notify_*` methods are called, the notifying task dequeues either one task or all of the tasks, depending on the invoked method. Tasks are dequeued from the suspension-queue in a FIFO manner. 
+For example, if Task A calls `await` and then Task B calls `await`, then Task C calls `notify_one`, then internally task A will be dequeued and resumed. Task B will remain suspended until another call to `notify_one` or `notify_all` is made. If task A and task B are suspended and task C calls `notify_all`, then both tasks will be dequeued and resumed. + +#### `async_condition_variable` API +```cpp +class async_condition_variable { + /* + Constructor. + */ + async_condition_variable() noexcept; + + /* + Atomically releases lock and suspends the current task by adding it to *this suspension-queue. + Throws std::invalid_argument if resume_executor is null. + Throws std::invalid_argument if lock is not locked at the moment of calling this method. + Might throw std::system_error if the underlying std::mutex throws. + */ + lazy_result await(std::shared_ptr resume_executor, scoped_async_lock& lock); + + /* + Equivalent to: + while (!pred()) { + co_await await(resume_executor, lock); + } + + Might throw any exception that await(resume_executor, lock) might throw. + Might throw any exception that pred might throw. + */ + template + lazy_result await(std::shared_ptr resume_executor, scoped_async_lock& lock, predicate_type pred); + + /* + Dequeues one task from *this suspension-queue and resumes it, if any available at the moment of calling this method. + The suspended task is resumed by scheduling it to run on the executor given when await was called. + Might throw std::system_error if the underlying std::mutex throws. + */ + void notify_one(); + + /* + Dequeues all tasks from *this suspension-queue and resumes them, if any available at the moment of calling this method. + The suspended tasks are resumed by scheduling them to run on the executors given when await was called. + Might throw std::system_error if the underlying std::mutex throws. 
+ */ + void notify_all(); +}; ``` +#### `async_condition_variable` example: + +In this example, `async_lock` and `async_condition_variable` work together to implement a concurrent queue that can be used to send data (in this example, integers) between tasks. Note that some methods return a `result` while another return `lazy_result`, showing how both eager and lazy tasks can work together. + +```cpp +#include "concurrencpp/concurrencpp.h" + +#include +#include + +using namespace concurrencpp; + +class concurrent_queue { + + private: + async_lock _lock; + async_condition_variable _cv; + std::queue _queue; + bool _abort = false; + + public: + concurrent_queue() = default; + + result shutdown(std::shared_ptr resume_executor) { + { + auto guard = co_await _lock.lock(resume_executor); + _abort = true; + } + + _cv.notify_all(); + } + + lazy_result push(std::shared_ptr resume_executor, int i) { + { + auto guard = co_await _lock.lock(resume_executor); + _queue.push(i); + } + + _cv.notify_one(); + } + + lazy_result pop(std::shared_ptr resume_executor) { + auto guard = co_await _lock.lock(resume_executor); + co_await _cv.await(resume_executor, guard, [this] { + return _abort || !_queue.empty(); + }); + + if (!_queue.empty()) { + auto result = _queue.front(); + _queue.pop(); + + co_return result; + } + + assert(_abort); + throw std::runtime_error("queue has been shut down."); + } +}; + +result producer_loop(executor_tag, + std::shared_ptr tpe, + concurrent_queue& queue, + int range_start, + int range_end) { + for (; range_start < range_end; ++range_start) { + co_await queue.push(tpe, range_start); + } +} + +result consumer_loop(executor_tag, std::shared_ptr tpe, concurrent_queue& queue) { + try { + while (true) { + std::cout << co_await queue.pop(tpe) << std::endl; + } + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + } +} + +int main() { + runtime runtime; + const auto thread_pool_executor = runtime.thread_pool_executor(); + concurrent_queue 
queue; + + result producers[4]; + result consumers[4]; + + for (int i = 0; i < 4; i++) { + producers[i] = producer_loop({}, thread_pool_executor, queue, i * 5, (i + 1) * 5); + } + + for (int i = 0; i < 4; i++) { + consumers[i] = consumer_loop({}, thread_pool_executor, queue); + } + + for (int i = 0; i < 4; i++) { + producers[i].get(); + } + + queue.shutdown(thread_pool_executor).get(); + + for (int i = 0; i < 4; i++) { + consumers[i].get(); + } + + return 0; +} +``` + + ### The runtime object The concurrencpp runtime object is the agent used to acquire, store and create new executors. The runtime must be created as a value type as soon as the main function starts to run. When the concurrencpp runtime gets out of scope, it iterates over its stored executors and shuts them down one by one by calling `executor::shutdown`. Executors then exit their inner work loop and any subsequent attempt to schedule a new task will throw a `concurrencpp::runtime_shutdown` exception. The runtime also contains the global timer queue used to create timers and delay objects. -Upon destruction, stored executors will destroy unexecuted tasks, and wait for ongoing tasks to finish. If an ongoing task tries to use an executor to spawn new tasks or schedule its own task continuation - an exception will be thrown. In this case, ongoing tasks need to quit as soon as possible, allowing their underlying executors to quit. The timer queue will also be shut down, cancelling all running timers. With this RAII style of code, no tasks can be processed before the creation of the runtime object, and while/after the runtime gets out of scope. +Upon destruction, stored executors destroy unexecuted tasks, and wait for ongoing tasks to finish. If an ongoing task tries to use an executor to spawn new tasks or schedule its own task continuation - an exception will be thrown. In this case, ongoing tasks need to quit as soon as possible, allowing their underlying executors to quit. 
The timer queue will also be shut down, cancelling all running timers. With this RAII style of code, no tasks can be processed before the creation of the runtime object, and while/after the runtime gets out of scope. This frees concurrent applications from needing to communicate termination messages explicitly. Tasks are free to use executors as long as the runtime object is alive. #### `runtime` API @@ -1941,20 +2136,20 @@ class runtime { #### Creating user-defined executors -As mentioned before, Applications can create their own custom executor type by inheriting the `derivable_executor` class. +Applications can create their own custom executor type by inheriting the `derivable_executor` class. There are a few points to consider when implementing user defined executors: The most important thing is to remember that executors are used from multiple threads, so implemented methods must be thread-safe. -New executors can be created using `runtime::make_executor`. Applications must not create new executors with plain instantiation (such as `std::make_shared` or plain `new`), only by using `runtime::make_executor`. Also, applications must not try to re-instantiate the built-in concurrencpp executors, like the `thread_pool_executor` or the `thread_executor`, those executors must only be accessed through their existing instance in the runtime object. +New executors can be created using `runtime::make_executor`. Applications must not create new executors with plain instantiation (such as `std::make_shared` or plain `new`), only by using `runtime::make_executor`. Also, applications must not try to re-instantiate the built-in concurrencpp executors, like the `thread_pool_executor` or the `thread_executor`, those executors must only be accessed through their existing instances in the runtime object. 
Another important point is to handle shutdown correctly: `shutdown`, `shutdown_requested` and `enqueue` should all monitor the executor state and behave accordingly when invoked: * `shutdown` should tell underlying threads to quit and then join them. - * `shutdown` might be called multiple times, and the method must handle this scenario by ignoring any subsequent call to `shutdown` after the first invocation. +* `shutdown` might be called multiple times, and the method must handle this scenario by ignoring any subsequent calls to `shutdown` after the first invocation. * `enqueue` must throw a `concurrencpp::errors::runtime_shutdown` exception if `shutdown` had been called before. #### `task` objects -Implementing executors is one of the rare cases applications need to work with `concurrencpp::task` class directly. `concurrencpp::task` is a `std::function` like object, but with a few differences. +Implementing executors is one of the rare cases where applications need to work with `concurrencpp::task` class directly. `concurrencpp::task` is an `std::function` like object, but with a few differences. Like `std::function`, the task object stores a callable that acts as the asynchronous operation. Unlike `std::function`, `task` is a move only type. On invocation, task objects receive no parameters and return `void`. Moreover, every task object can be invoked only once. After the first invocation, the task object becomes empty. Invoking an empty task object is equivalent to invoking an empty lambda (`[]{}`), and will not throw any exception. @@ -2025,9 +2220,11 @@ Task objects try to use different methods to optimize the usage of the stored ty }; ``` -When implementing user-defined executors, it is up to the implementation to store tasks (when `enqueue` is called), and execute them according to the executor inner-mechanism. 
+When implementing user-defined executors, it is up to the implementation to store `task` objects (when `enqueue` is called), and execute them according to the executor inner-mechanism. + +#### Example: writing a user-defined executor: -#### *Example: using a user-defined executor:* +In this example, we create an executor which logs actions like enqueuing tasks or executing them. We implement the `executor` interface, and we request the runtime to create and store an instance of it by calling `runtime::make_executor`. The rest of the application behaves exactly the same as if we were to use non user-defined executors. ```cpp #include "concurrencpp/concurrencpp.h" @@ -2144,12 +2341,10 @@ int main() { } ``` -In this example, we created an executor which logs actions like enqueuing a task or executing it. We implement the `executor` interface, and we request the runtime to create and store an instance of it by calling `runtime::make_executor`. The rest of the application behaves exactly the same as if we were to use non user-defined executors. - ### Supported platforms and tools * **Operating systems:** Linux, macOS, Windows (Windows 10 and above) -* **Compilers:** MSVC (Visual Studio 2019 version 16.8.2 and above), Clang (Clang-11 and above) +* **Compilers:** MSVC (Visual Studio 2019 version 16.8.2 and above), Clang 14+, Clang 11-13 with libc++ * **Tools:** CMake (3.16 and above) ### Building, installing and testing @@ -2194,8 +2389,14 @@ $ cmake --build build/test $ cd build/test $ ctest . -V ``` +##### Important note regarding Linux and libc++ +When compiling on Linux, the library tries to use `libstdc++` by default. If you intend to use `libc++` as your standard library implementation, `CMAKE_TOOLCHAIN_FILE` flag should be specified as below: -##### Via package managers on Windows and *nix platforms +```cmake +$ cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/libc++.cmake -DCMAKE_BUILD_TYPE=Release -S . 
-B build/lib +``` + +##### Installing concurrencpp with vcpkg or Conan Alternatively to building and installing the library manually, developers may get stable releases of concurrencpp via the [vcpkg](https://vcpkg.io/) and [Conan](https://conan.io/) package managers: @@ -2222,4 +2423,4 @@ $ cmake -S sandbox -B build/sandbox #for release mode: cmake -DCMAKE_BUILD_TYPE=Release -S sandbox -B build/sandbox $ cmake --build build/sandbox $ ./build/sandbox #runs the sandbox -``` \ No newline at end of file +``` diff --git a/cmake/ciBuild.cmake b/cmake/ciBuild.cmake deleted file mode 100644 index 7389570c..00000000 --- a/cmake/ciBuild.cmake +++ /dev/null @@ -1,28 +0,0 @@ -# Don't ignore empty list elements -cmake_policy(SET CMP0007 NEW) - -set(args "") -foreach(n RANGE ${CMAKE_ARGC}) - if(NOT "${CMAKE_ARGV${n}}" STREQUAL "") - list(APPEND args "${CMAKE_ARGV${n}}") - endif() -endforeach() - -list(FIND args "--" index) -if(index EQUAL -1) - message(FATAL_ERROR "No -- divider found in arguments list") -else() - set(temp "${args}") - math(EXPR index "${index} + 1") - list(SUBLIST temp ${index} -1 args) -endif() - -list(POP_FRONT args source build os cmake ninja cores) - -include(cmake/exec.cmake) -include(cmake/setCiVars.cmake) - -exec(${cmake} -S ${source} -B ${build} -G Ninja -D CMAKE_MAKE_PROGRAM=${ninja} --D CMAKE_BUILD_TYPE=RelWithDebInfo -D CMAKE_INSTALL_PREFIX=build/prefix ${flags} ${args}) - -exec(${cmake} --build ${build} --config RelWithDebInfo -j ${cores}) diff --git a/cmake/ciToolsUpdate.cmake b/cmake/ciToolsUpdate.cmake deleted file mode 100644 index 85fb0f16..00000000 --- a/cmake/ciToolsUpdate.cmake +++ /dev/null @@ -1,53 +0,0 @@ -set(cmake_version $ENV{CMAKE_VERSION}) -set(ninja_version $ENV{NINJA_VERSION}) - -if(RUNNER_OS STREQUAL "Windows") - set(ninja_suffix "win.zip") - set(cmake_suffix "win64-x64.zip") - set(cmake_dir "cmake-${cmake_version}-win64-x64/bin") -elseif(RUNNER_OS STREQUAL "Linux") - set(ninja_suffix "linux.zip") - set(cmake_suffix 
"Linux-x86_64.tar.gz") - set(cmake_dir "cmake-${cmake_version}-Linux-x86_64/bin") -elseif(RUNNER_OS STREQUAL "macOS") - set(ninja_suffix "mac.zip") - set(cmake_suffix "Darwin-x86_64.tar.gz") - set(cmake_dir "cmake-${cmake_version}-Darwin-x86_64/CMake.app/Contents/bin") -endif() - -set(cmake_url "https://github.com/Kitware/CMake/releases/download/v${cmake_version}/cmake-${cmake_version}-${cmake_suffix}") -file(DOWNLOAD "${cmake_url}" ./cmake.zip) -execute_process(COMMAND ${CMAKE_COMMAND} -E tar xvf ./cmake.zip OUTPUT_QUIET) -message(STATUS "Installed CMake") - -set(ninja_url "https://github.com/ninja-build/ninja/releases/download/v${ninja_version}/ninja-${ninja_suffix}") -file(DOWNLOAD "${ninja_url}" ./ninja.zip) -execute_process(COMMAND ${CMAKE_COMMAND} -E tar xvf ./ninja.zip OUTPUT_QUIET) -message(STATUS "Installed Ninja") - -set(export_script "#!/bin/sh\n") - -file(TO_CMAKE_PATH "${CMAKE_SOURCE_DIR}/${cmake_dir}" cmake_dir) -file(TO_CMAKE_PATH "${CMAKE_SOURCE_DIR}/ninja" ninja_out) - -function(echo MESSAGE) - execute_process(COMMAND ${CMAKE_COMMAND} -E echo "${MESSAGE}") -endfunction() - -set(export_script "${export_script}export CTEST=\"${cmake_dir}/ctest\"\n") -echo("::set-output name=ctest::${cmake_dir}/ctest") -message(STATUS "ctest path: ${cmake_dir}/ctest") - -set(export_script "${export_script}export CMAKE=\"${cmake_dir}/cmake\"\n") -echo("::set-output name=cmake::${cmake_dir}/cmake") -message(STATUS "cmake path: ${cmake_dir}/cmake") - -set(export_script "${export_script}export NINJA=\"${ninja_out}\"\n") -echo("::set-output name=ninja::${ninja_out}") -message(STATUS "ninja path: ${ninja_out}") - -file(WRITE export.sh "${export_script}") - -if (NOT RUNNER_OS STREQUAL "Windows") - execute_process(COMMAND chmod +x ninja export.sh "${cmake_dir}/cmake" "${cmake_dir}/ctest") -endif() diff --git a/cmake/coroutineOptions.cmake b/cmake/coroutineOptions.cmake index e6aef1cc..20f16b2a 100644 --- a/cmake/coroutineOptions.cmake +++ b/cmake/coroutineOptions.cmake @@ 
-2,11 +2,9 @@ # current compiler doesn't support coroutines. # function(target_coroutine_options TARGET) - if(MSVC) - target_compile_options(${TARGET} PUBLIC /std:c++latest /permissive-) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(${TARGET} PUBLIC /permissive-) elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options(${TARGET} PUBLIC -stdlib=libc++ -fcoroutines-ts) - target_link_options(${TARGET} PUBLIC -stdlib=libc++) set_target_properties(${TARGET} PROPERTIES CXX_EXTENSIONS NO) else() message(FATAL_ERROR "Compiler not supported: ${CMAKE_CXX_COMPILER_ID}") diff --git a/cmake/exec.cmake b/cmake/exec.cmake deleted file mode 100644 index 0bb7e2d3..00000000 --- a/cmake/exec.cmake +++ /dev/null @@ -1,21 +0,0 @@ -function(exec) - set(args "") - foreach(arg IN LISTS ARGN) - string(FIND "${arg}" " " index) - if(index EQUAL -1) - list(APPEND args "${arg}") - else() - list(APPEND args "\"${arg}\"") - endif() - endforeach() - - string(ASCII 27 Esc) - list(JOIN args " " args) - message(STATUS "${Esc}[36mExecuting: ${args}${Esc}[m") - - execute_process(COMMAND ${ARGN} RESULT_VARIABLE result) - - if(NOT result EQUAL 0) - message(FATAL_ERROR "${Esc}[1;31mBad exit status (${result})${Esc}[m") - endif() -endfunction() diff --git a/cmake/libc++.cmake b/cmake/libc++.cmake new file mode 100644 index 00000000..1f4825f7 --- /dev/null +++ b/cmake/libc++.cmake @@ -0,0 +1,5 @@ +# Specify this file as CMAKE_TOOLCHAIN_FILE when invoking CMake with Clang +# to link to libc++ instead of libstdc++ + +string(APPEND CMAKE_CXX_FLAGS " -stdlib=libc++") +string(APPEND CMAKE_EXE_LINKER_FLAGS " -stdlib=libc++ -lc++abi") diff --git a/cmake/setCiVars.cmake b/cmake/setCiVars.cmake deleted file mode 100644 index 5c5d91b6..00000000 --- a/cmake/setCiVars.cmake +++ /dev/null @@ -1,12 +0,0 @@ -if (os MATCHES "^windows") - execute_process( - COMMAND "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Auxiliary/Build/vcvars64.bat" && set - OUTPUT_FILE 
environment_script_output.txt - ) - file(STRINGS environment_script_output.txt output_lines) - foreach(line IN LISTS output_lines) - if (line MATCHES "^([a-zA-Z0-9_-]+)=(.*)$") - set(ENV{${CMAKE_MATCH_1}} "${CMAKE_MATCH_2}") - endif() - endforeach() -endif() diff --git a/example/3_async_file_processing/source/main.cpp b/example/3_async_file_processing/source/main.cpp index f7e618eb..5e63c9b4 100644 --- a/example/3_async_file_processing/source/main.cpp +++ b/example/3_async_file_processing/source/main.cpp @@ -21,9 +21,10 @@ background_executor. */ +#include +#include #include #include -#include #include "concurrencpp/concurrencpp.h" diff --git a/example/4_async_file_processing_version_2/source/main.cpp b/example/4_async_file_processing_version_2/source/main.cpp index d20279f0..f295bfd0 100644 --- a/example/4_async_file_processing_version_2/source/main.cpp +++ b/example/4_async_file_processing_version_2/source/main.cpp @@ -12,9 +12,10 @@ Both versions are identical in terms of functionality and the final outcome. */ +#include +#include #include #include -#include #include "concurrencpp/concurrencpp.h" diff --git a/example/5_prime_number_finder/source/main.cpp b/example/5_prime_number_finder/source/main.cpp index 9955b681..0cc4cce8 100644 --- a/example/5_prime_number_finder/source/main.cpp +++ b/example/5_prime_number_finder/source/main.cpp @@ -2,6 +2,7 @@ In this example we will collect all prime numbers from 0 to 1,000,000 in a parallel manner, using parallel coroutines. 
*/ +#include #include #include "concurrencpp/concurrencpp.h" @@ -15,7 +16,7 @@ bool is_prime(int num) { return true; } - const auto range = static_cast(sqrt(num)); + const auto range = static_cast(std::sqrt(num)); if (num % 2 == 0 || num % 3 == 0) { return false; } diff --git a/include/concurrencpp/concurrencpp.h b/include/concurrencpp/concurrencpp.h index 02dd2fb6..075c2068 100644 --- a/include/concurrencpp/concurrencpp.h +++ b/include/concurrencpp/concurrencpp.h @@ -18,5 +18,6 @@ #include "concurrencpp/results/generator.h" #include "concurrencpp/executors/executor_all.h" #include "concurrencpp/threads/async_lock.h" +#include "concurrencpp/threads/async_condition_variable.h" #endif diff --git a/include/concurrencpp/errors.h b/include/concurrencpp/errors.h index 14417f8f..3965eb47 100644 --- a/include/concurrencpp/errors.h +++ b/include/concurrencpp/errors.h @@ -4,39 +4,43 @@ #include namespace concurrencpp::errors { - struct empty_object : public std::runtime_error { + struct CRCPP_API empty_object : public std::runtime_error { using runtime_error::runtime_error; }; - struct empty_result : public empty_object { + struct CRCPP_API empty_result : public empty_object { using empty_object::empty_object; }; - struct empty_result_promise : public empty_object { + struct CRCPP_API empty_result_promise : public empty_object { using empty_object::empty_object; }; - struct empty_awaitable : public empty_object { + struct CRCPP_API empty_awaitable : public empty_object { using empty_object::empty_object; }; - struct empty_timer : public empty_object { + struct CRCPP_API empty_timer : public empty_object { using empty_object::empty_object; }; - struct empty_generator : public empty_object { + struct CRCPP_API empty_generator : public empty_object { using empty_object::empty_object; }; - struct broken_task : public std::runtime_error { + struct CRCPP_API interrupted_task : public std::runtime_error { using runtime_error::runtime_error; }; - struct result_already_retrieved : 
public std::runtime_error { - using runtime_error::runtime_error; + struct CRCPP_API broken_task : public interrupted_task { + using interrupted_task::interrupted_task; + }; + + struct CRCPP_API runtime_shutdown : public interrupted_task { + using interrupted_task::interrupted_task; }; - struct runtime_shutdown : public std::runtime_error { + struct CRCPP_API result_already_retrieved : public std::runtime_error { using runtime_error::runtime_error; }; } // namespace concurrencpp::errors diff --git a/include/concurrencpp/executors/derivable_executor.h b/include/concurrencpp/executors/derivable_executor.h index a4e553bf..fe92d039 100644 --- a/include/concurrencpp/executors/derivable_executor.h +++ b/include/concurrencpp/executors/derivable_executor.h @@ -6,34 +6,29 @@ namespace concurrencpp { template - class derivable_executor : public executor { + struct CRCPP_API derivable_executor : public executor { - private: - concrete_executor_type& self() noexcept { - return *static_cast(this); - } - - public: derivable_executor(std::string_view name) : executor(name) {} template void post(callable_type&& callable, argument_types&&... arguments) { - return do_post(self(), std::forward(callable), std::forward(arguments)...); + return do_post(std::forward(callable), std::forward(arguments)...); } template auto submit(callable_type&& callable, argument_types&&... 
arguments) { - return do_submit(self(), std::forward(callable), std::forward(arguments)...); + return do_submit(std::forward(callable), + std::forward(arguments)...); } template void bulk_post(std::span callable_list) { - return do_bulk_post(self(), callable_list); + return do_bulk_post(callable_list); } template> std::vector> bulk_submit(std::span callable_list) { - return do_bulk_submit(self(), callable_list); + return do_bulk_submit(callable_list); } }; } // namespace concurrencpp diff --git a/include/concurrencpp/executors/executor.h b/include/concurrencpp/executors/executor.h index ed2d5857..673fee60 100644 --- a/include/concurrencpp/executors/executor.h +++ b/include/concurrencpp/executors/executor.h @@ -10,12 +10,12 @@ #include namespace concurrencpp::details { - [[noreturn]] void throw_runtime_shutdown_exception(std::string_view executor_name); + [[noreturn]] CRCPP_API void throw_runtime_shutdown_exception(std::string_view executor_name); std::string make_executor_worker_name(std::string_view executor_name); } // namespace concurrencpp::details namespace concurrencpp { - class executor { + class CRCPP_API executor { private: template @@ -23,37 +23,58 @@ namespace concurrencpp { co_return callable(arguments...); } + struct accumulating_awaitable { + std::vector& accumulator; + bool m_interrupted = false; + + accumulating_awaitable(std::vector& accumulator) noexcept : accumulator(accumulator) {} + + constexpr bool await_ready() const noexcept { + return false; + } + + void await_suspend(details::coroutine_handle coro_handle) noexcept { + accumulator.emplace_back(details::await_via_functor(coro_handle, &m_interrupted)); + } + + void await_resume() const { + if (m_interrupted) { + throw errors::broken_task(""); + } + } + }; + template> - static result bulk_submit_bridge(details::executor_bulk_tag, - std::vector& accumulator, - callable_type callable) { + static result bulk_submit_bridge(std::vector& accumulator, callable_type callable) { + + co_await 
accumulating_awaitable(accumulator); co_return callable(); } protected: template - static void do_post(executor_type& executor_ref, callable_type&& callable, argument_types&&... arguments) { + void do_post(callable_type&& callable, argument_types&&... arguments) { static_assert(std::is_invocable_v, "concurrencpp::executor::post - <> is not invokable with <>"); - executor_ref.enqueue( + static_cast(this)->enqueue( details::bind_with_try_catch(std::forward(callable), std::forward(arguments)...)); } template - static auto do_submit(executor_type& executor_ref, callable_type&& callable, argument_types&&... arguments) { + auto do_submit(callable_type&& callable, argument_types&&... arguments) { static_assert(std::is_invocable_v, "concurrencpp::executor::submit - <> is not invokable with <>"); using return_type = typename std::invoke_result_t; return submit_bridge({}, - executor_ref, + *static_cast(this), std::forward(callable), std::forward(arguments)...); } template - static void do_bulk_post(executor_type& executor_ref, std::span callable_list) { + void do_bulk_post(std::span callable_list) { assert(!callable_list.empty()); std::vector tasks; @@ -64,12 +85,11 @@ namespace concurrencpp { } std::span span = tasks; - executor_ref.enqueue(span); + static_cast(this)->enqueue(span); } template> - static std::vector> do_bulk_submit(executor_type& executor_ref, - std::span callable_list) { + std::vector> do_bulk_submit(std::span callable_list) { std::vector accumulator; accumulator.reserve(callable_list.size()); @@ -77,12 +97,12 @@ namespace concurrencpp { results.reserve(callable_list.size()); for (auto& callable : callable_list) { - results.emplace_back(bulk_submit_bridge({}, accumulator, std::move(callable))); + results.emplace_back(bulk_submit_bridge(accumulator, std::move(callable))); } assert(!accumulator.empty()); std::span span = accumulator; - executor_ref.enqueue(span); + static_cast(this)->enqueue(span); return results; } @@ -103,22 +123,22 @@ namespace 
concurrencpp { template void post(callable_type&& callable, argument_types&&... arguments) { - return do_post(*this, std::forward(callable), std::forward(arguments)...); + return do_post(std::forward(callable), std::forward(arguments)...); } template auto submit(callable_type&& callable, argument_types&&... arguments) { - return do_submit(*this, std::forward(callable), std::forward(arguments)...); + return do_submit(std::forward(callable), std::forward(arguments)...); } template void bulk_post(std::span callable_list) { - return do_bulk_post(*this, callable_list); + return do_bulk_post(callable_list); } template> std::vector> bulk_submit(std::span callable_list) { - return do_bulk_submit(*this, callable_list); + return do_bulk_submit(callable_list); } }; } // namespace concurrencpp diff --git a/include/concurrencpp/executors/inline_executor.h b/include/concurrencpp/executors/inline_executor.h index 367df80e..7c23d6ca 100644 --- a/include/concurrencpp/executors/inline_executor.h +++ b/include/concurrencpp/executors/inline_executor.h @@ -5,7 +5,7 @@ #include "concurrencpp/executors/constants.h" namespace concurrencpp { - class inline_executor final : public executor { + class CRCPP_API inline_executor final : public executor { private: std::atomic_bool m_abort; diff --git a/include/concurrencpp/executors/manual_executor.h b/include/concurrencpp/executors/manual_executor.h index 82247bc8..5b535eb1 100644 --- a/include/concurrencpp/executors/manual_executor.h +++ b/include/concurrencpp/executors/manual_executor.h @@ -5,10 +5,12 @@ #include "concurrencpp/executors/derivable_executor.h" #include +#include #include +#include namespace concurrencpp { - class alignas(CRCPP_CACHE_LINE_ALIGNMENT) manual_executor final : public derivable_executor { + class CRCPP_API alignas(CRCPP_CACHE_LINE_ALIGNMENT) manual_executor final : public derivable_executor { private: mutable std::mutex m_lock; diff --git a/include/concurrencpp/executors/thread_executor.h 
b/include/concurrencpp/executors/thread_executor.h index 3e5fcc53..52a61a89 100644 --- a/include/concurrencpp/executors/thread_executor.h +++ b/include/concurrencpp/executors/thread_executor.h @@ -11,7 +11,7 @@ #include namespace concurrencpp { - class alignas(CRCPP_CACHE_LINE_ALIGNMENT) thread_executor final : public derivable_executor { + class CRCPP_API alignas(CRCPP_CACHE_LINE_ALIGNMENT) thread_executor final : public derivable_executor { private: std::mutex m_lock; diff --git a/include/concurrencpp/executors/thread_pool_executor.h b/include/concurrencpp/executors/thread_pool_executor.h index fb22bf27..baf8aaee 100644 --- a/include/concurrencpp/executors/thread_pool_executor.h +++ b/include/concurrencpp/executors/thread_pool_executor.h @@ -3,7 +3,6 @@ #include "concurrencpp/threads/thread.h" #include "concurrencpp/threads/cache_line.h" -#include "concurrencpp/threads/binary_semaphore.h" #include "concurrencpp/executors/derivable_executor.h" #include @@ -37,59 +36,11 @@ namespace concurrencpp::details { } // namespace concurrencpp::details namespace concurrencpp::details { - class alignas(CRCPP_CACHE_LINE_ALIGNMENT) thread_pool_worker { - - private: - std::deque m_private_queue; - std::vector m_idle_worker_list; - std::atomic_bool m_atomic_abort; - thread_pool_executor& m_parent_pool; - const size_t m_index; - const size_t m_pool_size; - const std::chrono::milliseconds m_max_idle_time; - const std::string m_worker_name; - alignas(CRCPP_CACHE_LINE_ALIGNMENT) std::mutex m_lock; - std::deque m_public_queue; - binary_semaphore m_semaphore; - bool m_idle; - bool m_abort; - std::atomic_bool m_task_found_or_abort; - thread m_thread; - - void balance_work(); - - bool wait_for_task(std::unique_lock& lock); - bool drain_queue_impl(); - bool drain_queue(); - - void work_loop(); - - void ensure_worker_active(bool first_enqueuer, std::unique_lock& lock); - - public: - thread_pool_worker(thread_pool_executor& parent_pool, size_t index, size_t pool_size, 
std::chrono::milliseconds max_idle_time); - - thread_pool_worker(thread_pool_worker&& rhs) noexcept; - ~thread_pool_worker() noexcept; - - void enqueue_foreign(concurrencpp::task& task); - void enqueue_foreign(std::span tasks); - void enqueue_foreign(std::deque::iterator begin, std::deque::iterator end); - void enqueue_foreign(std::span::iterator begin, std::span::iterator end); - - void enqueue_local(concurrencpp::task& task); - void enqueue_local(std::span tasks); - - void shutdown(); - - std::chrono::milliseconds max_worker_idle_time() const noexcept; - - bool appears_empty() const noexcept; - }; + class thread_pool_worker; } // namespace concurrencpp::details namespace concurrencpp { - class alignas(CRCPP_CACHE_LINE_ALIGNMENT) thread_pool_executor final : public derivable_executor { + class CRCPP_API alignas(CRCPP_CACHE_LINE_ALIGNMENT) thread_pool_executor final : public derivable_executor { friend class details::thread_pool_worker; @@ -108,6 +59,8 @@ namespace concurrencpp { public: thread_pool_executor(std::string_view pool_name, size_t pool_size, std::chrono::milliseconds max_idle_time); + ~thread_pool_executor() override; + void enqueue(task task) override; void enqueue(std::span tasks) override; diff --git a/include/concurrencpp/executors/worker_thread_executor.h b/include/concurrencpp/executors/worker_thread_executor.h index 93d118c1..92a3290a 100644 --- a/include/concurrencpp/executors/worker_thread_executor.h +++ b/include/concurrencpp/executors/worker_thread_executor.h @@ -3,14 +3,15 @@ #include "concurrencpp/threads/thread.h" #include "concurrencpp/threads/cache_line.h" -#include "concurrencpp/threads/binary_semaphore.h" #include "concurrencpp/executors/derivable_executor.h" #include #include +#include namespace concurrencpp { - class alignas(CRCPP_CACHE_LINE_ALIGNMENT) worker_thread_executor final : public derivable_executor { + class CRCPP_API alignas(CRCPP_CACHE_LINE_ALIGNMENT) worker_thread_executor final : + public derivable_executor { private: 
std::deque m_private_queue; @@ -18,7 +19,7 @@ namespace concurrencpp { details::thread m_thread; alignas(CRCPP_CACHE_LINE_ALIGNMENT) std::mutex m_lock; std::deque m_public_queue; - details::binary_semaphore m_semaphore; + std::binary_semaphore m_semaphore; std::atomic_bool m_atomic_abort; bool m_abort; diff --git a/include/concurrencpp/forward_declarations.h b/include/concurrencpp/forward_declarations.h index cf9f0335..0fc97f53 100644 --- a/include/concurrencpp/forward_declarations.h +++ b/include/concurrencpp/forward_declarations.h @@ -32,6 +32,7 @@ namespace concurrencpp { class generator; class async_lock; + class async_condition_variable; } // namespace concurrencpp #endif // FORWARD_DECLARATIONS_H diff --git a/include/concurrencpp/platform_defs.h b/include/concurrencpp/platform_defs.h index 518e480c..3469b1c6 100644 --- a/include/concurrencpp/platform_defs.h +++ b/include/concurrencpp/platform_defs.h @@ -25,6 +25,20 @@ # define CRCPP_DEBUG_MODE #endif +#if defined(CRCPP_WIN_OS) +# if defined(CRCPP_EXPORT_API) +# define CRCPP_API __declspec(dllexport) +# elif defined(CRCPP_IMPORT_API) +# define CRCPP_API __declspec(dllimport) +# endif +#elif (defined(CRCPP_EXPORT_API) || defined(CRCPP_IMPORT_API)) && __has_cpp_attribute(gnu::visibility) +# define CRCPP_API __attribute__((visibility("default"))) +#endif + +#if !defined(CRCPP_API) +# define CRCPP_API +#endif + #include #if defined(_LIBCPP_VERSION) diff --git a/include/concurrencpp/results/impl/consumer_context.h b/include/concurrencpp/results/impl/consumer_context.h index b82b6cf2..807ec9e8 100644 --- a/include/concurrencpp/results/impl/consumer_context.h +++ b/include/concurrencpp/results/impl/consumer_context.h @@ -4,11 +4,11 @@ #include "concurrencpp/coroutines/coroutine.h" #include "concurrencpp/results/result_fwd_declarations.h" -#include -#include +#include +#include namespace concurrencpp::details { - class await_via_functor { + class CRCPP_API await_via_functor { private: coroutine_handle m_caller_handle; 
@@ -22,21 +22,7 @@ namespace concurrencpp::details { void operator()() noexcept; }; - class wait_context { - - private: - std::mutex m_lock; - std::condition_variable m_condition; - bool m_ready = false; - - public: - void wait(); - bool wait_for(size_t milliseconds); - - void notify(); - }; - - class when_any_context { + class CRCPP_API when_any_context { private: std::atomic m_status; @@ -56,26 +42,16 @@ namespace concurrencpp::details { bool resume_inline(result_state_base& completed_result) noexcept; }; - class consumer_context { + class CRCPP_API consumer_context { private: enum class consumer_status { idle, await, wait_for, when_any }; union storage { coroutine_handle caller_handle; - std::shared_ptr wait_for_ctx; + std::shared_ptr wait_for_ctx; std::shared_ptr when_any_ctx; - template - static void build(type& o, argument_type&&... arguments) noexcept { - new (std::addressof(o)) type(std::forward(arguments)...); - } - - template - static void destroy(type& o) noexcept { - o.~type(); - } - storage() noexcept {} ~storage() noexcept {} }; @@ -93,7 +69,7 @@ namespace concurrencpp::details { void resume_consumer(result_state_base& self) const; void set_await_handle(coroutine_handle caller_handle) noexcept; - void set_wait_for_context(const std::shared_ptr& wait_ctx) noexcept; + void set_wait_for_context(const std::shared_ptr& wait_ctx) noexcept; void set_when_any_context(const std::shared_ptr& when_any_ctx) noexcept; }; } // namespace concurrencpp::details diff --git a/include/concurrencpp/results/impl/result_state.h b/include/concurrencpp/results/impl/result_state.h index 3b9f5a3b..0e04b360 100644 --- a/include/concurrencpp/results/impl/result_state.h +++ b/include/concurrencpp/results/impl/result_state.h @@ -10,7 +10,7 @@ #include namespace concurrencpp::details { - class result_state_base { + class CRCPP_API result_state_base { public: enum class pc_state { idle, consumer_set, consumer_waiting, consumer_done, producer_done }; @@ -87,7 +87,7 @@ namespace 
concurrencpp::details { return m_producer.status(); } - const auto wait_ctx = std::make_shared(); + const auto wait_ctx = std::make_shared(0); m_consumer.set_wait_for_context(wait_ctx); auto expected_idle_state = pc_state::idle; @@ -101,8 +101,7 @@ namespace concurrencpp::details { return m_producer.status(); } - const auto ms = std::chrono::duration_cast(duration).count(); - if (wait_ctx->wait_for(static_cast(ms + 1))) { + if (wait_ctx->try_acquire_for(duration + std::chrono::milliseconds(1))) { assert_done(); return m_producer.status(); } diff --git a/include/concurrencpp/results/impl/shared_result_state.h b/include/concurrencpp/results/impl/shared_result_state.h index 0a134d87..5c739b0a 100644 --- a/include/concurrencpp/results/impl/shared_result_state.h +++ b/include/concurrencpp/results/impl/shared_result_state.h @@ -21,7 +21,7 @@ namespace concurrencpp::details { } // namespace concurrencpp::details namespace concurrencpp::details { - class shared_result_state_base { + class CRCPP_API shared_result_state_base { protected: std::atomic_bool m_ready {false}; diff --git a/include/concurrencpp/results/promises.h b/include/concurrencpp/results/promises.h index d21850e9..b894b7b0 100644 --- a/include/concurrencpp/results/promises.h +++ b/include/concurrencpp/results/promises.h @@ -1,11 +1,12 @@ #ifndef CONCURRENCPP_PROMISES_H #define CONCURRENCPP_PROMISES_H -#include "concurrencpp/task.h" #include "concurrencpp/coroutines/coroutine.h" -#include "concurrencpp/results/impl/result_state.h" +#include "concurrencpp/executors/executor_all.h" #include "concurrencpp/results/impl/lazy_result_state.h" +#include "concurrencpp/results/impl/result_state.h" #include "concurrencpp/results/impl/return_value_struct.h" +#include "concurrencpp/task.h" #include @@ -13,52 +14,39 @@ #include "concurrencpp/errors.h" namespace concurrencpp::details { - struct coroutine_per_thread_data { - std::vector* accumulator = nullptr; - - static thread_local coroutine_per_thread_data 
s_tl_per_thread_data; - }; - - class initial_accumulating_awaiter : public suspend_always { - private: - bool m_interrupted = false; - - public: - void await_suspend(coroutine_handle handle) noexcept; - void await_resume() const; - }; - template class initialy_rescheduled_promise { - protected: - static thread_local executor_type* s_tl_initial_executor; + executor_type& m_initial_executor; static_assert( std::is_base_of_v, "concurrencpp::initialy_rescheduled_promise<> - <> isn't driven from concurrencpp::executor."); - public: - template - initialy_rescheduled_promise(executor_tag, executor_type* executor_ptr, argument_types&&...) { + static executor_type& to_ref(executor_type* executor_ptr) { if (executor_ptr == nullptr) { throw std::invalid_argument(consts::k_parallel_coroutine_null_exception_err_msg); } - s_tl_initial_executor = executor_ptr; + return *executor_ptr; } + public: template - initialy_rescheduled_promise(executor_tag, std::shared_ptr executor, argument_types&&... args) : - initialy_rescheduled_promise(executor_tag {}, executor.get(), std::forward(args)...) {} + initialy_rescheduled_promise(executor_tag, executor_type* executor_ptr, argument_types&&...) : + m_initial_executor(to_ref(executor_ptr)) {} + + template + initialy_rescheduled_promise(executor_tag, executor_type& executor_ptr, argument_types&&...) : + m_initial_executor(executor_ptr) {} template - initialy_rescheduled_promise(executor_tag, executor_type& executor, argument_types&&... args) : - initialy_rescheduled_promise(executor_tag {}, std::addressof(executor), std::forward(args)...) {} + initialy_rescheduled_promise(executor_tag, std::shared_ptr executor, argument_types&&... args) : + initialy_rescheduled_promise(executor_tag {}, executor.get(), std::forward(args)...) {} template initialy_rescheduled_promise(class_type&&, executor_tag, std::shared_ptr executor, argument_types&&... args) : - initialy_rescheduled_promise(executor_tag {}, executor.get(), std::forward(args)...) 
{} + initialy_rescheduled_promise(executor_tag {}, *executor, std::forward(args)...) {} class initial_scheduling_awaiter : public suspend_always { @@ -66,9 +54,9 @@ namespace concurrencpp::details { bool m_interrupted = false; public: - void await_suspend(coroutine_handle handle) { - auto executor = std::exchange(s_tl_initial_executor, nullptr); - executor->post(await_via_functor {handle, &m_interrupted}); + template + void await_suspend(coroutine_handle handle) { + handle.promise().m_initial_executor.post(await_via_functor {handle, &m_interrupted}); } void await_resume() const { @@ -83,27 +71,12 @@ namespace concurrencpp::details { } }; - template - thread_local executor_type* initialy_rescheduled_promise::s_tl_initial_executor = nullptr; - struct initialy_resumed_promise { suspend_never initial_suspend() const noexcept { return {}; } }; - struct bulk_promise { - template - bulk_promise(executor_bulk_tag, std::vector& accumulator, argument_types&&...) { - assert(coroutine_per_thread_data::s_tl_per_thread_data.accumulator == nullptr); - coroutine_per_thread_data::s_tl_per_thread_data.accumulator = &accumulator; - } - - initial_accumulating_awaiter initial_suspend() const noexcept { - return {}; - } - }; - struct null_result_promise { null_result get_return_object() const noexcept { return {}; @@ -172,15 +145,6 @@ namespace concurrencpp::details { public result_coro_promise { using initialy_rescheduled_promise::initialy_rescheduled_promise; }; - - struct bulk_null_result_promise : public bulk_promise, public null_result_promise { - using bulk_promise::bulk_promise; - }; - - template - struct bulk_result_promise : public bulk_promise, public result_coro_promise { - using bulk_promise::bulk_promise; - }; } // namespace concurrencpp::details namespace CRCPP_COROUTINE_NAMESPACE { @@ -228,24 +192,6 @@ namespace CRCPP_COROUTINE_NAMESPACE { using promise_type = concurrencpp::details::initialy_rescheduled_result_promise; }; - // Bulk + no result - template - struct 
coroutine_traits<::concurrencpp::null_result, - concurrencpp::details::executor_bulk_tag, - std::vector&, - arguments...> { - using promise_type = concurrencpp::details::bulk_null_result_promise; - }; - - // Bulk + result - template - struct coroutine_traits<::concurrencpp::result, - concurrencpp::details::executor_bulk_tag, - std::vector&, - arguments...> { - using promise_type = concurrencpp::details::bulk_result_promise; - }; - // Lazy template struct coroutine_traits<::concurrencpp::lazy_result, arguments...> { diff --git a/include/concurrencpp/results/result.h b/include/concurrencpp/results/result.h index e9320299..ea487597 100644 --- a/include/concurrencpp/results/result.h +++ b/include/concurrencpp/results/result.h @@ -103,33 +103,37 @@ namespace concurrencpp { "concurrencpp::result_promise - <> should be now-throw-move constructable or void."); private: - details::producer_result_state_ptr m_state; - bool m_result_retrieved; + details::producer_result_state_ptr m_producer_state; + details::consumer_result_state_ptr m_consumer_state; void throw_if_empty(const char* message) const { - if (!static_cast(m_state)) { + if (!static_cast(m_producer_state)) { throw errors::empty_result_promise(message); } } void break_task_if_needed() noexcept { - if (!static_cast(m_state)) { + if (!static_cast(m_producer_state)) { return; } - if (!m_result_retrieved) { // no result to break. + if (static_cast(m_consumer_state)) { // no result to break. 
return; } auto exception_ptr = std::make_exception_ptr(errors::broken_task(details::consts::k_broken_task_exception_error_msg)); - m_state->set_exception(exception_ptr); - m_state.reset(); + m_producer_state->set_exception(exception_ptr); + m_producer_state.reset(); } public: - result_promise() : m_state(new details::result_state()), m_result_retrieved(false) {} + result_promise() { + m_producer_state.reset(new details::result_state()); + m_consumer_state.reset(m_producer_state.get()); + } - result_promise(result_promise&& rhs) noexcept : m_state(std::move(rhs.m_state)), m_result_retrieved(rhs.m_result_retrieved) {} + result_promise(result_promise&& rhs) noexcept : + m_producer_state(std::move(rhs.m_producer_state)), m_consumer_state(std::move(rhs.m_consumer_state)) {} ~result_promise() noexcept { break_task_if_needed(); @@ -138,8 +142,8 @@ namespace concurrencpp { result_promise& operator=(result_promise&& rhs) noexcept { if (this != &rhs) { break_task_if_needed(); - m_state = std::move(rhs.m_state); - m_result_retrieved = rhs.m_result_retrieved; + m_producer_state = std::move(rhs.m_producer_state); + m_consumer_state = std::move(rhs.m_consumer_state); } return *this; @@ -149,7 +153,7 @@ namespace concurrencpp { result_promise& operator=(const result_promise&) = delete; explicit operator bool() const noexcept { - return static_cast(m_state); + return static_cast(m_producer_state); } template @@ -159,8 +163,8 @@ namespace concurrencpp { throw_if_empty(details::consts::k_result_promise_set_result_error_msg); - m_state->set_result(std::forward(arguments)...); - m_state.reset(); // publishes the result + m_producer_state->set_result(std::forward(arguments)...); + m_producer_state.reset(); // publishes the result } void set_exception(std::exception_ptr exception_ptr) { @@ -170,8 +174,8 @@ namespace concurrencpp { throw std::invalid_argument(details::consts::k_result_promise_set_exception_null_exception_error_msg); } - m_state->set_exception(exception_ptr); - 
m_state.reset(); // publishes the result + m_producer_state->set_exception(exception_ptr); + m_producer_state.reset(); // publishes the result } template @@ -183,19 +187,19 @@ namespace concurrencpp { "result_promise::set_from_function() - function(args...) is not invokable or its return type can't be used to construct <>"); throw_if_empty(details::consts::k_result_promise_set_from_function_error_msg); - m_state->from_callable(details::bind(std::forward(callable), std::forward(args)...)); - m_state.reset(); // publishes the result + m_producer_state->from_callable( + details::bind(std::forward(callable), std::forward(args)...)); + m_producer_state.reset(); // publishes the result } result get_result() { throw_if_empty(details::consts::k_result_get_error_msg); - if (m_result_retrieved) { + if (!static_cast(m_consumer_state)) { throw errors::result_already_retrieved(details::consts::k_result_promise_get_result_already_retrieved_error_msg); } - m_result_retrieved = true; - return result(m_state.get()); + return result(std::move(m_consumer_state)); } }; } // namespace concurrencpp diff --git a/include/concurrencpp/results/result_fwd_declarations.h b/include/concurrencpp/results/result_fwd_declarations.h index 2a6fe782..20ff5822 100644 --- a/include/concurrencpp/results/result_fwd_declarations.h +++ b/include/concurrencpp/results/result_fwd_declarations.h @@ -47,8 +47,6 @@ namespace concurrencpp::details { template class lazy_result_state; - struct executor_bulk_tag {}; - class when_result_helper; struct shared_result_helper; } // namespace concurrencpp::details diff --git a/include/concurrencpp/results/resume_on.h b/include/concurrencpp/results/resume_on.h index 6bae6c2f..a1e2c3f2 100644 --- a/include/concurrencpp/results/resume_on.h +++ b/include/concurrencpp/results/resume_on.h @@ -58,4 +58,4 @@ namespace concurrencpp { } } // namespace concurrencpp -#endif \ No newline at end of file +#endif diff --git a/include/concurrencpp/runtime/constants.h 
b/include/concurrencpp/runtime/constants.h index b87269cf..6120a2a7 100644 --- a/include/concurrencpp/runtime/constants.h +++ b/include/concurrencpp/runtime/constants.h @@ -12,7 +12,7 @@ namespace concurrencpp::details::consts { constexpr static unsigned int k_concurrencpp_version_major = 0; constexpr static unsigned int k_concurrencpp_version_minor = 1; - constexpr static unsigned int k_concurrencpp_version_revision = 5; + constexpr static unsigned int k_concurrencpp_version_revision = 6; } // namespace concurrencpp::details::consts #endif diff --git a/include/concurrencpp/runtime/runtime.h b/include/concurrencpp/runtime/runtime.h index 4a337957..1e820a26 100644 --- a/include/concurrencpp/runtime/runtime.h +++ b/include/concurrencpp/runtime/runtime.h @@ -3,6 +3,7 @@ #include "concurrencpp/runtime/constants.h" #include "concurrencpp/forward_declarations.h" +#include "concurrencpp/platform_defs.h" #include #include @@ -10,7 +11,7 @@ #include namespace concurrencpp::details { - class executor_collection { + class CRCPP_API executor_collection { private: std::mutex m_lock; @@ -23,7 +24,7 @@ namespace concurrencpp::details { } // namespace concurrencpp::details namespace concurrencpp { - struct runtime_options { + struct CRCPP_API runtime_options { size_t max_cpu_threads; std::chrono::milliseconds max_thread_pool_executor_waiting_time; @@ -38,7 +39,7 @@ namespace concurrencpp { runtime_options& operator=(const runtime_options&) noexcept = default; }; - class runtime { + class CRCPP_API runtime { private: std::shared_ptr m_inline_executor; diff --git a/include/concurrencpp/task.h b/include/concurrencpp/task.h index 84df1ef4..f5ab4873 100644 --- a/include/concurrencpp/task.h +++ b/include/concurrencpp/task.h @@ -34,7 +34,7 @@ namespace concurrencpp::details { return move_fn == nullptr; } - static constexpr bool trivially_destructable(decltype(destroy_fn) destroy_fn) noexcept { + static constexpr bool trivially_destructible(decltype(destroy_fn) destroy_fn) noexcept { 
return destroy_fn == nullptr; } }; @@ -92,13 +92,14 @@ namespace concurrencpp::details { void (*move_destroy_fn)(void* src, void* dst) noexcept = nullptr; void (*destroy_fn)(void* target) noexcept = nullptr; - if constexpr (std::is_trivially_copy_constructible_v && std::is_trivially_destructible_v) { + if constexpr (std::is_trivially_copy_constructible_v && std::is_trivially_destructible_v && + is_inlinable()) { move_destroy_fn = nullptr; } else { move_destroy_fn = move_destroy; } - if constexpr (std::is_trivially_destructible_v) { + if constexpr (std::is_trivially_destructible_v && is_inlinable()) { destroy_fn = nullptr; } else { destroy_fn = destroy; @@ -174,41 +175,10 @@ namespace concurrencpp::details { static constexpr inline vtable s_vtable = make_vtable(); }; - class coroutine_handle_functor { - - private: - coroutine_handle m_coro_handle; - - public: - coroutine_handle_functor() noexcept : m_coro_handle() {} - - coroutine_handle_functor(const coroutine_handle_functor&) = delete; - coroutine_handle_functor& operator=(const coroutine_handle_functor&) = delete; - - coroutine_handle_functor(coroutine_handle coro_handle) noexcept : m_coro_handle(coro_handle) {} - - coroutine_handle_functor(coroutine_handle_functor&& rhs) noexcept : m_coro_handle(std::exchange(rhs.m_coro_handle, {})) {} - - ~coroutine_handle_functor() noexcept { - if (static_cast(m_coro_handle)) { - m_coro_handle.destroy(); - } - } - - void execute_destroy() noexcept { - auto coro_handle = std::exchange(m_coro_handle, {}); - coro_handle(); - } - - void operator()() noexcept { - execute_destroy(); - } - }; - } // namespace concurrencpp::details namespace concurrencpp { - class task { + class CRCPP_API task { private: alignas(std::max_align_t) std::byte m_buffer[details::task_constants::buffer_size]; @@ -230,9 +200,12 @@ namespace concurrencpp { return vtable == &details::callable_vtable::s_vtable; } + bool contains_coroutine_handle() const noexcept; + public: task() noexcept; task(task&& rhs) 
noexcept; + task(details::coroutine_handle coro_handle) noexcept; template task(callable_type&& callable) { @@ -257,7 +230,7 @@ namespace concurrencpp { using decayed_type = typename std::decay_t; if constexpr (std::is_same_v>) { - return contains(); + return contains_coroutine_handle(); } return m_vtable == &details::callable_vtable::s_vtable; @@ -265,4 +238,4 @@ namespace concurrencpp { }; } // namespace concurrencpp -#endif \ No newline at end of file +#endif diff --git a/include/concurrencpp/threads/async_condition_variable.h b/include/concurrencpp/threads/async_condition_variable.h new file mode 100644 index 00000000..4cfa28a7 --- /dev/null +++ b/include/concurrencpp/threads/async_condition_variable.h @@ -0,0 +1,82 @@ +#ifndef CONCURRENCPP_ASYNC_CONDITION_VARIABLE_H +#define CONCURRENCPP_ASYNC_CONDITION_VARIABLE_H + +#include "concurrencpp/utils/slist.h" +#include "concurrencpp/threads/async_lock.h" +#include "concurrencpp/results/lazy_result.h" +#include "concurrencpp/coroutines/coroutine.h" +#include "concurrencpp/forward_declarations.h" + +namespace concurrencpp::details { + class CRCPP_API cv_awaiter { + private: + async_condition_variable& m_parent; + scoped_async_lock& m_lock; + coroutine_handle m_caller_handle; + + public: + cv_awaiter* next = nullptr; + + cv_awaiter(async_condition_variable& parent, scoped_async_lock& lock) noexcept; + + constexpr bool await_ready() const noexcept { + return false; + } + + void await_suspend(details::coroutine_handle caller_handle); + void await_resume() const noexcept {} + void resume() noexcept; + }; +} // namespace concurrencpp::details + +namespace concurrencpp { + class CRCPP_API async_condition_variable { + + friend details::cv_awaiter; + + private: + template + lazy_result await_impl(std::shared_ptr resume_executor, scoped_async_lock& lock, predicate_type pred) { + while (true) { + assert(lock.owns_lock()); + if (pred()) { + break; + } + + co_await await_impl(resume_executor, lock); + } + } + + private: + 
std::mutex m_lock; + details::slist m_awaiters; + + static void verify_await_params(const std::shared_ptr& resume_executor, const scoped_async_lock& lock); + + lazy_result await_impl(std::shared_ptr resume_executor, scoped_async_lock& lock); + + public: + async_condition_variable() noexcept = default; + ~async_condition_variable() noexcept; + + async_condition_variable(const async_condition_variable&) noexcept = delete; + async_condition_variable(async_condition_variable&&) noexcept = delete; + + lazy_result await(std::shared_ptr resume_executor, scoped_async_lock& lock); + + template + lazy_result await(std::shared_ptr resume_executor, scoped_async_lock& lock, predicate_type pred) { + static_assert( + std::is_invocable_r_v, + "concurrencpp::async_condition_variable::await - given predicate isn't invocable with no arguments, or does not return a type which is or convertible to bool."); + + verify_await_params(resume_executor, lock); + return await_impl(std::move(resume_executor), lock, pred); + } + + void notify_one(); + void notify_all(); + }; +} // namespace concurrencpp + +#endif \ No newline at end of file diff --git a/include/concurrencpp/threads/async_lock.h b/include/concurrencpp/threads/async_lock.h index cb77d870..7e03f51d 100644 --- a/include/concurrencpp/threads/async_lock.h +++ b/include/concurrencpp/threads/async_lock.h @@ -1,35 +1,57 @@ #ifndef CONCURRENCPP_ASYNC_LOCK_H #define CONCURRENCPP_ASYNC_LOCK_H +#include "concurrencpp/utils/slist.h" #include "concurrencpp/platform_defs.h" +#include "concurrencpp/executors/executor.h" #include "concurrencpp/results/lazy_result.h" #include "concurrencpp/forward_declarations.h" namespace concurrencpp::details { - class async_lock_awaiter; + class async_lock_awaiter { + + friend class concurrencpp::async_lock; + + private: + async_lock& m_parent; + std::unique_lock m_lock; + coroutine_handle m_resume_handle; + + public: + async_lock_awaiter* next = nullptr; + + public: + async_lock_awaiter(async_lock& parent, 
std::unique_lock& lock) noexcept; + + constexpr bool await_ready() const noexcept { + return false; + } + + void await_suspend(coroutine_handle handle); + + constexpr void await_resume() const noexcept {} + + void retry() noexcept; + }; } // namespace concurrencpp::details namespace concurrencpp { class scoped_async_lock; - class async_lock { + class CRCPP_API async_lock { friend class scoped_async_lock; friend class details::async_lock_awaiter; private: std::mutex m_awaiter_lock; - details::async_lock_awaiter* m_head = nullptr; - details::async_lock_awaiter* m_tail = nullptr; + details::slist m_awaiters; bool m_locked = false; #ifdef CRCPP_DEBUG_MODE std::atomic_intptr_t m_thread_count_in_critical_section {0}; #endif - void enqueue_awaiter(std::unique_lock& lock, details::async_lock_awaiter& awaiter_node) noexcept; - details::async_lock_awaiter* try_dequeue_awaiter(std::unique_lock& lock) noexcept; - lazy_result lock_impl(std::shared_ptr resume_executor, bool with_raii_guard); public: @@ -40,7 +62,7 @@ namespace concurrencpp { void unlock(); }; - class scoped_async_lock { + class CRCPP_API scoped_async_lock { private: async_lock* m_lock = nullptr; @@ -68,4 +90,4 @@ namespace concurrencpp { }; } // namespace concurrencpp -#endif \ No newline at end of file +#endif diff --git a/include/concurrencpp/threads/binary_semaphore.h b/include/concurrencpp/threads/binary_semaphore.h deleted file mode 100644 index 72a94fd7..00000000 --- a/include/concurrencpp/threads/binary_semaphore.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef CONCURRENCPP_BINARY_SEMAPHORE_H -#define CONCURRENCPP_BINARY_SEMAPHORE_H - -#include "concurrencpp/platform_defs.h" - -#if defined(CRCPP_MAC_OS) && defined(CRCPP_LIBCPP_LIB) - -# include -# include -# include - -# include - -namespace concurrencpp::details { - - class binary_semaphore { - - private: - std::mutex m_lock; - std::condition_variable m_condition; - bool m_is_signaled; - - bool try_acquire_until_impl(const std::chrono::time_point& abs_time); - 
- public: - binary_semaphore(std::ptrdiff_t desired); - - void release(std::ptrdiff_t update = 1); - void acquire(); - bool try_acquire() noexcept; - - template - bool try_acquire_for(const std::chrono::duration& rel_time) { - const auto deadline = std::chrono::system_clock::now() + rel_time; - return try_acquire_until_impl(deadline); - } - - template - bool try_acquire_until(const std::chrono::time_point& abs_time) { - const auto src_now = Clock::now(); - const auto dst_now = std::chrono::system_clock::now(); - const auto deadline = dst_now + std::chrono::duration_cast(abs_time - src_now); - return try_acquire_until_impl(deadline); - } - }; - -} // namespace concurrencpp::details - -#else - -# include - -namespace concurrencpp::details { - using binary_semaphore = std::binary_semaphore; -} - -#endif - -#endif diff --git a/include/concurrencpp/threads/constants.h b/include/concurrencpp/threads/constants.h index 92063072..2fb17acd 100644 --- a/include/concurrencpp/threads/constants.h +++ b/include/concurrencpp/threads/constants.h @@ -20,6 +20,12 @@ namespace concurrencpp::details::consts { inline const char* k_scoped_async_lock_unlock_invalid_lock_err_msg = "scoped_async_lock::unlock() - trying to unlock an unowned lock."; + inline const char* k_async_condition_variable_await_invalid_resume_executor_err_msg = + "async_condition_variable::await() - resume_executor is null."; + + inline const char* k_async_condition_variable_await_lock_unlocked_err_msg = + "async_condition_variable::await() - lock is unlocked."; + } // namespace concurrencpp::details::consts #endif \ No newline at end of file diff --git a/include/concurrencpp/threads/thread.h b/include/concurrencpp/threads/thread.h index 1e13c11c..b7d82d27 100644 --- a/include/concurrencpp/threads/thread.h +++ b/include/concurrencpp/threads/thread.h @@ -1,11 +1,13 @@ #ifndef CONCURRENCPP_THREAD_H #define CONCURRENCPP_THREAD_H +#include "concurrencpp/platform_defs.h" + #include #include namespace concurrencpp::details 
{ - class thread { + class CRCPP_API thread { private: std::thread m_thread; diff --git a/include/concurrencpp/timers/timer.h b/include/concurrencpp/timers/timer.h index 6c4731c7..93a28f26 100644 --- a/include/concurrencpp/timers/timer.h +++ b/include/concurrencpp/timers/timer.h @@ -2,13 +2,14 @@ #define CONCURRENCPP_TIMER_H #include "concurrencpp/forward_declarations.h" +#include "concurrencpp/platform_defs.h" #include #include #include namespace concurrencpp::details { - class timer_state_base : public std::enable_shared_from_this { + class CRCPP_API timer_state_base : public std::enable_shared_from_this { public: using clock_type = std::chrono::high_resolution_clock; @@ -110,7 +111,7 @@ namespace concurrencpp::details { } // namespace concurrencpp::details namespace concurrencpp { - class timer { + class CRCPP_API timer { private: std::shared_ptr m_state; diff --git a/include/concurrencpp/timers/timer_queue.h b/include/concurrencpp/timers/timer_queue.h index 5570fa71..038fd4d5 100644 --- a/include/concurrencpp/timers/timer_queue.h +++ b/include/concurrencpp/timers/timer_queue.h @@ -21,7 +21,7 @@ namespace concurrencpp::details { } namespace concurrencpp { - class timer_queue : public std::enable_shared_from_this { + class CRCPP_API timer_queue : public std::enable_shared_from_this { public: using timer_ptr = std::shared_ptr; diff --git a/include/concurrencpp/utils/slist.h b/include/concurrencpp/utils/slist.h new file mode 100644 index 00000000..a41347e5 --- /dev/null +++ b/include/concurrencpp/utils/slist.h @@ -0,0 +1,65 @@ +#ifndef CONCURRENCPP_SLIST_H +#define CONCURRENCPP_SLIST_H + +#include + +namespace concurrencpp::details { + template + class slist { + + private: + node_type* m_head = nullptr; + node_type* m_tail = nullptr; + + void assert_state() const noexcept { + if (m_head == nullptr) { + assert(m_tail == nullptr); + return; + } + + assert(m_tail != nullptr); + } + + public: + slist() noexcept = default; + + slist(slist&& rhs) noexcept : 
m_head(rhs.m_head), m_tail(rhs.m_tail) { + rhs.m_head = nullptr; + rhs.m_tail = nullptr; + } + + bool empty() const noexcept { + assert_state(); + return m_head == nullptr; + } + + void push_back(node_type& node) noexcept { + assert_state(); + + if (m_head == nullptr) { + m_head = m_tail = &node; + return; + } + + m_tail->next = &node; + m_tail = &node; + } + + node_type* pop_front() noexcept { + assert_state(); + const auto node = m_head; + if (node == nullptr) { + return nullptr; + } + + m_head = m_head->next; + if (m_head == nullptr) { + m_tail = nullptr; + } + + return node; + } + }; +} // namespace concurrencpp::details + +#endif \ No newline at end of file diff --git a/source/executors/thread_pool_executor.cpp b/source/executors/thread_pool_executor.cpp index 390b7e3b..c640f104 100644 --- a/source/executors/thread_pool_executor.cpp +++ b/source/executors/thread_pool_executor.cpp @@ -1,5 +1,6 @@ #include "concurrencpp/executors/thread_pool_executor.h" +#include #include using concurrencpp::thread_pool_executor; @@ -7,22 +8,74 @@ using concurrencpp::details::idle_worker_set; using concurrencpp::details::thread_pool_worker; namespace concurrencpp::details { - struct thread_pool_per_thread_data { - thread_pool_worker* this_worker; - size_t this_thread_index; - const size_t this_thread_hashed_id; - - static size_t calculate_hashed_id() noexcept { - const auto this_thread_id = thread::get_current_virtual_id(); - const std::hash hash; - return hash(this_thread_id); - } + namespace { + struct thread_pool_per_thread_data { + thread_pool_worker* this_worker; + size_t this_thread_index; + const size_t this_thread_hashed_id; + + static size_t calculate_hashed_id() noexcept { + const auto this_thread_id = thread::get_current_virtual_id(); + const std::hash hash; + return hash(this_thread_id); + } - thread_pool_per_thread_data() noexcept : - this_worker(nullptr), this_thread_index(static_cast(-1)), this_thread_hashed_id(calculate_hashed_id()) {} - }; + 
thread_pool_per_thread_data() noexcept : + this_worker(nullptr), this_thread_index(static_cast(-1)), this_thread_hashed_id(calculate_hashed_id()) {} + }; + + thread_local thread_pool_per_thread_data s_tl_thread_pool_data; + } // namespace + + class alignas(CRCPP_CACHE_LINE_ALIGNMENT) thread_pool_worker { + + private: + std::deque m_private_queue; + std::vector m_idle_worker_list; + std::atomic_bool m_atomic_abort; + thread_pool_executor& m_parent_pool; + const size_t m_index; + const size_t m_pool_size; + const std::chrono::milliseconds m_max_idle_time; + const std::string m_worker_name; + alignas(CRCPP_CACHE_LINE_ALIGNMENT) std::mutex m_lock; + std::deque m_public_queue; + std::binary_semaphore m_semaphore; + bool m_idle; + bool m_abort; + std::atomic_bool m_task_found_or_abort; + thread m_thread; + + void balance_work(); + + bool wait_for_task(std::unique_lock& lock); + bool drain_queue_impl(); + bool drain_queue(); + + void work_loop(); + + void ensure_worker_active(bool first_enqueuer, std::unique_lock& lock); - static thread_local thread_pool_per_thread_data s_tl_thread_pool_data; + public: + thread_pool_worker(thread_pool_executor& parent_pool, size_t index, size_t pool_size, std::chrono::milliseconds max_idle_time); + + thread_pool_worker(thread_pool_worker&& rhs) noexcept; + ~thread_pool_worker() noexcept; + + void enqueue_foreign(concurrencpp::task& task); + void enqueue_foreign(std::span tasks); + void enqueue_foreign(std::deque::iterator begin, std::deque::iterator end); + void enqueue_foreign(std::span::iterator begin, std::span::iterator end); + + void enqueue_local(concurrencpp::task& task); + void enqueue_local(std::span tasks); + + void shutdown(); + + std::chrono::milliseconds max_worker_idle_time() const noexcept; + + bool appears_empty() const noexcept; + }; } // namespace concurrencpp::details idle_worker_set::idle_worker_set(size_t size) : m_approx_size(0), m_idle_flags(std::make_unique(size)), m_size(size) {} @@ -457,6 +510,8 @@ 
thread_pool_executor::thread_pool_executor(std::string_view pool_name, size_t po } } +thread_pool_executor::~thread_pool_executor() = default; + void thread_pool_executor::find_idle_workers(size_t caller_index, std::vector& buffer, size_t max_count) noexcept { m_idle_workers.find_idle_workers(caller_index, buffer, max_count); } @@ -562,4 +617,4 @@ void thread_pool_executor::shutdown() { std::chrono::milliseconds thread_pool_executor::max_worker_idle_time() const noexcept { return m_workers[0].max_worker_idle_time(); -} \ No newline at end of file +} diff --git a/source/results/impl/consumer_context.cpp b/source/results/impl/consumer_context.cpp index cfbf7116..a27ea785 100644 --- a/source/results/impl/consumer_context.cpp +++ b/source/results/impl/consumer_context.cpp @@ -2,12 +2,25 @@ #include "concurrencpp/executors/executor.h" -using concurrencpp::details::wait_context; using concurrencpp::details::when_any_context; using concurrencpp::details::consumer_context; using concurrencpp::details::await_via_functor; using concurrencpp::details::result_state_base; +namespace concurrencpp::details { + namespace { + template + void build(type& o, argument_type&&... 
arguments) noexcept { + new (std::addressof(o)) type(std::forward(arguments)...); + } + + template + void destroy(type& o) noexcept { + o.~type(); + } + } // namespace +} // namespace concurrencpp::details + /* * await_via_functor */ @@ -37,33 +50,6 @@ void await_via_functor::operator()() noexcept { m_caller_handle(); } -/* - * wait_context - */ - -void wait_context::wait() { - std::unique_lock lock(m_lock); - m_condition.wait(lock, [this] { - return m_ready; - }); -} - -bool wait_context::wait_for(size_t milliseconds) { - std::unique_lock lock(m_lock); - return m_condition.wait_for(lock, std::chrono::milliseconds(milliseconds), [this] { - return m_ready; - }); -} - -void wait_context::notify() { - { - std::unique_lock lock(m_lock); - m_ready = true; - } - - m_condition.notify_all(); -} - /* * when_any_context */ @@ -171,15 +157,15 @@ void consumer_context::destroy() noexcept { } case consumer_status::await: { - return storage::destroy(m_storage.caller_handle); + return details::destroy(m_storage.caller_handle); } case consumer_status::wait_for: { - return storage::destroy(m_storage.wait_for_ctx); + return details::destroy(m_storage.wait_for_ctx); } case consumer_status::when_any: { - return storage::destroy(m_storage.when_any_ctx); + return details::destroy(m_storage.when_any_ctx); } } @@ -194,19 +180,19 @@ void consumer_context::clear() noexcept { void consumer_context::set_await_handle(coroutine_handle caller_handle) noexcept { assert(m_status == consumer_status::idle); m_status = consumer_status::await; - storage::build(m_storage.caller_handle, caller_handle); + details::build(m_storage.caller_handle, caller_handle); } -void consumer_context::set_wait_for_context(const std::shared_ptr& wait_ctx) noexcept { +void consumer_context::set_wait_for_context(const std::shared_ptr& wait_ctx) noexcept { assert(m_status == consumer_status::idle); m_status = consumer_status::wait_for; - storage::build(m_storage.wait_for_ctx, wait_ctx); + 
details::build(m_storage.wait_for_ctx, wait_ctx); } void consumer_context::set_when_any_context(const std::shared_ptr& when_any_ctx) noexcept { assert(m_status == consumer_status::idle); m_status = consumer_status::when_any; - storage::build(m_storage.when_any_ctx, when_any_ctx); + details::build(m_storage.when_any_ctx, when_any_ctx); } void consumer_context::resume_consumer(result_state_base& self) const { @@ -225,7 +211,7 @@ void consumer_context::resume_consumer(result_state_base& self) const { case consumer_status::wait_for: { const auto wait_ctx = m_storage.wait_for_ctx; assert(static_cast(wait_ctx)); - return wait_ctx->notify(); + return wait_ctx->release(); } case consumer_status::when_any: { diff --git a/source/results/promises.cpp b/source/results/promises.cpp index 12b087dc..8b979fa1 100644 --- a/source/results/promises.cpp +++ b/source/results/promises.cpp @@ -1,21 +1,2 @@ #include "concurrencpp/results/promises.h" #include "concurrencpp/coroutines/coroutine.h" - -using concurrencpp::details::coroutine_per_thread_data; - -thread_local coroutine_per_thread_data coroutine_per_thread_data::s_tl_per_thread_data; - -void concurrencpp::details::initial_accumulating_awaiter::await_suspend(coroutine_handle handle) noexcept { - auto& per_thread_data = coroutine_per_thread_data::s_tl_per_thread_data; - auto accumulator = std::exchange(per_thread_data.accumulator, nullptr); - - assert(accumulator != nullptr); - assert(accumulator->capacity() > accumulator->size()); // so it's always noexcept - accumulator->emplace_back(await_via_functor {handle, &m_interrupted}); -} - -void concurrencpp::details::initial_accumulating_awaiter::await_resume() const { - if (m_interrupted) { - throw errors::broken_task(consts::k_broken_task_exception_error_msg); - } -} diff --git a/source/runtime/runtime.cpp b/source/runtime/runtime.cpp index 8bf47d18..ebda121b 100644 --- a/source/runtime/runtime.cpp +++ b/source/runtime/runtime.cpp @@ -13,15 +13,17 @@ #include namespace 
concurrencpp::details { - size_t default_max_cpu_workers() noexcept { - return static_cast(thread::hardware_concurrency() * consts::k_cpu_threadpool_worker_count_factor); - } + namespace { + size_t default_max_cpu_workers() noexcept { + return static_cast(thread::hardware_concurrency() * consts::k_cpu_threadpool_worker_count_factor); + } - size_t default_max_background_workers() noexcept { - return static_cast(thread::hardware_concurrency() * consts::k_background_threadpool_worker_count_factor); - } + size_t default_max_background_workers() noexcept { + return static_cast(thread::hardware_concurrency() * consts::k_background_threadpool_worker_count_factor); + } - constexpr static auto k_default_max_worker_wait_time = std::chrono::seconds(consts::k_max_threadpool_worker_waiting_time_sec); + constexpr auto k_default_max_worker_wait_time = std::chrono::seconds(consts::k_max_threadpool_worker_waiting_time_sec); + } // namespace } // namespace concurrencpp::details using concurrencpp::runtime; diff --git a/source/task.cpp b/source/task.cpp index eff6e9fe..6660a128 100644 --- a/source/task.cpp +++ b/source/task.cpp @@ -11,6 +11,43 @@ static_assert(sizeof(task) == concurrencpp::details::task_constants::total_size, using concurrencpp::details::callable_vtable; using concurrencpp::details::await_via_functor; + +namespace concurrencpp::details { + namespace { + class coroutine_handle_functor { + + private: + coroutine_handle m_coro_handle; + + public: + coroutine_handle_functor() noexcept : m_coro_handle() {} + + coroutine_handle_functor(const coroutine_handle_functor&) = delete; + coroutine_handle_functor& operator=(const coroutine_handle_functor&) = delete; + + coroutine_handle_functor(coroutine_handle coro_handle) noexcept : m_coro_handle(coro_handle) {} + + coroutine_handle_functor(coroutine_handle_functor&& rhs) noexcept : m_coro_handle(std::exchange(rhs.m_coro_handle, {})) {} + + ~coroutine_handle_functor() noexcept { + if (static_cast(m_coro_handle)) { + 
m_coro_handle.destroy(); + } + } + + void execute_destroy() noexcept { + auto coro_handle = std::exchange(m_coro_handle, {}); + coro_handle(); + } + + void operator()() noexcept { + execute_destroy(); + } + }; + } // namespace + +} // namespace concurrencpp::details + using concurrencpp::details::coroutine_handle_functor; void task::build(task&& rhs) noexcept { @@ -40,12 +77,20 @@ void task::build(details::coroutine_handle coro_handle) noexcept { build(details::coroutine_handle_functor {coro_handle}); } +bool task::contains_coroutine_handle() const noexcept { + return contains(); +} + task::task() noexcept : m_buffer(), m_vtable(nullptr) {} task::task(task&& rhs) noexcept { build(std::move(rhs)); } +task::task(details::coroutine_handle coro_handle) noexcept { + build(coro_handle); +} + task::~task() noexcept { clear(); } @@ -93,7 +138,7 @@ void task::clear() noexcept { } auto destroy_fn = vtable->destroy_fn; - if (vtable::trivially_destructable(destroy_fn)) { + if (vtable::trivially_destructible(destroy_fn)) { return; } @@ -102,4 +147,4 @@ void task::clear() noexcept { task::operator bool() const noexcept { return m_vtable != nullptr; -} \ No newline at end of file +} diff --git a/source/threads/async_condition_variable.cpp b/source/threads/async_condition_variable.cpp new file mode 100644 index 00000000..4953965c --- /dev/null +++ b/source/threads/async_condition_variable.cpp @@ -0,0 +1,89 @@ +#include "concurrencpp/results/resume_on.h" +#include "concurrencpp/threads/constants.h" +#include "concurrencpp/threads/async_condition_variable.h" + +using concurrencpp::executor; +using concurrencpp::lazy_result; +using concurrencpp::scoped_async_lock; +using concurrencpp::async_condition_variable; + +using concurrencpp::details::cv_awaiter; + +/* + cv_awaiter +*/ + +cv_awaiter::cv_awaiter(async_condition_variable& parent, scoped_async_lock& lock) noexcept : m_parent(parent), m_lock(lock) {} + +void cv_awaiter::await_suspend(details::coroutine_handle caller_handle) { + 
m_caller_handle = caller_handle; + + std::unique_lock lock(m_parent.m_lock); + m_lock.unlock(); + + m_parent.m_awaiters.push_back(*this); +} + +void cv_awaiter::resume() noexcept { + assert(static_cast(m_caller_handle)); + assert(!m_caller_handle.done()); + m_caller_handle(); +} + +/* + async_condition_variable +*/ + +async_condition_variable::~async_condition_variable() noexcept { +#ifdef CRCPP_DEBUG_MODE + std::unique_lock lock(m_lock); + assert(m_awaiters.empty() && "concurrencpp::async_condition_variable is deleted while being used."); +#endif +} + +void async_condition_variable::verify_await_params(const std::shared_ptr& resume_executor, const scoped_async_lock& lock) { + if (!static_cast(resume_executor)) { + throw std::invalid_argument(details::consts::k_async_condition_variable_await_invalid_resume_executor_err_msg); + } + + if (!lock.owns_lock()) { + throw std::invalid_argument(details::consts::k_async_condition_variable_await_lock_unlocked_err_msg); + } +} + +lazy_result async_condition_variable::await_impl(std::shared_ptr resume_executor, scoped_async_lock& lock) { + co_await details::cv_awaiter(*this, lock); + assert(!lock.owns_lock()); + co_await resume_on(resume_executor); // TODO: optimize this when get_current_executor is available + co_await lock.lock(resume_executor); +} + +lazy_result async_condition_variable::await(std::shared_ptr resume_executor, scoped_async_lock& lock) { + verify_await_params(resume_executor, lock); + return await_impl(std::move(resume_executor), lock); +} + +void async_condition_variable::notify_one() { + std::unique_lock lock(m_lock); + const auto awaiter = m_awaiters.pop_front(); + lock.unlock(); + + if (awaiter != nullptr) { + awaiter->resume(); + } +} + +void async_condition_variable::notify_all() { + std::unique_lock lock(m_lock); + auto awaiters = std::move(m_awaiters); + lock.unlock(); + + while (true) { + const auto awaiter = awaiters.pop_front(); + if (awaiter == nullptr) { + return; // no more awaiters + } + + 
awaiter->resume(); + } +} \ No newline at end of file diff --git a/source/threads/async_lock.cpp b/source/threads/async_lock.cpp index 8253edbe..347801e6 100644 --- a/source/threads/async_lock.cpp +++ b/source/threads/async_lock.cpp @@ -3,48 +3,36 @@ #include "concurrencpp/threads/async_lock.h" #include "concurrencpp/executors/executor.h" -namespace concurrencpp::details { - class async_lock_awaiter { - - friend class concurrencpp::async_lock; - - private: - async_lock& m_parent; - std::unique_lock m_lock; - async_lock_awaiter* m_next = nullptr; - coroutine_handle m_resume_handle; +using concurrencpp::async_lock; +using concurrencpp::scoped_async_lock; +using concurrencpp::details::async_lock_awaiter; - public: - async_lock_awaiter(async_lock& parent, std::unique_lock& lock) noexcept : - m_parent(parent), m_lock(std::move(lock)) {} +/* + async_lock_awaiter +*/ - static bool await_ready() noexcept { - return false; - } +async_lock_awaiter::async_lock_awaiter(async_lock& parent, std::unique_lock& lock) noexcept : + m_parent(parent), m_lock(std::move(lock)) {} - void await_suspend(coroutine_handle handle) { - assert(static_cast(handle)); - assert(!handle.done()); - assert(!static_cast(m_resume_handle)); - assert(m_lock.owns_lock()); +void async_lock_awaiter::await_suspend(coroutine_handle handle) { + assert(static_cast(handle)); + assert(!handle.done()); + assert(!static_cast(m_resume_handle)); + assert(m_lock.owns_lock()); - m_resume_handle = handle; - m_parent.enqueue_awaiter(m_lock, *this); + m_resume_handle = handle; + m_parent.m_awaiters.push_back(*this); - auto lock = std::move(m_lock); // will unlock underlying lock - } - - static void await_resume() noexcept {} + auto lock = std::move(m_lock); // will unlock underlying lock +} - void retry() noexcept { - m_resume_handle.resume(); - } - }; -} // namespace concurrencpp::details +void async_lock_awaiter::retry() noexcept { + m_resume_handle.resume(); +} -using concurrencpp::async_lock; -using 
concurrencpp::scoped_async_lock; -using concurrencpp::details::async_lock_awaiter; +/* + async_lock +*/ async_lock::~async_lock() noexcept { #ifdef CRCPP_DEBUG_MODE @@ -53,35 +41,6 @@ async_lock::~async_lock() noexcept { #endif } -void async_lock::enqueue_awaiter(std::unique_lock& lock, async_lock_awaiter& awaiter_node) noexcept { - assert(lock.owns_lock()); - - if (m_head == nullptr) { - assert(m_tail == nullptr); - m_head = m_tail = &awaiter_node; - return; - } - - m_tail->m_next = &awaiter_node; - m_tail = &awaiter_node; -} - -async_lock_awaiter* async_lock::try_dequeue_awaiter(std::unique_lock& lock) noexcept { - assert(lock.owns_lock()); - - const auto node = m_head; - if (node == nullptr) { - return nullptr; - } - - m_head = m_head->m_next; - if (m_head == nullptr) { - m_tail = nullptr; - } - - return node; -} - concurrencpp::lazy_result async_lock::lock_impl(std::shared_ptr resume_executor, bool with_raii_guard) { auto resume_synchronously = true; // indicates if the locking coroutine managed to lock the lock on first attempt @@ -106,7 +65,7 @@ concurrencpp::lazy_result async_lock::lock_impl(std::shared_p std::unique_lock lock(m_awaiter_lock); assert(m_locked); m_locked = false; - const auto awaiter = try_dequeue_awaiter(lock); + const auto awaiter = m_awaiters.pop_front(); lock.unlock(); if (awaiter != nullptr) { @@ -175,7 +134,7 @@ void async_lock::unlock() { assert(current_count == 1); #endif - const auto awaiter = try_dequeue_awaiter(lock); + const auto awaiter = m_awaiters.pop_front(); lock.unlock(); if (awaiter != nullptr) { @@ -266,4 +225,4 @@ async_lock* scoped_async_lock::release() noexcept { async_lock* scoped_async_lock::mutex() const noexcept { return m_lock; -} +} \ No newline at end of file diff --git a/source/threads/binary_semaphore.cpp b/source/threads/binary_semaphore.cpp deleted file mode 100644 index 60b60f3d..00000000 --- a/source/threads/binary_semaphore.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include 
"concurrencpp/threads/binary_semaphore.h" - -#if defined(CRCPP_MAC_OS) && defined(CRCPP_LIBCPP_LIB) - -# include - -using concurrencpp::details::binary_semaphore; - -binary_semaphore::binary_semaphore(std::ptrdiff_t desired) : m_is_signaled(desired != 0) {} - -void binary_semaphore::release(std::ptrdiff_t update) { - auto was_signaled = false; - - { - std::unique_lock lock(m_lock); - was_signaled = m_is_signaled; - m_is_signaled = true; - } - - if (!was_signaled) { - m_condition.notify_one(); - } -} - -void binary_semaphore::acquire() { - std::unique_lock lock(m_lock); - m_condition.wait(lock, [this] { - return m_is_signaled; - }); - - assert(m_is_signaled); - m_is_signaled = false; -} - -bool binary_semaphore::try_acquire() noexcept { - std::unique_lock lock(m_lock); - if (m_is_signaled) { - m_is_signaled = false; - return true; - } - - return false; -} - -bool binary_semaphore::try_acquire_until_impl(const std::chrono::time_point& abs_time) { - std::unique_lock lock(m_lock); - m_condition.wait_until(lock, abs_time, [this] { - return m_is_signaled; - }); - - if (m_is_signaled) { - m_is_signaled = false; - return true; - } - - return false; -} - -#endif diff --git a/source/threads/thread.cpp b/source/threads/thread.cpp index 0cb9a9b2..b300853b 100644 --- a/source/threads/thread.cpp +++ b/source/threads/thread.cpp @@ -9,16 +9,18 @@ using concurrencpp::details::thread; namespace concurrencpp::details { - std::uintptr_t generate_thread_id() noexcept { - static std::atomic_uintptr_t s_id_seed = 1; - return s_id_seed.fetch_add(1, std::memory_order_relaxed); - } - - struct thread_per_thread_data { - const std::uintptr_t id = generate_thread_id(); - }; - - static thread_local thread_per_thread_data s_tl_thread_per_data; + namespace { + std::uintptr_t generate_thread_id() noexcept { + static std::atomic_uintptr_t s_id_seed = 1; + return s_id_seed.fetch_add(1, std::memory_order_relaxed); + } + + struct thread_per_thread_data { + const std::uintptr_t id = 
generate_thread_id(); + }; + + thread_local thread_per_thread_data s_tl_thread_per_data; + } // namespace } // namespace concurrencpp::details std::thread::id thread::get_id() const noexcept { diff --git a/source/timers/timer_queue.cpp b/source/timers/timer_queue.cpp index 649db20e..3835be8f 100644 --- a/source/timers/timer_queue.cpp +++ b/source/timers/timer_queue.cpp @@ -20,125 +20,127 @@ using time_point = timer_queue::time_point; using request_queue = timer_queue::request_queue; namespace concurrencpp::details { - struct deadline_comparator { - bool operator()(const timer_ptr& a, const timer_ptr& b) const noexcept { - return a->get_deadline() < b->get_deadline(); - } - }; - - class timer_queue_internal { - using timer_set = std::multiset; - using timer_set_iterator = typename timer_set::iterator; - using iterator_map = std::unordered_map; - - private: - timer_set m_timers; - iterator_map m_iterator_mapper; - - void add_timer_internal(timer_ptr new_timer) { - assert(m_iterator_mapper.find(new_timer) == m_iterator_mapper.end()); - auto timer_it = m_timers.emplace(new_timer); - m_iterator_mapper.emplace(std::move(new_timer), timer_it); - } - - void remove_timer_internal(timer_ptr existing_timer) { - auto timer_it = m_iterator_mapper.find(existing_timer); - if (timer_it == m_iterator_mapper.end()) { - assert(existing_timer->is_oneshot() || existing_timer->cancelled()); // the timer was already deleted by - // the queue when it was fired. 
- return; + namespace { + struct deadline_comparator { + bool operator()(const timer_ptr& a, const timer_ptr& b) const noexcept { + return a->get_deadline() < b->get_deadline(); } + }; - auto set_iterator = timer_it->second; - m_timers.erase(set_iterator); - m_iterator_mapper.erase(timer_it); - } + class timer_queue_internal { + using timer_set = std::multiset; + using timer_set_iterator = typename timer_set::iterator; + using iterator_map = std::unordered_map; - void process_request_queue(request_queue& queue) { - for (auto& request : queue) { - auto& timer_ptr = request.first; - const auto opt = request.second; + private: + timer_set m_timers; + iterator_map m_iterator_mapper; - if (opt == timer_request::add) { - add_timer_internal(std::move(timer_ptr)); - } else { - remove_timer_internal(std::move(timer_ptr)); - } + void add_timer_internal(timer_ptr new_timer) { + assert(m_iterator_mapper.find(new_timer) == m_iterator_mapper.end()); + auto timer_it = m_timers.emplace(new_timer); + m_iterator_mapper.emplace(std::move(new_timer), timer_it); } - } - - void reset_containers_memory() noexcept { - assert(empty()); - timer_set timers; - std::swap(m_timers, timers); - iterator_map iterator_mapper; - std::swap(m_iterator_mapper, iterator_mapper); - } - - public: - bool empty() const noexcept { - assert(m_iterator_mapper.size() == m_timers.size()); - return m_timers.empty(); - } - - ::time_point process_timers(request_queue& queue) { - process_request_queue(queue); - - const auto now = high_resolution_clock::now(); - while (true) { - if (m_timers.empty()) { - break; + void remove_timer_internal(timer_ptr existing_timer) { + auto timer_it = m_iterator_mapper.find(existing_timer); + if (timer_it == m_iterator_mapper.end()) { + assert(existing_timer->is_oneshot() || existing_timer->cancelled()); // the timer was already deleted by + // the queue when it was fired. 
+ return; } - timer_set temp_set; + auto set_iterator = timer_it->second; + m_timers.erase(set_iterator); + m_iterator_mapper.erase(timer_it); + } - auto first_timer_it = m_timers.begin(); // closest deadline - auto timer_ptr = *first_timer_it; - const auto is_oneshot = timer_ptr->is_oneshot(); + void process_request_queue(request_queue& queue) { + for (auto& request : queue) { + auto& timer_ptr = request.first; + const auto opt = request.second; - if (!timer_ptr->expired(now)) { - // if this timer is not expired, the next ones are guaranteed not to, as - // the set is ordered by deadlines. - break; + if (opt == timer_request::add) { + add_timer_internal(std::move(timer_ptr)); + } else { + remove_timer_internal(std::move(timer_ptr)); + } } + } - // we are going to modify the timer, so first we extract it - auto timer_node = m_timers.extract(first_timer_it); + void reset_containers_memory() noexcept { + assert(empty()); + timer_set timers; + std::swap(m_timers, timers); + iterator_map iterator_mapper; + std::swap(m_iterator_mapper, iterator_mapper); + } - // we cannot use the naked node_handle according to the standard. it must - // be contained somewhere. 
- auto temp_it = temp_set.insert(std::move(timer_node)); + public: + bool empty() const noexcept { + assert(m_iterator_mapper.size() == m_timers.size()); + return m_timers.empty(); + } - // we fire it only if it's not cancelled - const auto cancelled = timer_ptr->cancelled(); - if (!cancelled) { - (*temp_it)->fire(); + ::time_point process_timers(request_queue& queue) { + process_request_queue(queue); + + const auto now = high_resolution_clock::now(); + + while (true) { + if (m_timers.empty()) { + break; + } + + timer_set temp_set; + + auto first_timer_it = m_timers.begin(); // closest deadline + auto timer_ptr = *first_timer_it; + const auto is_oneshot = timer_ptr->is_oneshot(); + + if (!timer_ptr->expired(now)) { + // if this timer is not expired, the next ones are guaranteed not to, as + // the set is ordered by deadlines. + break; + } + + // we are going to modify the timer, so first we extract it + auto timer_node = m_timers.extract(first_timer_it); + + // we cannot use the naked node_handle according to the standard. it must + // be contained somewhere. 
+ auto temp_it = temp_set.insert(std::move(timer_node)); + + // we fire it only if it's not cancelled + const auto cancelled = timer_ptr->cancelled(); + if (!cancelled) { + (*temp_it)->fire(); + } + + if (is_oneshot || cancelled) { + m_iterator_mapper.erase(timer_ptr); + continue; // let the timer die inside temp_set + } + + // regular timer, re-insert into the right position + timer_node = temp_set.extract(temp_it); + auto new_it = m_timers.insert(std::move(timer_node)); + // AppleClang doesn't have std::unordered_map::contains yet + assert(m_iterator_mapper.find(timer_ptr) != m_iterator_mapper.end()); + m_iterator_mapper[timer_ptr] = new_it; // update the iterator map, multiset::extract invalidates the + // timer } - if (is_oneshot || cancelled) { - m_iterator_mapper.erase(timer_ptr); - continue; // let the timer die inside temp_set + if (m_timers.empty()) { + reset_containers_memory(); + return now + std::chrono::hours(24); } - // regular timer, re-insert into the right position - timer_node = temp_set.extract(temp_it); - auto new_it = m_timers.insert(std::move(timer_node)); - // AppleClang doesn't have std::unordered_map::contains yet - assert(m_iterator_mapper.find(timer_ptr) != m_iterator_mapper.end()); - m_iterator_mapper[timer_ptr] = new_it; // update the iterator map, multiset::extract invalidates the - // timer - } - - if (m_timers.empty()) { - reset_containers_memory(); - return now + std::chrono::hours(24); + // get the closest deadline. + return (**m_timers.begin()).get_deadline(); } - - // get the closest deadline. 
- return (**m_timers.begin()).get_deadline(); - } - }; + }; + } // namespace } // namespace concurrencpp::details timer_queue::timer_queue(milliseconds max_waiting_time) : diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6d03670d..49e84a69 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,9 +1,18 @@ -cmake_minimum_required(VERSION 3.16) +cmake_minimum_required(VERSION 3.24) project(concurrencppTests LANGUAGES CXX) include(../cmake/coroutineOptions.cmake) +if(NOT DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY) + get_cmake_property(GENERATOR_IS_MULTI_CONFIG GENERATOR_IS_MULTI_CONFIG) + if(GENERATOR_IS_MULTI_CONFIG) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/$/bin) + else() + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) + endif() +endif() + # ---- Add root project ---- option(ENABLE_THREAD_SANITIZER "\ @@ -48,7 +57,7 @@ set(test_headers include/utils/test_ready_result.h include/utils/test_ready_lazy_result.h) -add_library(concurrencpp_test_infra ${test_headers} ${test_sources}) +add_library(concurrencpp_test_infra STATIC ${test_headers} ${test_sources}) target_compile_features(concurrencpp_test_infra PRIVATE cxx_std_20) target_include_directories(concurrencpp_test_infra PRIVATE "${PROJECT_SOURCE_DIR}/../include") @@ -88,11 +97,8 @@ function(add_test) # Call the original add_test _add_test(NAME ${test_name} COMMAND ${target}) - set_property(TEST ${test_name} PROPERTY RUN_SERIAL YES) - - if(TEST_PROPERTIES) - set_tests_properties(${test_name} PROPERTIES ${TEST_PROPERTIES}) - endif() + set_tests_properties(${test_name} PROPERTIES RUN_SERIAL YES + ${TEST_PROPERTIES}) endfunction() add_test(NAME task_tests PATH source/tests/task_tests.cpp) @@ -127,6 +133,7 @@ add_test(NAME coroutine_tests PATH source/tests/coroutine_tests/coroutine_tests. 
add_test(NAME async_lock_tests PATH source/tests/async_lock_tests.cpp) add_test(NAME scoped_async_lock_tests PATH source/tests/scoped_async_lock_tests.cpp) +add_test(NAME async_condition_variable_tests PATH source/tests/async_condition_variable_tests.cpp) add_test(NAME timer_queue_tests PATH source/tests/timer_tests/timer_queue_tests.cpp) add_test(NAME timer_tests PATH source/tests/timer_tests/timer_tests.cpp) @@ -145,3 +152,4 @@ add_test(NAME tsan_lazy_fibonacci PATH source/thread_sanitizer/lazy_fibonacci.cp add_test(NAME tsan_quick_sort PATH source/thread_sanitizer/quick_sort.cpp) add_test(NAME tsan_matrix_multiplication PATH source/thread_sanitizer/matrix_multiplication.cpp) add_test(NAME tsan_async_lock PATH source/thread_sanitizer/async_lock.cpp) +add_test(NAME tsan_async_condition_variable PATH source/thread_sanitizer/async_condition_variable.cpp) diff --git a/test/include/utils/test_ready_lazy_result.h b/test/include/utils/test_ready_lazy_result.h index 0734192a..d8dfe92a 100644 --- a/test/include/utils/test_ready_lazy_result.h +++ b/test/include/utils/test_ready_lazy_result.h @@ -60,7 +60,7 @@ namespace concurrencpp::tests { try { co_await result; - } catch (custom_exception e) { + } catch (const custom_exception& e) { assert_equal(e.id, id); co_return; } catch (...) { diff --git a/test/include/utils/test_ready_result.h b/test/include/utils/test_ready_result.h index dadc0314..89b08ce6 100644 --- a/test/include/utils/test_ready_result.h +++ b/test/include/utils/test_ready_result.h @@ -124,7 +124,7 @@ namespace concurrencpp::tests { try { result.get(); - } catch (custom_exception e) { + } catch (const custom_exception& e) { return assert_equal(e.id, id); } catch (...) 
{ } @@ -140,7 +140,7 @@ namespace concurrencpp::tests { for (size_t i = 0; i < 10; i++) { try { result.get(); - } catch (custom_exception e) { + } catch (const custom_exception& e) { assert_equal(e.id, id); if (i == 9) { return; diff --git a/test/source/tests/async_condition_variable_tests.cpp b/test/source/tests/async_condition_variable_tests.cpp new file mode 100644 index 00000000..2f5b4b83 --- /dev/null +++ b/test/source/tests/async_condition_variable_tests.cpp @@ -0,0 +1,235 @@ +#include "concurrencpp/concurrencpp.h" + +#include "infra/tester.h" +#include "infra/assertions.h" +#include "utils/executor_shutdowner.h" + +#include "concurrencpp/threads/constants.h" + +using namespace concurrencpp; + +namespace concurrencpp::tests { + void test_async_condition_variable_await_null_resume_executor(); + void test_async_condition_variable_await_unlocked_scoped_async_lock(); + void test_async_condition_variable_await(); + + void test_async_condition_variable_await_pred_null_resume_executor(); + void test_async_condition_variable_await_pred_unlocked_scoped_async_lock(); + void test_async_condition_variable_await_pred(); + + void test_async_condition_variable_notify_one(); + void test_async_condition_variable_notify_all(); +} // namespace concurrencpp::tests + +using namespace concurrencpp::tests; + +void tests::test_async_condition_variable_await_null_resume_executor() { + async_lock lock; + async_condition_variable cv; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + auto scoped_lock = lock.lock(executor).run().get(); + + assert_throws_with_error_message( + [&] { + cv.await({}, scoped_lock).run().get(); + }, + concurrencpp::details::consts::k_async_condition_variable_await_invalid_resume_executor_err_msg); +} + +void tests::test_async_condition_variable_await_unlocked_scoped_async_lock() { + async_condition_variable cv; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + assert_throws_with_error_message( 
+ [&] { + scoped_async_lock sal; + cv.await(executor, sal).run().get(); + }, + concurrencpp::details::consts::k_async_condition_variable_await_lock_unlocked_err_msg); +} + +void tests::test_async_condition_variable_await() { + test_async_condition_variable_await_null_resume_executor(); + test_async_condition_variable_await_unlocked_scoped_async_lock(); + + async_lock lock; + async_condition_variable cv; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + auto task = [&]() -> result { + auto sal = co_await lock.lock(executor); + co_await cv.await(executor, sal); + }; + + auto res = task(); + + for (size_t i = 0; i < 5; i++) { + assert_equal(res.status(), result_status::idle); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + cv.notify_one(); + assert_equal(res.status(), result_status::value); + res.get(); +} + +void tests::test_async_condition_variable_await_pred_null_resume_executor() { + async_lock lock; + async_condition_variable cv; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + auto scoped_lock = lock.lock(executor).run().get(); + + assert_throws_with_error_message( + [&] { + cv.await({}, scoped_lock, [] { + return true; + }); + }, + concurrencpp::details::consts::k_async_condition_variable_await_invalid_resume_executor_err_msg); +} + +void tests::test_async_condition_variable_await_pred_unlocked_scoped_async_lock() { + async_lock lock; + async_condition_variable cv; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + assert_throws_with_error_message( + [&] { + scoped_async_lock sal; + cv.await(executor, sal, [] { + return true; + }); + }, + concurrencpp::details::consts::k_async_condition_variable_await_lock_unlocked_err_msg); +} + +void tests::test_async_condition_variable_await_pred() { + test_async_condition_variable_await_pred_null_resume_executor(); + test_async_condition_variable_await_pred_unlocked_scoped_async_lock(); + + async_lock 
lock; + async_condition_variable cv; + auto running = true; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + auto task = [&]() -> result { + auto sal = co_await lock.lock(executor); + co_await cv.await(executor, sal, [&] { + return !running; + }); + }; + + auto res = task(); + + for (size_t i = 0; i < 5; i++) { + assert_equal(res.status(), result_status::idle); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + cv.notify_one(); + + for (size_t i = 0; i < 5; i++) { + assert_equal(res.status(), result_status::idle); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + auto task0 = [&]() -> result { + auto sal = co_await lock.lock(executor); + running = false; + }; + + task0().get(); + + for (size_t i = 0; i < 5; i++) { + assert_equal(res.status(), result_status::idle); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + cv.notify_one(); + + assert_equal(res.status(), result_status::value); + res.get(); +} + +void tests::test_async_condition_variable_notify_one() { + async_lock lock; + async_condition_variable cv; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + auto task = [&]() -> result { + auto sal = co_await lock.lock(executor); + co_await cv.await(executor, sal); + }; + + std::vector> results; + results.reserve(64); + + for (size_t i = 0; i < 64; i++) { + results.emplace_back(task()); + } + + for (auto& result : results) { + assert_equal(result.status(), result_status::idle); + } + + while (!results.empty()) { + cv.notify_one(); + assert_equal(results[0].status(), result_status::value); + + results.erase(results.begin()); + + for (const auto& result : results) { + assert_equal(result.status(), result_status::idle); + } + } +} + +void tests::test_async_condition_variable_notify_all() { + async_lock lock; + async_condition_variable cv; + const auto executor = std::make_shared(); + executor_shutdowner es(executor); + + auto task = [&]() -> 
result { + auto sal = co_await lock.lock(executor); + co_await cv.await(executor, sal); + }; + + std::vector> results; + results.reserve(64); + + for (size_t i = 0; i < 64; i++) { + results.emplace_back(task()); + } + + for (auto& result : results) { + assert_equal(result.status(), result_status::idle); + } + + cv.notify_all(); + + for (auto& result : results) { + assert_equal(result.status(), result_status::value); + } +} + +int main() { + tester tester("async_condition_variable test"); + + tester.add_step("await", test_async_condition_variable_await); + tester.add_step("await + pred", test_async_condition_variable_await_pred); + tester.add_step("notify_one", test_async_condition_variable_notify_one); + tester.add_step("notify_all", test_async_condition_variable_notify_all); + + tester.launch_test(); + return 0; +} diff --git a/test/source/tests/executor_tests/thread_pool_executor_tests.cpp b/test/source/tests/executor_tests/thread_pool_executor_tests.cpp index 859edc77..aba41a48 100644 --- a/test/source/tests/executor_tests/thread_pool_executor_tests.cpp +++ b/test/source/tests/executor_tests/thread_pool_executor_tests.cpp @@ -406,18 +406,18 @@ void concurrencpp::tests::test_thread_pool_executor_enqueue_algorithm() { { object_observer observer; const size_t worker_count = 6; - auto wc = std::make_shared(); + auto wc = std::make_shared>(0); auto executor = std::make_shared("threadpool", worker_count, std::chrono::seconds(10)); executor_shutdowner shutdown(executor); for (size_t i = 0; i < worker_count; i++) { executor->post([wc, stub = observer.get_testing_stub()]() mutable { - wc->wait(); // make sure this thread is not idle by imitating endless work + wc->acquire(); // make sure this thread is not idle by imitating endless work stub(); }); } - wc->notify(); + wc->release(worker_count); observer.wait_execution_count(worker_count, std::chrono::seconds(6)); @@ -434,12 +434,12 @@ void concurrencpp::tests::test_thread_pool_executor_enqueue_algorithm() { // self { 
object_observer observer; - auto wc = std::make_shared(); + auto wc = std::make_shared(0); auto executor = std::make_shared("threadpool", 2, std::chrono::seconds(10)); executor_shutdowner shutdown(executor); executor->post([wc]() { - wc->wait(); + wc->acquire(); }); constexpr size_t task_count = 1'024; @@ -455,7 +455,7 @@ void concurrencpp::tests::test_thread_pool_executor_enqueue_algorithm() { assert_equal(observer.get_execution_map().size(), static_cast(1)); - wc->notify(); + wc->release(); } // case 3 : if (2) is false, choose a worker using round robin @@ -463,13 +463,13 @@ void concurrencpp::tests::test_thread_pool_executor_enqueue_algorithm() { const size_t task_count = 4'024; const size_t worker_count = 4; object_observer observer; - auto wc = std::make_shared(); + auto wc = std::make_shared>(0); auto executor = std::make_shared("threadpool", worker_count, std::chrono::seconds(10)); executor_shutdowner shutdown(executor); for (size_t i = 0; i < worker_count; i++) { executor->post([wc]() { - wc->wait(); + wc->acquire(); }); } @@ -478,7 +478,7 @@ void concurrencpp::tests::test_thread_pool_executor_enqueue_algorithm() { executor->post(observer.get_testing_stub()); } - wc->notify(); + wc->release(worker_count); observer.wait_execution_count(task_count, std::chrono::minutes(1)); observer.wait_destruction_count(task_count, std::chrono::minutes(1)); diff --git a/test/source/tests/timer_tests/timer_tests.cpp b/test/source/tests/timer_tests/timer_tests.cpp index 2c336125..3cc6107b 100644 --- a/test/source/tests/timer_tests/timer_tests.cpp +++ b/test/source/tests/timer_tests/timer_tests.cpp @@ -409,17 +409,17 @@ void concurrencpp::tests::test_timer_cancel_before_due_time() { void concurrencpp::tests::test_timer_cancel_after_due_time_before_beat() { object_observer observer; - concurrencpp::details::wait_context wc; + std::binary_semaphore wc(0); auto timer_queue = std::make_shared(120s); auto ex = std::make_shared(); auto timer = timer_queue->make_timer(100ms, 200ms, 
ex, [&wc, stub = observer.get_testing_stub()]() mutable { stub(); - wc.notify(); + wc.release(); }); // will be released after the first beat. - wc.wait(); + wc.acquire(); timer.cancel(); std::this_thread::sleep_for(2s); @@ -431,7 +431,7 @@ void concurrencpp::tests::test_timer_cancel_after_due_time_before_beat() { void concurrencpp::tests::test_timer_cancel_after_due_time_after_beat() { object_observer observer; - concurrencpp::details::wait_context wc; + std::binary_semaphore wc(0); auto timer_queue = std::make_shared(120s); auto ex = std::make_shared(); constexpr size_t max_invocation_count = 4; @@ -442,12 +442,12 @@ void concurrencpp::tests::test_timer_cancel_after_due_time_after_beat() { const auto c = invocation_counter.fetch_add(1, std::memory_order_relaxed) + 1; if (c == max_invocation_count) { - wc.notify(); + wc.release(); } }); // will be released after the first beat. - wc.wait(); + wc.acquire(); timer.cancel(); std::this_thread::sleep_for(2s); diff --git a/test/source/thread_sanitizer/async_condition_variable.cpp b/test/source/thread_sanitizer/async_condition_variable.cpp new file mode 100644 index 00000000..77360ed8 --- /dev/null +++ b/test/source/thread_sanitizer/async_condition_variable.cpp @@ -0,0 +1,195 @@ +#include "concurrencpp/concurrencpp.h" + +#include +#include +#include + +void test_async_cv_await(); +void test_async_cv_await_pred(); +void test_async_cv_notify_one(); +void test_async_cv_notify_all(); + +int main() { + std::cout << "Starting async_condition_variable test" << std::endl; + std::cout << "================================" << std::endl; + + std::cout << "async_condition_variable::await test" << std::endl; + test_async_cv_await(); + std::cout << "================================" << std::endl; + + std::cout << "async_condition_variable::await(pred) test" << std::endl; + test_async_cv_await_pred(); + std::cout << "================================" << std::endl; + + std::cout << "async_condition_variable::notify_one test" << 
std::endl; + test_async_cv_notify_one(); + std::cout << "================================" << std::endl; + + std::cout << "async_condition_variable::notify_all test" << std::endl; + test_async_cv_notify_all(); + std::cout << "================================" << std::endl; + + std::cout << "done" << std::endl; + std::cout << "================================" << std::endl; +} + +using namespace concurrencpp; +using namespace std::chrono; + +void test_async_cv_await() { + concurrencpp::runtime runtime; + + async_lock lock; + async_condition_variable cv; + + constexpr size_t task_count = 512; + const auto deadline = system_clock::now() + seconds(5); + + std::vector> results; + results.reserve(task_count); + + auto task = [&](executor_tag, std::shared_ptr te) -> result { + std::this_thread::sleep_until(deadline); + + auto guard = co_await lock.lock(te); + co_await cv.await(te, guard); + }; + + for (size_t i = 0; i < task_count; i++) { + auto wte = runtime.make_worker_thread_executor(); + results.emplace_back(task({}, wte)); + } + + std::this_thread::sleep_until(deadline + std::chrono::seconds(5)); + + cv.notify_all(); + + for (auto& result : results) { + result.get(); + } +} + +void test_async_cv_await_pred() { + concurrencpp::runtime runtime; + + int pred = 10; + async_lock lock; + async_condition_variable cv; + + constexpr size_t task_count = 512; + const auto deadline = system_clock::now() + seconds(5); + + std::vector> results; + results.reserve(task_count); + + auto task = [&](executor_tag, std::shared_ptr te) -> result { + std::this_thread::sleep_until(deadline); + + auto guard = co_await lock.lock(te); + co_await cv.await(te, guard, [&] { + return pred < 10; + }); + }; + + for (size_t i = 0; i < task_count; i++) { + auto wte = runtime.make_worker_thread_executor(); + results.emplace_back(task({}, wte)); + } + + std::this_thread::sleep_until(deadline + std::chrono::seconds(5)); + + cv.notify_all(); + + std::this_thread::sleep_for(std::chrono::seconds(5)); + + 
[&](executor_tag, std::shared_ptr te) -> result { + auto guard = co_await lock.lock(te); + pred = -10; + guard.unlock(); + }({}, runtime.thread_executor()) + .get(); + + cv.notify_all(); + + for (auto& result : results) { + result.get(); + } +} + +void test_async_cv_notify_one() { + concurrencpp::runtime runtime; + + async_lock lock; + async_condition_variable cv; + + constexpr size_t task_count = 512; + const auto deadline = system_clock::now() + seconds(5); + + std::vector> results; + results.reserve(task_count * 2); + + auto waiter_task = [&](executor_tag, std::shared_ptr te) -> result { + std::this_thread::sleep_until(deadline); + + auto guard = co_await lock.lock(te); + co_await cv.await(te, guard); + }; + + for (size_t i = 0; i < task_count; i++) { + auto wte = runtime.make_worker_thread_executor(); + results.emplace_back(waiter_task({}, wte)); + } + + auto notifier_task = [&](executor_tag, std::shared_ptr te) -> result { + std::this_thread::sleep_until(deadline + milliseconds(1)); + cv.notify_one(); + co_return; + }; + + for (size_t i = 0; i < task_count; i++) { + results.emplace_back(notifier_task({}, runtime.thread_executor())); + } + + for (auto& result : results) { + result.get(); + } +} + +void test_async_cv_notify_all() { + concurrencpp::runtime runtime; + + async_lock lock; + async_condition_variable cv; + + constexpr size_t task_count = 512; + const auto deadline = system_clock::now() + seconds(5); + + std::vector> results; + results.reserve(task_count * 2); + + auto waiter_task = [&](executor_tag, std::shared_ptr te) -> result { + std::this_thread::sleep_until(deadline); + + auto guard = co_await lock.lock(te); + co_await cv.await(te, guard); + }; + + for (size_t i = 0; i < task_count; i++) { + auto wte = runtime.make_worker_thread_executor(); + results.emplace_back(waiter_task({}, wte)); + } + + auto notifier_task = [&](executor_tag, std::shared_ptr te) -> result { + std::this_thread::sleep_until(deadline + milliseconds(1)); + cv.notify_all(); + 
co_return; + }; + + for (size_t i = 0; i < task_count; i++) { + results.emplace_back(notifier_task({}, runtime.thread_executor())); + } + + for (auto& result : results) { + result.get(); + } +} diff --git a/test/source/thread_sanitizer/async_lock.cpp b/test/source/thread_sanitizer/async_lock.cpp index ee41e9d1..e7934fdd 100644 --- a/test/source/thread_sanitizer/async_lock.cpp +++ b/test/source/thread_sanitizer/async_lock.cpp @@ -1,5 +1,6 @@ #include "concurrencpp/concurrencpp.h" +#include #include void async_increment(concurrencpp::runtime& runtime);