Merge remote-tracking branch 'origin/master' into t5

fairydreaming · Jun 20, 2024 · b3e4332
2 parents 704b160 + abd894a
commit b3e4332
Show file tree

Hide file tree

Showing 130 changed files with 48,504 additions and 42,667 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -26,3 +26,6 @@ indent_size = 2
 
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
+
+[examples/cvector-generator/*.txt]
+insert_final_newline = unset
diff --git a/.github/labeler.yml b/.github/labeler.yml
@@ -42,7 +42,6 @@ build:
             - cmake/**
             - CMakeLists.txt
             - CMakePresets.json
-            - codecov.yml
 examples:
     - changed-files:
         - any-glob-to-any-file: examples/**

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -1,5 +1,7 @@
-- Self Reported Review Complexity:
-    - [ ] Review Complexity : Low
-    - [ ] Review Complexity : Medium
-    - [ ] Review Complexity : High
-- [ ] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
+
+
+- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
+- Self-reported review complexity:
+  - [ ] Low
+  - [ ] Medium
+  - [ ] High
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -84,7 +84,7 @@ jobs:
           name: llama-bin-macos-arm64.zip
 
   macOS-latest-cmake-x64:
-    runs-on: macos-latest
+    runs-on: macos-12
 
     steps:
       - name: Clone

diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml
diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
@@ -87,8 +87,22 @@ jobs:
             exit 1
           fi
 
+      - name: Build (no OpenMP)
+        id: cmake_build_no_openmp
+        if: ${{ matrix.sanitizer == 'THREAD' }}
+        run: |
+          cmake -B build \
+              -DLLAMA_NATIVE=OFF \
+              -DLLAMA_BUILD_SERVER=ON \
+              -DLLAMA_CURL=ON \
+              -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+              -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+              -DLLAMA_OPENMP=OFF ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
       - name: Build
         id: cmake_build
+        if: ${{ matrix.sanitizer != 'THREAD' }}
         run: |
           cmake -B build \
               -DLLAMA_NATIVE=OFF \

diff --git a/.gitignore b/.gitignore
@@ -1,90 +1,123 @@
-*.o
+# Extensions
+
 *.a
-*.so
-*.gguf
-*.gguf.json
+*.bat
 *.bin
-*.exe
 *.dll
-*.log
-*.gcov
-*.gcno
-*.gcda
 *.dot
-*.bat
-*.tmp
-*.metallib
 *.etag
+*.exe
+*.gcda
+*.gcno
+*.gcov
+*.gguf
+*.gguf.json
 *.lastModified
-.DS_Store
-.build/
+*.log
+*.metallib
+*.o
+*.so
+*.tmp
+
+# IDE / OS
+
 .cache/
 .ccls-cache/
 .direnv/
+.DS_Store
 .envrc
+.idea/
 .swiftpm
-.venv
-.clang-tidy
 .vs/
 .vscode/
-.idea/
+nppBackup
 
-ggml-metal-embed.metal
 
-lcov-report/
+# Coverage
+
 gcovr-report/
+lcov-report/
+
+# Build Artifacts
 
 tags
+.build/
 build*
+!build-info.cmake
+!build-info.cpp.in
+!build-info.sh
 !build.zig
-cmake-build-*
+/libllama.so
+/llama-*
 android-ndk-*
+arm_neon.h
+cmake-build-*
+CMakeSettings.json
+compile_commands.json
+ggml-metal-embed.metal
+llama-batched-swift
 out/
 tmp/
 
+# CI
+
+!.github/workflows/*.yml
+
+# Models
+
 models/*
 models-mnt
+!models/.editorconfig
+!models/ggml-vocab-*.gguf*
 
-/Pipfile
-/libllama.so
-/llama-*
-llama-batched-swift
-/common/build-info.cpp
-arm_neon.h
-compile_commands.json
-CMakeSettings.json
-
-__pycache__
-dist
+# Zig
 
 zig-out/
 zig-cache/
 
+# Logs
+
 ppl-*.txt
 qnt-*.txt
 perf-*.txt
 
+# Examples
+
 examples/jeopardy/results.txt
+examples/server/*.css.hpp
 examples/server/*.html.hpp
 examples/server/*.js.hpp
 examples/server/*.mjs.hpp
-examples/server/*.css.hpp
+!build_64.sh
+!examples/*.bat
+!examples/*/*.kts
+!examples/*/*/*.kts
+!examples/sycl/*.bat
+!examples/sycl/*.sh
 
+# Python
+
+__pycache__
+.venv
+/Pipfile
+dist
 poetry.lock
 poetry.toml
-nppBackup
 
 # Test binaries
-/tests/test-grammar-parser
-/tests/test-llama-grammar
+/tests/test-backend-ops
 /tests/test-double-float
 /tests/test-grad0
+/tests/test-grammar-parser
+/tests/test-llama-grammar
 /tests/test-opt
 /tests/test-quantize-fns
 /tests/test-quantize-perf
+/tests/test-rope
 /tests/test-sampling
 /tests/test-tokenizer-0
-/tests/test-tokenizer-1-spm
 /tests/test-tokenizer-1-bpe
-/tests/test-rope
-/tests/test-backend-ops
+/tests/test-tokenizer-1-spm
+
+# Scripts
+!/scripts/install-oneapi.bat
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -119,6 +119,7 @@ option(LLAMA_HIP_UMA                         "llama: use HIP unified memory arch
 option(LLAMA_VULKAN                          "llama: use Vulkan"                                OFF)
 option(LLAMA_VULKAN_CHECK_RESULTS            "llama: run Vulkan op checks"                      OFF)
 option(LLAMA_VULKAN_DEBUG                    "llama: enable Vulkan debug output"                OFF)
+option(LLAMA_VULKAN_MEMORY_DEBUG             "llama: enable Vulkan memory debug output"         OFF)
 option(LLAMA_VULKAN_VALIDATE                 "llama: enable Vulkan validation"                  OFF)
 option(LLAMA_VULKAN_RUN_TESTS                "llama: run Vulkan tests"                          OFF)
 option(LLAMA_METAL                           "llama: use Metal"                                 ${LLAMA_METAL_DEFAULT})
@@ -534,6 +535,10 @@ if (LLAMA_VULKAN)
             add_compile_definitions(GGML_VULKAN_DEBUG)
         endif()
 
+        if (LLAMA_VULKAN_MEMORY_DEBUG)
+            add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
+        endif()
+
         if (LLAMA_VULKAN_VALIDATE)
             add_compile_definitions(GGML_VULKAN_VALIDATE)
         endif()
@@ -660,6 +665,7 @@ if (LLAMA_SYCL)
     #todo: AOT
 
     find_package(IntelSYCL REQUIRED)
+    find_package(MKL REQUIRED)
 
     message(STATUS "SYCL found")
 
@@ -674,21 +680,22 @@ if (LLAMA_SYCL)
     endif()
 
     add_compile_options(-I./) #include DPCT
-    add_compile_options(-I/${SYCL_INCLUDE_DIR})
 
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
     if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
     endif()
 
     set(GGML_HEADERS_SYCL ggml-sycl.h)
-    set(GGML_SOURCES_SYCL ggml-sycl.cpp)
+    file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
+    list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
 
     if (WIN32)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
+        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
     else()
+        add_compile_options(-I/${SYCL_INCLUDE_DIR})
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
         if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
             set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
         elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")

diff --git a/CMakePresets.json b/CMakePresets.json
@@ -11,9 +11,21 @@
             "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
         }
     },
-
+    {
+        "name": "sycl-base",
+        "hidden": true,
+        "generator": "Ninja",
+        "binaryDir": "${sourceDir}/build-${presetName}",
+        "cacheVariables": {
+            "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+            "CMAKE_CXX_COMPILER": "icx",
+            "LLAMA_SYCL": "ON",
+            "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+        }
+    },
     { "name": "debug",   "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
-    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+    { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
     { "name": "static",  "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },
 
     {
@@ -35,15 +47,18 @@
     },
 
     { "name": "arm64-windows-llvm-debug"  , "inherits": [ "base", "arm64-windows-llvm",  "debug"   ] },
-    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm",  "release" ] },
-    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm",  "release", "static" ] },
+    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm",  "reldbg" ] },
+    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm",  "reldbg", "static" ] },
 
     { "name": "arm64-windows-msvc-debug"  , "inherits": [ "base", "arm64-windows-msvc",  "debug"   ] },
-    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc",  "release" ] },
-    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc",  "release", "static" ] },
+    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc",  "reldbg" ] },
+    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc",  "reldbg", "static" ] },
 
     { "name": "x64-windows-msvc-debug"  , "inherits": [ "base", "debug"   ] },
-    { "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] },
-    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] }
+    { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+
+    { "name": "x64-windows-sycl-debug"  , "inherits": [ "sycl-base", "debug"   ] },
+    { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
   ]
 }