From 9e5a4eaa84156084ed7bbb91e6efcc91dc6217bc Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sat, 5 Jul 2025 02:15:10 -0400
Subject: [PATCH 01/11] fix: Update reference to `llama_kv_cache_clear` in Llama.embed. Closes #2037

---
 llama_cpp/llama.py  |  4 ++--
 tests/test_llama.py | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index cdc05c7ad..2e93670e6 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1041,7 +1041,7 @@ def embed(
         data: Union[List[List[float]], List[List[List[float]]]] = []
 
         def decode_batch(seq_sizes: List[int]):
-            llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
+            llama_cpp.llama_kv_self_clear(self._ctx.ctx)
             self._ctx.decode(self._batch)
             self._batch.reset()
 
@@ -1112,7 +1112,7 @@ def decode_batch(seq_sizes: List[int]):
 
         output = data[0] if isinstance(input, str) else data
 
-        llama_cpp.llama_kv_cache_clear(self._ctx.ctx)
+        llama_cpp.llama_kv_self_clear(self._ctx.ctx)
         self.reset()
 
         if return_count:
diff --git a/tests/test_llama.py b/tests/test_llama.py
index fc182ae20..0a1a9f5ad 100644
--- a/tests/test_llama.py
+++ b/tests/test_llama.py
@@ -216,3 +216,19 @@ def logit_processor_func(input_ids, logits):
 
     assert number_1 != number_2
     assert number_1 == number_3
+
+
+def test_real_llama_embeddings(llama_cpp_model_path):
+    model = llama_cpp.Llama(
+        llama_cpp_model_path,
+        n_ctx=32,
+        n_batch=32,
+        n_ubatch=32,
+        n_threads=multiprocessing.cpu_count(),
+        n_threads_batch=multiprocessing.cpu_count(),
+        logits_all=False,
+        flash_attn=True,
+        embedding=True
+    )
+    # Smoke test for now
+    model.embed("Hello World")

From ae54cde83aafbbef6d203fa18627a7f6b97b705f Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sat, 5 Jul 2025 02:20:25 -0400
Subject: [PATCH 02/11] fix(ci): Update cuda build action to use ubuntu 22.04

---
 .github/workflows/build-wheels-cuda.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 745b2e602..63b4c26ea 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -20,7 +20,7 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-latest', 'windows-2019')
+              'os' = @('ubuntu-22.04', 'windows-2022')
               'pyver' = @("3.9", "3.10", "3.11", "3.12")
               'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
               'releasetag' = @("basic")

From 083fcf657dc36e0614f77c0ca468429a876a0862 Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sat, 5 Jul 2025 02:21:05 -0400
Subject: [PATCH 03/11] fix(ci): Add git to package list

---
 docker/simple/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/simple/Dockerfile b/docker/simple/Dockerfile
index 3594df1a5..06483d44e 100644
--- a/docker/simple/Dockerfile
+++ b/docker/simple/Dockerfile
@@ -9,6 +9,7 @@ ARG IMAGE
 
 # Update and upgrade the existing packages 
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
+    git \
     python3 \
     python3-pip \
     ninja-build \

From 11d28df7d86c7ae34816cf68fc0ca1ac5023b59d Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sat, 5 Jul 2025 02:23:36 -0400
Subject: [PATCH 04/11] fix(ci): Remove macos-13 builds to fix cross
 compilation error

---
 .github/workflows/build-wheels-metal.yaml | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml
index 9b97bf2f5..98f511e4a 100644
--- a/.github/workflows/build-wheels-metal.yaml
+++ b/.github/workflows/build-wheels-metal.yaml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [macos-13, macos-14, macos-15]
+        os: [macos-14, macos-15]
 
     steps:
       - uses: actions/checkout@v4
@@ -23,32 +23,21 @@ jobs:
         with:
           python-version: "3.12"
           cache: 'pip'
-          
+
       - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           RUST_LOG=trace python -m uv pip install -e .[all] --verbose
         shell: bash
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        env:
-          RUST_LOG: trace        
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: cmd
-
       - name: Build wheels
         uses: pypa/cibuildwheel@v2.22.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on -DCMAKE_CROSSCOMPILING=ON"
           CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
         with:
           package-dir: .
@@ -69,7 +58,7 @@ jobs:
         with:
           merge-multiple: true
           path: dist2
-          
+
       - uses: softprops/action-gh-release@v2
         with:
           files: dist2/*

From 1580839fe8fa01324a0d33bc52d63279df368eec Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sat, 5 Jul 2025 02:25:41 -0400
Subject: [PATCH 05/11] chore: Bump version

---
 CHANGELOG.md          | 4 ++++
 llama_cpp/__init__.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e08e52c10..6e336962f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.11]
+
+- fix: Update reference to `llama_kv_cache_clear` in Llama.embed. Closes #2037 by @abetlen in 9e5a4eaa84156084ed7bbb91e6efcc91dc6217bc
+
 ## [0.3.10]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@8846aace4934ad29651ea61b8c7e3f6b0556e3d2
diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py
index 11a511390..e35c5014e 100644
--- a/llama_cpp/__init__.py
+++ b/llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.3.10"
+__version__ = "0.3.11"

From 82ad829c4c95af4435c30cd07dfa51025e404712 Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sat, 5 Jul 2025 02:35:56 -0400
Subject: [PATCH 06/11] fix(ci): update runners for cpu builds

---
 .github/workflows/build-and-release.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index 7307c85ab..29a6913c2 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, windows-2019, macos-13]
+        os: [ubuntu-22.04, windows-2022, macos-14, macos-15]
 
     steps:
       - uses: actions/checkout@v4

From 7011bc198ddc6a3fb2756c554915d4137893e21c Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sun, 6 Jul 2025 00:45:59 -0400
Subject: [PATCH 07/11] fix(ci): Update docker runner

---
 .github/workflows/build-docker.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml
index b5c7346db..b290f6273 100644
--- a/.github/workflows/build-docker.yaml
+++ b/.github/workflows/build-docker.yaml
@@ -9,7 +9,7 @@ permissions:
 jobs:
   docker:
     name: Build and push Docker image
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout
         uses: actions/checkout@v4

From b39e9d4518e1d747dc02fd3bb611ea3d22a9393e Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sun, 6 Jul 2025 00:52:13 -0400
Subject: [PATCH 08/11] feat: Update llama.cpp

---
 vendor/llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 8846aace4..a0374a67e 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 8846aace4934ad29651ea61b8c7e3f6b0556e3d2
+Subproject commit a0374a67e2924f2e845cdc59dd67d9a44065a89c

From 98fda8cdf9097a5cd8d59347ea4b8e7012f32a8a Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sun, 6 Jul 2025 00:55:34 -0400
Subject: [PATCH 09/11] fix(ci): Temporarily disable windows cuda wheels

---
 .github/workflows/build-wheels-cuda.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 63b4c26ea..07b30cfc0 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -8,7 +8,7 @@ permissions:
 jobs:
   define_matrix:
     name: Define Build Matrix
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     defaults:
@@ -20,7 +20,7 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-22.04', 'windows-2022')
+              'os' = @('ubuntu-22.04') #, 'windows-2022')
               'pyver' = @("3.9", "3.10", "3.11", "3.12")
               'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
               'releasetag' = @("basic")

From 8866fbd7159736d776e1269ce778872a20e25cc2 Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sun, 6 Jul 2025 00:56:40 -0400
Subject: [PATCH 10/11] chore: Bump version

---
 CHANGELOG.md          | 4 ++++
 llama_cpp/__init__.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6e336962f..6017812bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.12]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@a0374a67e2924f2e845cdc59dd67d9a44065a89c
+
 ## [0.3.11]
 
 - fix: Update reference to `llama_kv_cache_clear` in Llama.embed. Closes #2037 by @abetlen in 9e5a4eaa84156084ed7bbb91e6efcc91dc6217bc
diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py
index e35c5014e..b16bb7dc9 100644
--- a/llama_cpp/__init__.py
+++ b/llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.3.11"
+__version__ = "0.3.12"

From cce48873166e6b6fbfe0d944a6184ef11858e735 Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Sun, 6 Jul 2025 01:32:07 -0400
Subject: [PATCH 11/11] fix(ci): Fix macos cpu builds

---
 .github/workflows/build-and-release.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index 29a6913c2..7eaf017fb 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -74,6 +74,7 @@ jobs:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "aarch64"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DCMAKE_CROSSCOMPILING=ON"
           CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
         with:
           output-dir: wheelhouse