
Update to llama.cpp b2702 #58



Merged · 8 commits · Apr 21, 2024
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -4,8 +4,8 @@
 name: Continuous Integration
 on: [ "pull_request", "workflow_dispatch" ]
 env:
-  MODEL_URL: "https://huggingface.co/afrideva/Llama-160M-Chat-v1-GGUF/resolve/main/llama-160m-chat-v1.q2_k.gguf"
-  MODEL_NAME: "llama-160m-chat-v1.q2_k.gguf"
+  MODEL_URL: "https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf"
+  MODEL_NAME: "codellama-7b.Q2_K.gguf"
 jobs:

 # don't split build and test jobs to keep the workflow simple
4 changes: 2 additions & 2 deletions .github/workflows/release.yaml
@@ -9,8 +9,8 @@ on:
   release:
     types: [ created ]
 env:
-  MODEL_URL: "https://huggingface.co/afrideva/Llama-160M-Chat-v1-GGUF/resolve/main/llama-160m-chat-v1.q2_k.gguf"
-  MODEL_NAME: "llama-160m-chat-v1.q2_k.gguf"
+  MODEL_URL: "https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf"
+  MODEL_NAME: "codellama-7b.Q2_K.gguf"
 jobs:

2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -22,7 +22,7 @@ FetchContent_MakeAvailable(json)
 FetchContent_Declare(
     llama.cpp
     GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
-    GIT_TAG b2665
+    GIT_TAG b2702
 )
 FetchContent_MakeAvailable(llama.cpp)

5 changes: 4 additions & 1 deletion README.md
@@ -1,5 +1,5 @@
 ![Java 11+](https://img.shields.io/badge/Java-11%2B-informational)
-![llama.cpp b2619](https://img.shields.io/badge/llama.cpp-%23b2619-informational)
+![llama.cpp b2702](https://img.shields.io/badge/llama.cpp-%23b2702-informational)

 # Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp)

@@ -17,6 +17,9 @@ This repository provides Java bindings for the C++ library.
 2.3 [Infilling](#infilling)
 3. [Android](#importing-in-android)

+> [!NOTE]
+> Now with Llama 3 support
+
 ## Quick Start

 Access this library via Maven:
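For context, the Java-side API that this PR's CI and test changes exercise looks roughly like the sketch below; it mirrors the updated LlamaModelTest further down and uses only classes and methods that appear in this diff (the model path and the 4096-dimensional embedding are specific to codellama-7b.Q2_K.gguf; this is an illustrative sketch, not part of the PR).

```java
import de.kherud.llama.LlamaModel;
import de.kherud.llama.ModelParameters;

public class EmbeddingExample {
    public static void main(String[] args) {
        // Mirrors LlamaModelTest#setup(): load the quantized CodeLlama-7B model
        // that the CI workflows now download from Hugging Face.
        LlamaModel model = new LlamaModel(
                new ModelParameters()
                        .setModelFilePath("models/codellama-7b.Q2_K.gguf")
                        .setNGpuLayers(43)
                        .setEmbedding(true)
        );

        // CodeLlama-7B has a 4096-dimensional hidden state, which is why the
        // embedding-length assertion in the test changes from 768 (the 160M
        // chat model) to 4096.
        float[] embedding = model.embed("def fibonacci(int n) {");
        System.out.println("embedding length = " + embedding.length); // 4096
    }
}
```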
1,085 changes: 0 additions & 1,085 deletions build-args.cmake

This file was deleted.

2 changes: 1 addition & 1 deletion pom.xml
@@ -4,7 +4,7 @@

     <groupId>de.kherud</groupId>
     <artifactId>llama</artifactId>
-    <version>3.0.0</version>
+    <version>3.0.1</version>
     <packaging>jar</packaging>

     <name>${project.groupId}:${project.artifactId}</name>
4 changes: 2 additions & 2 deletions src/main/cpp/jllama.cpp
@@ -99,7 +99,7 @@ jbyteArray parse_jbytes(JNIEnv *env, const std::string &string)
  * only requires JNI version `JNI_VERSION_1_1`. If the VM does not recognize the version number returned by
  * `JNI_OnLoad`, the VM will unload the library and act as if the library was never loaded.
  */
-JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, __attribute__((unused)) void *reserved)
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved)
 {
     JNIEnv *env = nullptr;

@@ -220,7 +220,7 @@ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, __attribute__((unused)) void *reserved)
  * Note that `JNI_OnLoad` and `JNI_OnUnload` are two functions optionally supplied by JNI libraries, not exported from
  * the VM.
  */
-JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, __attribute__((unused)) void *reserved)
+JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved)
 {
     JNIEnv *env = nullptr;

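The doc comments in this hunk describe JNI_OnLoad and JNI_OnUnload, which the JVM calls when the native library is loaded and unloaded. A minimal sketch of the Java side that triggers JNI_OnLoad (the native library name is an assumption, not shown in this diff):

```java
public final class NativeLibraryLoader {
    static {
        // Loading the shared library invokes JNI_OnLoad in jllama.cpp.
        // If the JVM does not recognize the JNI version returned there,
        // it unloads the library as if it had never been loaded.
        System.loadLibrary("jllama"); // assumed library name
    }

    private NativeLibraryLoader() {
    }
}
```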
2 changes: 1 addition & 1 deletion src/main/cpp/server.hpp
@@ -1306,7 +1306,7 @@ struct server_context
                 });
             }

-            if (result.tok == llama_token_eos(model))
+            if (llama_token_is_eog(model, result.tok))
             {
                 slot.stopped_eos = true;
                 slot.has_next_token = false;
7 changes: 5 additions & 2 deletions src/test/java/de/kherud/llama/LlamaModelTest.java
@@ -20,7 +20,8 @@ public class LlamaModelTest {
     public static void setup() {
         model = new LlamaModel(
                 new ModelParameters()
-                        .setModelFilePath("models/llama-160m-chat-v1.q2_k.gguf")
+                        .setModelFilePath("models/codellama-7b.Q2_K.gguf")
+                        // .setModelUrl("https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf")
                         .setNGpuLayers(43)
                         .setEmbedding(true)
         );
@@ -45,6 +46,7 @@ public void testGenerateAnswer() {

         int generated = 0;
         for (LlamaModel.Output ignored : model.generate(params)) {
+            System.out.println(ignored);
             generated++;
         }
         // todo: currently, after generating nPredict tokens, there is an additional empty output
@@ -67,6 +69,7 @@ public void testGenerateInfill() {
         int generated = 0;
         for (LlamaModel.Output ignored : model.generate(params)) {
             generated++;
+            System.out.println(ignored);
         }
         Assert.assertTrue(generated > 0 && generated <= nPredict + 1);
     }
@@ -133,7 +136,7 @@ public void testCompleteGrammar() {
     @Test
     public void testEmbedding() {
         float[] embedding = model.embed(prefix);
-        Assert.assertEquals(768, embedding.length);
+        Assert.assertEquals(4096, embedding.length);
     }

     @Test