
Update to llama.cpp b2702 #58



Merged · 8 commits · Apr 21, 2024
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -4,8 +4,8 @@
 name: Continuous Integration
 on: [ "pull_request", "workflow_dispatch" ]
 env:
-  MODEL_URL: "https://huggingface.co/afrideva/Llama-160M-Chat-v1-GGUF/resolve/main/llama-160m-chat-v1.q2_k.gguf"
-  MODEL_NAME: "llama-160m-chat-v1.q2_k.gguf"
+  MODEL_URL: "https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf"
+  MODEL_NAME: "codellama-7b.Q2_K.gguf"
 jobs:

 # don't split build and test jobs to keep the workflow simple
4 changes: 2 additions & 2 deletions .github/workflows/release.yaml
@@ -9,8 +9,8 @@ on:
   release:
     types: [ created ]
 env:
-  MODEL_URL: "https://huggingface.co/afrideva/Llama-160M-Chat-v1-GGUF/resolve/main/llama-160m-chat-v1.q2_k.gguf"
-  MODEL_NAME: "llama-160m-chat-v1.q2_k.gguf"
+  MODEL_URL: "https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf"
+  MODEL_NAME: "codellama-7b.Q2_K.gguf"
 jobs:

2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -22,7 +22,7 @@ FetchContent_MakeAvailable(json)
 FetchContent_Declare(
     llama.cpp
     GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
-    GIT_TAG b2665
+    GIT_TAG b2702
 )
 FetchContent_MakeAvailable(llama.cpp)

5 changes: 4 additions & 1 deletion README.md
@@ -1,5 +1,5 @@
 ![Java 11+](https://img.shields.io/badge/Java-11%2B-informational)
-![llama.cpp b2619](https://img.shields.io/badge/llama.cpp-%23b2619-informational)
+![llama.cpp b2702](https://img.shields.io/badge/llama.cpp-%23b2702-informational)

 # Java Bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp)

@@ -17,6 +17,9 @@ This repository provides Java bindings for the C++ library.
 2.3 [Infilling](#infilling)
 3. [Android](#importing-in-android)

+> [!NOTE]
+> Now with Llama 3 support
+
 ## Quick Start

 Access this library via Maven:
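For context, the Java-side API that this PR's CI and test changes exercise looks roughly like the sketch below; it mirrors the updated LlamaModelTest further down and uses only classes and methods that appear in this diff (the model path and the 4096-dimensional embedding are specific to codellama-7b.Q2_K.gguf; this is an illustrative sketch, not part of the PR).

```java
import de.kherud.llama.LlamaModel;
import de.kherud.llama.ModelParameters;

public class EmbeddingExample {
    public static void main(String[] args) {
        // Mirrors LlamaModelTest#setup(): load the quantized CodeLlama-7B model
        // that the CI workflows now download from Hugging Face.
        LlamaModel model = new LlamaModel(
                new ModelParameters()
                        .setModelFilePath("models/codellama-7b.Q2_K.gguf")
                        .setNGpuLayers(43)
                        .setEmbedding(true)
        );

        // CodeLlama-7B has a 4096-dimensional hidden state, which is why the
        // embedding-length assertion in the test changes from 768 (the 160M
        // chat model) to 4096.
        float[] embedding = model.embed("def fibonacci(int n) {");
        System.out.println("embedding length = " + embedding.length); // 4096
    }
}
```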
1,085 changes: 0 additions & 1,085 deletions build-args.cmake

This file was deleted.

2 changes: 1 addition & 1 deletion pom.xml
@@ -4,7 +4,7 @@

     <groupId>de.kherud</groupId>
     <artifactId>llama</artifactId>
-    <version>3.0.0</version>
+    <version>3.0.1</version>
     <packaging>jar</packaging>

     <name>${project.groupId}:${project.artifactId}</name>
4 changes: 2 additions & 2 deletions src/main/cpp/jllama.cpp
@@ -99,7 +99,7 @@ jbyteArray parse_jbytes(JNIEnv *env, const std::string &string)
  * only requires JNI version `JNI_VERSION_1_1`. If the VM does not recognize the version number returned by
  * `JNI_OnLoad`, the VM will unload the library and act as if the library was never loaded.
  */
-JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, __attribute__((unused)) void *reserved)
+JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved)
 {
     JNIEnv *env = nullptr;

@@ -220,7 +220,7 @@ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, __attribute__((unused)) void *reserved)
  * Note that `JNI_OnLoad` and `JNI_OnUnload` are two functions optionally supplied by JNI libraries, not exported from
  * the VM.
  */
-JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, __attribute__((unused)) void *reserved)
+JNIEXPORT void JNICALL JNI_OnUnload(JavaVM *vm, void *reserved)
 {
     JNIEnv *env = nullptr;

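The doc comments in this hunk describe JNI_OnLoad and JNI_OnUnload, which the JVM calls when the native library is loaded and unloaded. A minimal sketch of the Java side that triggers JNI_OnLoad (the native library name is an assumption, not shown in this diff):

```java
public final class NativeLibraryLoader {
    static {
        // Loading the shared library invokes JNI_OnLoad in jllama.cpp.
        // If the JVM does not recognize the JNI version returned there,
        // it unloads the library as if it had never been loaded.
        System.loadLibrary("jllama"); // assumed library name
    }

    private NativeLibraryLoader() {
    }
}
```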
2 changes: 1 addition & 1 deletion src/main/cpp/server.hpp
@@ -1306,7 +1306,7 @@ struct server_context
                 });
             }

-            if (result.tok == llama_token_eos(model))
+            if (llama_token_is_eog(model, result.tok))
             {
                 slot.stopped_eos = true;
                 slot.has_next_token = false;
7 changes: 5 additions & 2 deletions src/test/java/de/kherud/llama/LlamaModelTest.java
@@ -20,7 +20,8 @@ public class LlamaModelTest {
     public static void setup() {
         model = new LlamaModel(
                 new ModelParameters()
-                        .setModelFilePath("models/llama-160m-chat-v1.q2_k.gguf")
+                        .setModelFilePath("models/codellama-7b.Q2_K.gguf")
+                        // .setModelUrl("https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf")
                         .setNGpuLayers(43)
                         .setEmbedding(true)
         );
@@ -45,6 +46,7 @@ public void testGenerateAnswer() {

         int generated = 0;
         for (LlamaModel.Output ignored : model.generate(params)) {
+            System.out.println(ignored);
             generated++;
         }
         // todo: currently, after generating nPredict tokens, there is an additional empty output
@@ -67,6 +69,7 @@ public void testGenerateInfill() {
         int generated = 0;
         for (LlamaModel.Output ignored : model.generate(params)) {
             generated++;
+            System.out.println(ignored);
         }
         Assert.assertTrue(generated > 0 && generated <= nPredict + 1);
     }
@@ -133,7 +136,7 @@ public void testCompleteGrammar() {
     @Test
     public void testEmbedding() {
         float[] embedding = model.embed(prefix);
-        Assert.assertEquals(768, embedding.length);
+        Assert.assertEquals(4096, embedding.length);
     }

     @Test