Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit bfc0124

Browse files
TheFlipbook authored and tybalex committed
llama : support negative ith in llama_get_ API (ggml-org#6519)
* llama_sampling_sample with default args is more naively usable
* Batches populated by either llama_batch_get_one or llama_batch_add work with default args
  * Previously get_one could use the default argument
  * Previously add should usually have used the last index where logits[idx] == true
* This hopefully encourages the use of llama_batch_add
  * By giving expected results when using default arguments.
* Adds "negative indexing" feature to llama_get_logits_ith and llama_get_embeddings_ith
  * Believed to work with any currently well behaved program
  * Default arg now works for both cases (previously would give strange results for add case)
  * Any non-negative number is unaffected and behaves as previously
  * Negative arguments were previously invalid.
* Implemented as a special case of indexing as suggested by @compilade in ggml-org#6519
* Fixed mismatch type errors
  * cited in macOS CI tests
  * Missed in original updates based on PR feedback in ggml-org#6519
1 parent b34cbff commit bfc0124

File tree

3 files changed

+34
-12
lines changed

3 files changed

+34
-12
lines changed

common/sampling.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ llama_token llama_sampling_sample(
129129
struct llama_sampling_context * ctx_sampling,
130130
struct llama_context * ctx_main,
131131
struct llama_context * ctx_cfg,
132-
int idx = 0);
132+
int idx = -1);
133133

134134
// Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters.
135135
llama_token_data_array llama_sampling_prepare(

llama.cpp

+29-9
Original file line numberDiff line numberDiff line change
@@ -2177,7 +2177,7 @@ struct llama_context {
21772177

21782178
std::vector<int32_t> output_ids; // map batch token positions to ids of the logits and embd buffers
21792179
size_t output_size = 0; // capacity (of tokens positions) for the output buffers
2180-
int32_t n_outputs = 0; // number of actually-used outputs in the current ubatch
2180+
int32_t n_outputs = 0; // number of actually-used outputs in the current ubatch or last logical batch
21812181

21822182
bool logits_all = false;
21832183

@@ -10411,6 +10411,9 @@ static int llama_decode_internal(
1041110411
n_outputs_prev += lctx.n_outputs;
1041210412
}
1041310413

10414+
// set to total number of outputs in the batch, for use in llama_get_logits_ith
10415+
lctx.n_outputs = n_outputs;
10416+
1041410417
// wait for the computation to finish (automatically done when obtaining the model output)
1041510418
//llama_synchronize(&lctx);
1041610419

@@ -15944,23 +15947,31 @@ float * llama_get_logits(struct llama_context * ctx) {
1594415947
}
1594515948

1594615949
float * llama_get_logits_ith(struct llama_context * ctx, int32_t i) {
15950+
int32_t j = -1;
1594715951
llama_synchronize(ctx);
1594815952

1594915953
try {
1595015954
if (ctx->logits == nullptr) {
1595115955
throw std::runtime_error("no logits");
1595215956
}
15953-
if ((size_t) i >= ctx->output_ids.size()) {
15957+
15958+
if (i < 0) {
15959+
j = ctx->n_outputs + i;
15960+
if (j < 0) {
15961+
throw std::runtime_error(format("negative index out of range [0, %d)", ctx->n_outputs));
15962+
}
15963+
} else if ((size_t) i >= ctx->output_ids.size()) {
1595415964
throw std::runtime_error(format("out of range [0, %lu)", ctx->output_ids.size()));
15965+
} else {
15966+
j = ctx->output_ids[i];
1595515967
}
15956-
const int32_t j = ctx->output_ids[i];
1595715968

1595815969
if (j < 0) {
1595915970
throw std::runtime_error(format("batch.logits[%d] != true", i));
1596015971
}
15961-
if ((size_t) j >= ctx->output_size) {
15972+
if (j >= ctx->n_outputs) {
1596215973
// This should not happen
15963-
throw std::runtime_error(format("corrupt output buffer (j=%d, output_size=%lu)", j, ctx->output_size));
15974+
throw std::runtime_error(format("corrupt output buffer (j=%d, n_outputs=%d)", j, ctx->n_outputs));
1596415975
}
1596515976

1596615977
return ctx->logits + j*ctx->model.hparams.n_vocab;
@@ -15980,23 +15991,32 @@ float * llama_get_embeddings(struct llama_context * ctx) {
1598015991
}
1598115992

1598215993
float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i) {
15994+
int32_t j = -1;
15995+
1598315996
llama_synchronize(ctx);
1598415997

1598515998
try {
1598615999
if (ctx->embd == nullptr) {
1598716000
throw std::runtime_error("no embeddings");
1598816001
}
15989-
if ((size_t) i >= ctx->output_ids.size()) {
16002+
16003+
if (i < 0) {
16004+
j = ctx->n_outputs + i;
16005+
if (j < 0) {
16006+
throw std::runtime_error(format("negative index out of range [0, %d)", ctx->n_outputs));
16007+
}
16008+
} else if ((size_t) i >= ctx->output_ids.size()) {
1599016009
throw std::runtime_error(format("out of range [0, %lu)", ctx->output_ids.size()));
16010+
} else {
16011+
j = ctx->output_ids[i];
1599116012
}
15992-
const int32_t j = ctx->output_ids[i];
1599316013

1599416014
if (j < 0) {
1599516015
throw std::runtime_error(format("batch.logits[%d] != true", i));
1599616016
}
15997-
if ((size_t) j >= ctx->output_size) {
16017+
if (j >= ctx->n_outputs) {
1599816018
// This should not happen
15999-
throw std::runtime_error(format("corrupt output buffer (j=%d, output_size=%lu)", j, ctx->output_size));
16019+
throw std::runtime_error(format("corrupt output buffer (j=%d, n_outputs=%d)", j, ctx->n_outputs));
1600016020
}
1600116021

1600216022
return ctx->embd + j*ctx->model.hparams.n_embd;

llama.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -747,8 +747,9 @@ extern "C" {
747747
// Cols: n_vocab
748748
LLAMA_API float * llama_get_logits(struct llama_context * ctx);
749749

750-
// Logits for the ith token. Equivalent to:
750+
// Logits for the ith token. For positive indices, equivalent to:
751751
// llama_get_logits(ctx) + ctx->output_ids[i]*n_vocab
752+
// Negative indices can be used to access logits in reverse order, -1 is the last logit.
752753
// returns NULL for invalid ids.
753754
LLAMA_API float * llama_get_logits_ith(struct llama_context * ctx, int32_t i);
754755

@@ -760,8 +761,9 @@ extern "C" {
760761
// Otherwise, returns NULL.
761762
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
762763

763-
// Get the embeddings for the ith token. Equivalent to:
764+
// Get the embeddings for the ith token. For positive indices, equivalent to:
764765
// llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
766+
// Negative indices can be used to access embeddings in reverse order, -1 is the last embedding.
765767
// shape: [n_embd] (1-dimensional)
766768
// returns NULL for invalid ids.
767769
LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);

0 commit comments

Comments
 (0)