@@ -2475,7 +2475,6 @@ static bool llama_kv_cache_init(
24752475static bool llama_kv_cache_find_slot(
24762476 struct llama_kv_cache & cache,
24772477 const struct llama_batch & batch) {
2478- const uint32_t n_ctx = cache.size;
24792478 const uint32_t n_tokens = batch.n_tokens;
24802479
24812480 if (cache.recurrent) {
@@ -2526,16 +2525,16 @@ static bool llama_kv_cache_find_slot(
25262525 }
25272526 // otherwise, one cell per token.
25282527
2529- if (n_tokens > n_ctx ) {
2530- LLAMA_LOG_ERROR("%s: n_tokens=%d > n_ctx =%d\n", __func__, n_tokens, n_ctx );
2528+ if (n_tokens > cache.size ) {
2529+ LLAMA_LOG_ERROR("%s: n_tokens=%d > cache.size =%d\n", __func__, n_tokens, cache.size );
25312530 return false;
25322531 }
25332532
25342533 uint32_t n_tested = 0;
25352534
25362535 while (true) {
2537- if (cache.head + n_tokens > n_ctx ) {
2538- n_tested += n_ctx - cache.head;
2536+ if (cache.head + n_tokens > cache.size ) {
2537+ n_tested += cache.size - cache.head;
25392538 cache.head = 0;
25402539 continue;
25412540 }
@@ -2554,7 +2553,7 @@ static bool llama_kv_cache_find_slot(
25542553 break;
25552554 }
25562555
2557- if (n_tested >= n_ctx ) {
2556+ if (n_tested >= cache.size ) {
25582557 //LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tokens);
25592558 return false;
25602559 }
0 commit comments