@@ -256,12 +256,14 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
-# LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
 
 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
@@ -285,12 +287,14 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19
 LLAMA_FTYPE_MOSTLY_IQ2_XS = 20
 LLAMA_FTYPE_MOSTLY_Q2_K_S = 21
-LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22
+LLAMA_FTYPE_MOSTLY_IQ3_XS = 22
 LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23
 LLAMA_FTYPE_MOSTLY_IQ1_S = 24
 LLAMA_FTYPE_MOSTLY_IQ4_NL = 25
 LLAMA_FTYPE_MOSTLY_IQ3_S = 26
 LLAMA_FTYPE_MOSTLY_IQ3_M = 27
+LLAMA_FTYPE_MOSTLY_IQ2_S = 28
+LLAMA_FTYPE_MOSTLY_IQ2_M = 29
 LLAMA_FTYPE_GUESSED = 1024
 
 # enum llama_rope_scaling_type {
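
For context, a minimal sketch of how the renumbered and newly added ftype constants might be consumed from Python. The `llama_cpp.llama_cpp` import path follows the upstream bindings layout, and the lookup helper is an illustrative assumption, not part of this change.

```python
# Minimal sketch (not part of this diff): label a model's quantization scheme
# using the ftype constants touched above. The import path is an assumption.
import llama_cpp.llama_cpp as llama_cpp

# Hypothetical lookup table built only from constants defined in this change.
FTYPE_NAMES = {
    llama_cpp.LLAMA_FTYPE_MOSTLY_IQ3_XS: "IQ3_XS",  # replaces Q3_K_XS at value 22
    llama_cpp.LLAMA_FTYPE_MOSTLY_IQ2_S: "IQ2_S",    # new, value 28
    llama_cpp.LLAMA_FTYPE_MOSTLY_IQ2_M: "IQ2_M",    # new, value 29
}


def ftype_name(ftype: int) -> str:
    """Return a readable label for an ftype value, falling back to the raw int."""
    return FTYPE_NAMES.get(ftype, f"unknown ({ftype})")


print(ftype_name(22))  # -> IQ3_XS
```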