Commit c48e77d

Add fallback to Converse API for models without streaming support
Add automatic fallback to the non-streaming Converse API when a model
doesn't support ConverseStream.

- Detect streaming validation errors and retry with Converse API
- Remember non-streaming models to avoid future retry attempts

This change allows llmcli to work with newly added gpt-oss models.
1 parent 75d3157 commit c48e77d
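
In outline, the run path now chooses between the two APIs like this (a condensed sketch of the main.go diff below; isStreamingUnsupported is a hypothetical stand-in for the exact errors.As/ValidationException message check in the real code, and verbose logging is elided):

    var rc modelResponseConsumer
    if !modelSupportsStreaming(*input.ModelId) {
        // Model was remembered as non-streaming on an earlier run:
        // skip the streaming attempt entirely.
        out, err := cl.Converse(ctx, nonStreamInput(input))
        if err != nil {
            return err
        }
        rc = newBasicResponseConsumer(out)
    } else if out, err := cl.ConverseStream(ctx, input); err == nil {
        rc = newResponseConsumer(out)
    } else if isStreamingUnsupported(err) { // hypothetical helper, see note above
        out2, err2 := cl.Converse(ctx, nonStreamInput(input))
        if err2 != nil {
            return err // surface the original ConverseStream error
        }
        markModelNonStreaming(*input.ModelId) // remember for future runs
        rc = newBasicResponseConsumer(out2)
    } else {
        return err
    }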

File tree

2 files changed (+119, -5 lines changed)


README.md

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ llmcli -h
 
 ## Requirements
 
-- AWS account with Bedrock access and at least one [model that supports ConverseStream API and system prompt](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html) enabled.
+- AWS account with Bedrock access and at least one [model that supports ConverseStream (or Converse) API and system prompt](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html) enabled.
   Configure which model to use with `LLMCLI_MODEL` environment variable (example: `us.amazon.nova-micro-v1:0`).
 - Properly configured AWS credentials.
   This tool tries to use AWS profile named “llmcli”, and falls back to default AWS credentials if profile is not found.
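
For example, once a model is enabled, it can be selected per invocation (hypothetical command line using the README's own example model ID; per readPrompt in main.go, the prompt may be piped via stdin):

    echo "Hello" | LLMCLI_MODEL=us.amazon.nova-micro-v1:0 llmcli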

main.go

Lines changed: 118 additions & 4 deletions
@@ -190,16 +190,42 @@ func run(ctx context.Context, args runArgs) error {
 	if args.t != nil && args.budget != 0 { // “Thinking isn’t compatible with temperature”
 		input.InferenceConfig = &types.InferenceConfiguration{Temperature: args.t}
 	}
-	out, err := cl.ConverseStream(ctx, input)
-	if err != nil {
-		return err
+	var rc modelResponseConsumer
+	if !modelSupportsStreaming(*input.ModelId) {
+		out, err := cl.Converse(ctx, nonStreamInput(input))
+		if err != nil {
+			return err
+		}
+		rc = newBasicResponseConsumer(out)
+	} else {
+		out, err := cl.ConverseStream(ctx, input)
+		var ve *types.ValidationException
+		switch {
+		case err == nil:
+			rc = newResponseConsumer(out)
+		case errors.As(err, &ve) && ve.Message != nil && *ve.Message == "The model is unsupported for streaming.":
+			if args.v {
+				log.Printf("model %q is not supported for streaming, trying non-streaming mode", *input.ModelId)
+			}
+			if out, err := cl.Converse(ctx, nonStreamInput(input)); err == nil {
+				markModelNonStreaming(*input.ModelId)
+				rc = newBasicResponseConsumer(out)
+				break
+			} else {
+				if args.v {
+					log.Printf("fallback attempt to use Converse API: %v", err)
+				}
+			}
+			return err // intentionally return the error from failed ConverseStream attempt
+		default:
+			return err
+		}
 	}
 	var buf bytes.Buffer
 	var wr io.Writer = os.Stdout
 	if args.web {
 		wr = io.MultiWriter(os.Stdout, &buf)
 	}
-	rc := newResponseConsumer(out)
 	if args.budget != 0 {
 		// just in case we changed stdout formatting at the start of “thinking”
 		// but failed mid-way before resetting formatting
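
One subtlety in the fallback case above: in Go, break inside a switch case exits the switch (not the enclosing function or loop), so after a successful Converse retry the trailing return err is skipped. A standalone illustration, not from the commit:

    package main

    import "fmt"

    func main() {
        // Mirrors the control flow above: break leaves the switch,
        // so the statement after it in the same case never runs.
        switch {
        case true:
            if true {
                break // jumps past the next line, to after the switch
            }
            fmt.Println("never printed") // analogue of the trailing return err
        }
        fmt.Println("after switch") // printed
    }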
@@ -294,6 +320,61 @@ func (r *responseConsumer) Chunks() iter.Seq[chunk] {
 	}
 }
 
+type modelResponseConsumer interface {
+	Chunks() iter.Seq[chunk]
+	Usage() *types.TokenUsage
+	Err() error
+}
+
+func newBasicResponseConsumer(r *bedrockruntime.ConverseOutput) *basicResponseConsumer {
+	return &basicResponseConsumer{res: r}
+}
+
+type basicResponseConsumer struct {
+	res *bedrockruntime.ConverseOutput
+}
+
+func (r *basicResponseConsumer) Usage() *types.TokenUsage { return r.res.Usage }
+func (r *basicResponseConsumer) Err() error {
+	if s := r.res.StopReason; s != types.StopReasonEndTurn {
+		return fmt.Errorf("stop reason: %s", s)
+	}
+	return nil
+}
+func (r *basicResponseConsumer) Chunks() iter.Seq[chunk] {
+	return func(yield func(chunk) bool) {
+		for _, block := range r.res.Output.(*types.ConverseOutputMemberMessage).Value.Content {
+			switch v := block.(type) {
+			case *types.ContentBlockMemberText:
+				if !yield(chunk{text: v.Value}) {
+					return
+				}
+			case *types.ContentBlockMemberReasoningContent:
+				if ent, ok := v.Value.(*types.ReasoningContentBlockMemberReasoningText); ok && !yield(chunk{text: *ent.Value.Text, thinking: true}) {
+					return
+				} else if _, ok := v.Value.(*types.ReasoningContentBlockMemberRedactedContent); ok && !yield(chunk{text: "\n[…redacted thinking…]\n", thinking: true}) {
+					return
+				}
+			default:
+				log.Printf("unknown block type: %T %+v", v, v)
+			}
+		}
+		if !yield(chunk{text: "\n"}) {
+			return
+		}
+	}
+}
+
+// nonStreamInput converts prepared ConverseStream input into plain Converse input.
+func nonStreamInput(csi *bedrockruntime.ConverseStreamInput) *bedrockruntime.ConverseInput {
+	return &bedrockruntime.ConverseInput{
+		ModelId:         csi.ModelId,
+		System:          csi.System,
+		Messages:        csi.Messages,
+		InferenceConfig: csi.InferenceConfig,
+	}
+}
+
 func readPrompt(args runArgs) (string, error) {
 	stdinIsTerminal := term.IsTerminal(int(os.Stdin.Fd()))
 	var pb strings.Builder
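
Both consumers satisfy modelResponseConsumer, so the calling code can drain either one the same way. A sketch of such a caller in the same package (not part of the commit; chunk and the consumer types come from the diff above):

    // drain writes every chunk to w and surfaces the consumer's final error.
    // iter.Seq values compose directly with range-over-func (Go 1.23+).
    func drain(rc modelResponseConsumer, w io.Writer) error {
        for c := range rc.Chunks() {
            if _, err := io.WriteString(w, c.text); err != nil {
                return err
            }
        }
        return rc.Err()
    }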
@@ -594,3 +675,36 @@ func (g *group) Wait() error {
 	g.cancel()
 	return g.err
 }
+
+var cacheDir = sync.OnceValue(func() string {
+	d, err := os.UserCacheDir()
+	if err != nil {
+		d = os.TempDir()
+	}
+	return filepath.Join(d, "llmcli")
+})
+
+const noStreamingSupportFile = "llmcli-no-streaming.txt"
+
+func modelSupportsStreaming(model string) bool {
+	data, err := os.ReadFile(filepath.Join(cacheDir(), noStreamingSupportFile))
+	if err != nil {
+		return true
+	}
+	for line := range strings.Lines(string(data)) {
+		if model == strings.TrimSpace(line) {
+			return false
+		}
+	}
+	return true
+}
+
+func markModelNonStreaming(model string) {
+	_ = os.MkdirAll(cacheDir(), 0755)
+	f, err := os.OpenFile(filepath.Join(cacheDir(), noStreamingSupportFile), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return
+	}
+	defer f.Close()
+	_, _ = f.WriteString(model + "\n")
+}
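
The cache is a plain newline-separated list of model IDs under the user cache directory (or the temp directory as a fallback). A hypothetical test in the same package could exercise the round trip; note that it appends to the real cache file, which is shared state across runs:

    package main

    import "testing"

    // TestNonStreamingCache is illustrative only, not part of this commit.
    // The model ID is deliberately fake.
    func TestNonStreamingCache(t *testing.T) {
        const model = "example.fake-model-v1:0"
        markModelNonStreaming(model)
        if modelSupportsStreaming(model) {
            t.Fatal("model should have been remembered as non-streaming")
        }
    }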
