Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f30ed48

Browse files
authored
Title gen: swap Llama-3.1-8B for gpt-oss-120b via Cerebras (#58)
1 parent 849c88c commit f30ed48

1 file changed

Lines changed: 8 additions & 5 deletions

File tree

‎backend/routes/agent.py‎

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -166,9 +166,11 @@ async def generate_title(
166166
) -> dict:
167167
"""Generate a short title for a chat session based on the first user message.
168168
169-
Always uses Llama-3.1-8B-Instruct via Cerebras on the HF router. The tab
170-
headline renders as plain text, so the model is told to avoid markdown
171-
and any stray formatting characters are stripped before returning.
169+
Always uses gpt-oss-120b via Cerebras on the HF router. The tab headline
170+
renders as plain text, so the model is told to avoid markdown and any
171+
stray formatting characters are stripped before returning. gpt-oss is a
172+
reasoning model — reasoning_effort=low keeps the reasoning budget small
173+
so the 60-token output budget isn't consumed before the title is written.
172174
"""
173175
api_key = (
174176
os.environ.get("INFERENCE_TOKEN")
@@ -179,7 +181,7 @@ async def generate_title(
179181
response = await acompletion(
180182
# Double openai/ prefix: LiteLLM strips the first as its provider
181183
# prefix, leaving the HF model id on the wire for the router.
182-
model="openai/meta-llama/Llama-3.1-8B-Instruct:cerebras",
184+
model="openai/openai/gpt-oss-120b:cerebras",
183185
api_base="https://router.huggingface.co/v1",
184186
api_key=api_key,
185187
messages=[
@@ -195,9 +197,10 @@ async def generate_title(
195197
},
196198
{"role": "user", "content": request.text[:500]},
197199
],
198-
max_tokens=20,
200+
max_tokens=60,
199201
temperature=0.3,
200202
timeout=10,
203+
reasoning_effort="low",
201204
)
202205
title = response.choices[0].message.content.strip().strip('"').strip("'")
203206
title = title.translate(_TITLE_STRIP_CHARS).strip()

0 commit comments

Comments
 (0)