-
Couldn't load subscription status.
- Fork 58
Add ui for low quality, max threshold more lax, enhance pcm before se… #340
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,8 +4,6 @@ | |
| * Run 'bun generate:constants' to regenerate | ||
| */ | ||
|
|
||
| import { START_CONTEXT_MARKER, END_CONTEXT_MARKER } from './markers.js' | ||
|
|
||
| export const DEFAULT_ADVANCED_SETTINGS = { | ||
| // ASR (Automatic Speech Recognition) settings | ||
| asrProvider: 'groq', | ||
|
|
@@ -37,27 +35,20 @@ Cleaned output: | |
|
|
||
| When you receive a transcript, immediately return the polished version following these rules. | ||
| `, | ||
| editingPrompt: ` | ||
| You are an AI assistant helping to edit documents based on user commands. These documents may be emails, notes, or any other text-based content in any application. You will be given the current document content (marked by {START_CONTEXT_MARKER} and {END_CONTEXT_MARKER}) and a user command (marked by {USER_COMMAND_MARKER}). | ||
| The document may be empty. | ||
|
|
||
| IMPORTANT: Your response MUST contain ONLY the modified document text that should replace the original content. DO NOT include: | ||
| - Any markers like ${START_CONTEXT_MARKER} or ${END_CONTEXT_MARKER} | ||
| - Any explanations, apologies, or additional text | ||
| - Any formatting markers like --- | ||
|
|
||
| FORMATTING RULES: | ||
| 1. Use proper formatting: | ||
| - Use actual line breaks, not spaces | ||
| - For bullet points, use "- " at the start of lines | ||
| - Maintain consistent indentation | ||
|
|
||
| For example, if you're editing an email, only return the email text itself, with all formatting preserved. If you're editing a document, only return the document content with exact formatting. The application will handle the context. | ||
|
|
||
| Your response should start with the very first character of the modified content and end with the very last character. | ||
| editingPrompt: ` You are a Command-Interpreter assistant. Your job is to take a raw speech transcript-complete with hesitations, false starts, "umm"s and self-corrections-and treat it as the user issuing a high-level instruction. Instead of merely polishing their words, you must: | ||
| 1. Extract the intent: identify the action the user is asking for (e.g. "write me a GitHub issue," "draft a sorry-I-missed-our-meeting email," "produce a summary of X," etc.). | ||
| 2. Ignore disfluencies: strip out "uh," "um," false starts and filler so you see only the core command. | ||
| 3. Map to a template: choose an appropriate standard format (GitHub issue markdown template, professional email, bullet-point agenda, etc.) that matches the intent. | ||
| 4. Generate the deliverable: produce a fully-formed document in that format, filling in placeholders sensibly from any details in the transcript. | ||
| 5. Do not add new intent: if the transcript doesn't specify something (e.g. title, recipients, date), use reasonable defaults (e.g. "Untitled Issue," "To: [Recipient]") or prompt the user for the missing piece. | ||
| 6. Produce only the final document: no commentary, apologies, or side-notes-just the completed issue/email/summary/etc. | ||
| 7. Your response MUST contain ONLY the resultant text. DO NOT include: | ||
| - Any markers like [START/END CURRENT NOTES CONTENT] | ||
| - Any explanations, apologies, or additional text | ||
| - Any formatting markers like --- or \`\`\` | ||
|
Comment on lines
+38
to
+48
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this to try to keep the prompt from leaking? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was just the result of running |
||
| `, | ||
|
|
||
| // Audio quality thresholds | ||
| noSpeechThreshold: 0.6, | ||
| lowQualityThreshold: -0.55, | ||
| lowQualityThreshold: -0.75, | ||
| } as const | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
/**
 * Light audio enhancement for 16-bit little-endian mono PCM.
 *
 * Processing steps, in order:
 * - Removes DC offset (subtracts the truncated integer mean of all samples)
 * - Applies a gentle first-order high-pass filter (~80 Hz cutoff)
 * - Boosts quiet audio so its peak approaches ~-3 dBFS, with the gain capped
 *   at 4x (~+12 dB); audio that is already loud enough is never attenuated
 *
 * All intermediate arithmetic is done in floating point so that subtracting
 * the mean from a near-full-scale sample cannot wrap around the int16 range;
 * values are clamped to int16 only when written to the output buffer.
 *
 * @param pcm - Raw 16-bit LE mono samples. A trailing odd byte is ignored.
 * @param sampleRate - Sample rate of `pcm` in Hz; must be a positive finite number.
 * @returns A new buffer of `floor(pcm.length / 2) * 2` bytes holding the
 *   enhanced samples. The input buffer itself is returned untouched when it
 *   holds less than one full sample or when `sampleRate` is not usable.
 */
export function enhancePcm16(pcm: Buffer, sampleRate: number): Buffer {
  if (!pcm || pcm.length < 2) return pcm
  // A zero, negative or non-finite rate yields a degenerate filter coefficient
  // (silence or an unstable filter), so leave the audio as it is instead.
  if (!Number.isFinite(sampleRate) || sampleRate <= 0) return pcm

  const sampleCount = Math.floor(pcm.length / 2)

  // DC offset: truncated integer mean of the raw samples
  let sum = 0
  for (let i = 0; i < sampleCount; i++) sum += pcm.readInt16LE(i * 2)
  const mean = Math.trunc(sum / sampleCount)

  // Gentle high-pass filter (~80 Hz): y[n] = a * (y[n-1] + x[n] - x[n-1])
  const cutoffHz = 80
  const a = Math.exp((-2 * Math.PI * cutoffHz) / sampleRate)
  const filtered = new Float32Array(sampleCount)
  let prevX = 0
  let prevY = 0
  // Start the peak at 1 so an all-silent input never divides by zero below.
  let peak = 1
  for (let i = 0; i < sampleCount; i++) {
    // Plain number arithmetic: the centered sample may exceed the int16 range
    const x = pcm.readInt16LE(i * 2) - mean
    const y = a * (prevY + x - prevX)
    filtered[i] = y
    prevX = x
    prevY = y

    const magnitude = Math.abs(filtered[i])
    if (magnitude > peak) peak = magnitude
  }

  // Peak normalize towards ~-3 dBFS, cap max gain to ~+12 dB
  const target = 0.707 * 32767 // ≈ -3 dBFS
  const gain = Math.min(target / peak, 4.0)
  // Only boost when it is worth it (> ~0.4 dB); never scale the signal down.
  const scale = gain > 1.05 ? gain : 1

  const out = Buffer.alloc(sampleCount * 2)
  for (let i = 0; i < sampleCount; i++) {
    const v = Math.round(filtered[i] * scale)
    out.writeInt16LE(Math.max(-32768, Math.min(32767, v)), i * 2)
  }

  return out
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
didn't we fix this before?