diff --git a/Cargo.lock b/Cargo.lock index 9867318..05dcf9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3633,7 +3633,7 @@ dependencies = [ [[package]] name = "whispers" -version = "0.1.1" +version = "0.2.1" dependencies = [ "base64 0.22.1", "clap", diff --git a/Cargo.toml b/Cargo.toml index ae9b685..3df316e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "whispers" -version = "0.1.1" +version = "0.2.1" edition = "2024" rust-version = "1.85" description = "Speech-to-text dictation tool for Wayland" diff --git a/README.md b/README.md index 57893e8..09436b0 100644 --- a/README.md +++ b/README.md @@ -51,9 +51,6 @@ whispers setup # one-shot dictation whispers - -# live mode -whispers voice ``` Default config path: @@ -86,9 +83,8 @@ bindsym $mod+Alt+d exec whispers # setup whispers setup -# dictation +# one-shot dictation whispers -whispers voice whispers transcribe audio.wav # ASR models diff --git a/config.example.toml b/config.example.toml index ec37c8c..12d3e65 100644 --- a/config.example.toml +++ b/config.example.toml @@ -33,7 +33,7 @@ flash_attn = true idle_timeout_ms = 120000 [postprocess] -# "raw" (default), "advanced_local", "agentic_rewrite", or "legacy_basic" for deprecated cleanup configs +# "raw" (default), "rewrite", or "legacy_basic" for deprecated cleanup configs mode = "raw" [session] @@ -59,7 +59,7 @@ snippet_trigger = "insert" backend = "local" # Cloud fallback behavior ("local" or "none") fallback = "local" -# Managed rewrite model name for advanced_local mode +# Managed rewrite model name for rewrite mode selected_model = "qwen-3.5-4b-q4_k_m" # Manual GGUF path override (empty = use selected managed model) # Custom rewrite models should be chat-capable GGUFs with an embedded @@ -77,11 +77,9 @@ idle_timeout_ms = 120000 max_output_chars = 1200 # Maximum tokens to generate for rewritten output max_tokens = 256 - -[agentic_rewrite] -# App-aware rewrite policy rules used by postprocess.mode = "agentic_rewrite" +# App-aware rewrite policy rules used by postprocess.mode = "rewrite" policy_path = "~/.local/share/whispers/app-rewrite-policy.toml" -# Technical glossary used by postprocess.mode = "agentic_rewrite" +# Technical glossary used by postprocess.mode = "rewrite" glossary_path = "~/.local/share/whispers/technical-glossary.toml" # Default correction policy ("conservative", "balanced", or "aggressive") default_correction_policy = "balanced" diff --git a/src/agentic_rewrite/admin.rs b/src/agentic_rewrite/admin.rs index 5037c93..641d798 100644 --- a/src/agentic_rewrite/admin.rs +++ b/src/agentic_rewrite/admin.rs @@ -8,19 +8,19 @@ use super::{AppRule, ContextMatcher, GlossaryEntry, store}; pub(super) fn print_app_rule_path(config_override: Option<&Path>) -> Result<()> { let config = Config::load(config_override)?; - println!("{}", config.resolved_agentic_policy_path().display()); + println!("{}", config.resolved_rewrite_policy_path().display()); Ok(()) } pub(super) fn print_glossary_path(config_override: Option<&Path>) -> Result<()> { let config = Config::load(config_override)?; - println!("{}", config.resolved_agentic_glossary_path().display()); + println!("{}", config.resolved_rewrite_glossary_path().display()); Ok(()) } pub(super) fn list_app_rules(config_override: Option<&Path>) -> Result<()> { let config = Config::load(config_override)?; - let rules = store::read_policy_file(&config.resolved_agentic_policy_path())?; + let rules = store::read_policy_file(&config.resolved_rewrite_policy_path())?; if rules.is_empty() { println!("No app rules configured."); return Ok(()); @@ -49,7 +49,7 @@ pub(super) fn add_app_rule( correction_policy: Option, ) -> Result<()> { let config = Config::load(config_override)?; - let path = config.resolved_agentic_policy_path(); + let path = config.resolved_rewrite_policy_path(); let mut rules = store::read_policy_file(&path)?; store::upsert_app_rule( &mut rules, @@ -68,7 +68,7 @@ pub(super) fn add_app_rule( pub(super) fn remove_app_rule(config_override: Option<&Path>, name: &str) -> Result<()> { let config = Config::load(config_override)?; - let path = config.resolved_agentic_policy_path(); + let path = config.resolved_rewrite_policy_path(); let mut rules = store::read_policy_file(&path)?; let removed = store::remove_app_rule_entry(&mut rules, name); store::write_policy_file(&path, &rules)?; @@ -83,7 +83,7 @@ pub(super) fn remove_app_rule(config_override: Option<&Path>, name: &str) -> Res pub(super) fn list_glossary(config_override: Option<&Path>) -> Result<()> { let config = Config::load(config_override)?; - let entries = store::read_glossary_file(&config.resolved_agentic_glossary_path())?; + let entries = store::read_glossary_file(&config.resolved_rewrite_glossary_path())?; if entries.is_empty() { println!("No glossary entries configured."); return Ok(()); @@ -113,7 +113,7 @@ pub(super) fn add_glossary_entry( matcher: ContextMatcher, ) -> Result<()> { let config = Config::load(config_override)?; - let path = config.resolved_agentic_glossary_path(); + let path = config.resolved_rewrite_glossary_path(); let mut entries = store::read_glossary_file(&path)?; store::upsert_glossary_entry( &mut entries, @@ -131,7 +131,7 @@ pub(super) fn add_glossary_entry( pub(super) fn remove_glossary_entry(config_override: Option<&Path>, term: &str) -> Result<()> { let config = Config::load(config_override)?; - let path = config.resolved_agentic_glossary_path(); + let path = config.resolved_rewrite_glossary_path(); let mut entries = store::read_glossary_file(&path)?; let removed = store::remove_glossary_entry_by_term(&mut entries, term); store::write_glossary_file(&path, &entries)?; diff --git a/src/agentic_rewrite/mod.rs b/src/agentic_rewrite/mod.rs index b387c7b..0b6d394 100644 --- a/src/agentic_rewrite/mod.rs +++ b/src/agentic_rewrite/mod.rs @@ -57,12 +57,12 @@ pub fn default_glossary_path() -> &'static str { } pub fn apply_runtime_policy(config: &Config, transcript: &mut RewriteTranscript) { - let policy_rules = store::load_policy_file_for_runtime(&config.resolved_agentic_policy_path()); + let policy_rules = store::load_policy_file_for_runtime(&config.resolved_rewrite_policy_path()); let glossary_entries = - store::load_glossary_file_for_runtime(&config.resolved_agentic_glossary_path()); + store::load_glossary_file_for_runtime(&config.resolved_rewrite_glossary_path()); let policy_context = runtime::resolve_policy_context( - config.agentic_rewrite.default_correction_policy, + config.rewrite.default_correction_policy, transcript.typing_context.as_ref(), &transcript.rewrite_candidates, &policy_rules, @@ -175,6 +175,7 @@ mod tests { text: "type script and sir dee json".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), } } @@ -193,7 +194,7 @@ mod tests { crate::test_support::remove_env("XDG_DATA_HOME"); let config = Config::default(); - let glossary_path = config.resolved_agentic_glossary_path(); + let glossary_path = config.resolved_rewrite_glossary_path(); store::write_glossary_file( &glossary_path, &[GlossaryEntry { @@ -242,7 +243,7 @@ mod tests { ) .expect("add app rule"); let config = Config::load(None).expect("config"); - let rules = store::read_policy_file(&config.resolved_agentic_policy_path()).expect("rules"); + let rules = store::read_policy_file(&config.resolved_rewrite_policy_path()).expect("rules"); assert_eq!(rules.len(), 1); add_glossary_entry( @@ -253,15 +254,15 @@ mod tests { ) .expect("add glossary entry"); let entries = - store::read_glossary_file(&config.resolved_agentic_glossary_path()).expect("entries"); + store::read_glossary_file(&config.resolved_rewrite_glossary_path()).expect("entries"); assert_eq!(entries.len(), 1); remove_app_rule(None, "zed").expect("remove app rule"); remove_glossary_entry(None, "serde_json").expect("remove glossary entry"); - let rules = store::read_policy_file(&config.resolved_agentic_policy_path()).expect("rules"); + let rules = store::read_policy_file(&config.resolved_rewrite_policy_path()).expect("rules"); let entries = - store::read_glossary_file(&config.resolved_agentic_glossary_path()).expect("entries"); + store::read_glossary_file(&config.resolved_rewrite_glossary_path()).expect("entries"); assert!(rules.is_empty()); assert!(entries.is_empty()); } diff --git a/src/agentic_rewrite/runtime.rs b/src/agentic_rewrite/runtime.rs index ea00a2f..d3bcdc6 100644 --- a/src/agentic_rewrite/runtime.rs +++ b/src/agentic_rewrite/runtime.rs @@ -620,6 +620,7 @@ mod tests { text: "type script and sir dee json".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), } } @@ -862,6 +863,7 @@ mod tests { text: "I'm currently using the window manager hyperland.".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; hyperland_transcript.policy_context.correction_policy = @@ -890,6 +892,7 @@ mod tests { text: "I'm switching from Sui to Hyperland.".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; switch_transcript.policy_context.correction_policy = RewriteCorrectionPolicy::Conservative; @@ -920,6 +923,7 @@ mod tests { text: "cargo clipy".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; transcript.policy_context.correction_policy = RewriteCorrectionPolicy::Conservative; diff --git a/src/agentic_rewrite/store.rs b/src/agentic_rewrite/store.rs index dc099c9..a54c0b3 100644 --- a/src/agentic_rewrite/store.rs +++ b/src/agentic_rewrite/store.rs @@ -2,7 +2,7 @@ use std::path::Path; use serde::{Deserialize, Serialize}; -use crate::config::{Config, PostprocessMode}; +use crate::config::Config; use crate::error::{Result, WhsprError}; use super::{AppRule, GlossaryEntry}; @@ -10,7 +10,7 @@ use super::{AppRule, GlossaryEntry}; const DEFAULT_POLICY_PATH: &str = "~/.local/share/whispers/app-rewrite-policy.toml"; const DEFAULT_GLOSSARY_PATH: &str = "~/.local/share/whispers/technical-glossary.toml"; -const POLICY_STARTER: &str = r#"# App-aware rewrite policy for whispers agentic_rewrite mode. +const POLICY_STARTER: &str = r#"# App-aware rewrite policy for whispers rewrite mode. # Rules are layered, not first-match. Matching rules apply in this order: # global defaults, surface_kind, app_id, window_title_contains, browser_domain_contains. # Later, more specific rules override earlier fields. @@ -35,7 +35,7 @@ const POLICY_STARTER: &str = r#"# App-aware rewrite policy for whispers agentic_ # instructions = "Preserve identifiers, filenames, snake_case, camelCase, and Rust terminology." "#; -const GLOSSARY_STARTER: &str = r#"# Technical glossary for whispers agentic_rewrite mode. +const GLOSSARY_STARTER: &str = r#"# Technical glossary for whispers rewrite mode. # Each entry defines a canonical term plus likely spoken or mis-transcribed aliases. # # Uncomment and edit the examples below. @@ -77,17 +77,17 @@ pub(super) fn default_glossary_path() -> &'static str { } pub(super) fn ensure_starter_files(config: &Config) -> Result> { - if config.postprocess.mode != PostprocessMode::AgenticRewrite { + if !config.postprocess.mode.uses_rewrite() { return Ok(Vec::new()); } let mut created = Vec::new(); - let policy_path = config.resolved_agentic_policy_path(); + let policy_path = config.resolved_rewrite_policy_path(); if ensure_text_file(&policy_path, POLICY_STARTER)? { created.push(policy_path.display().to_string()); } - let glossary_path = config.resolved_agentic_glossary_path(); + let glossary_path = config.resolved_rewrite_glossary_path(); if ensure_text_file(&glossary_path, GLOSSARY_STARTER)? { created.push(glossary_path.display().to_string()); } diff --git a/src/bin/whispers-osd.rs b/src/bin/whispers-osd.rs index 9fe2d92..cd66dc2 100644 --- a/src/bin/whispers-osd.rs +++ b/src/bin/whispers-osd.rs @@ -469,27 +469,10 @@ fn render_frame( } fn render_meter_overlay(pixels: &mut [u8], w: u32, h: u32, bars: &BarState) { - let shell_x = 8; - let shell_y = 10; - let shell_w = w.saturating_sub(shell_x * 2); - let shell_h = h.saturating_sub(shell_y * 2 + 2); - let shell_radius = shell_h / 2; - - draw_surface_shell( - pixels, - w, - h, - shell_x, - shell_y, - shell_w, - shell_h, - shell_radius, - ); - - let track_x = shell_x + 14; - let track_y = shell_y + 10; - let track_w = shell_w.saturating_sub(28); - let track_h = shell_h.saturating_sub(20); + let track_x = 22; + let track_y = 20; + let track_w = w.saturating_sub(track_x * 2); + let track_h = h.saturating_sub(track_y * 2 + 2); draw_track_shell( pixels, w, diff --git a/src/bin/whispers-rewrite-worker/prompt.rs b/src/bin/whispers-rewrite-worker/prompt.rs index 3ef6d54..9f2b751 100644 --- a/src/bin/whispers-rewrite-worker/prompt.rs +++ b/src/bin/whispers-rewrite-worker/prompt.rs @@ -127,8 +127,9 @@ fn agentic_latitude_contract( pub(crate) fn rewrite_instructions(profile: ResolvedRewriteProfile) -> &'static str { let base = "You clean up dictated speech into the final text the user meant to type. \ Return only the finished text. Do not explain anything. Remove obvious disfluencies when natural. \ -Use the correction-aware transcript as the primary source of truth unless structured edit signals say the \ -utterance may still be ambiguous. The raw transcript may still contain spoken editing phrases or canceled wording. \ +Use the correction-aware transcript as strong heuristic evidence. When structured edit signals are present, treat it \ +as advisory rather than absolute and resolve ambiguity using the raw transcript, session context, and candidates. The \ +raw transcript may still contain spoken editing phrases or canceled wording. \ Never reintroduce text that was removed by an explicit spoken correction cue. Respect any structured edit intents \ provided alongside the transcript. If structured edit signals or edit hypotheses are present, use the candidate \ interpretations as bounded options, choose the best interpretation, and lightly refine it only when needed for natural \ @@ -137,10 +138,13 @@ explicit correction says otherwise. Do not normalize names into more common spel When the utterance clearly refers to software, tools, APIs, libraries, Linux components, product names, or other \ technical concepts, prefer the most plausible intended technical term or proper name over a phonetically similar common \ word. Use nearby category words like window manager, editor, language, library, package manager, shell, or terminal \ -tool to disambiguate technical names. If the utterance remains genuinely ambiguous, stay close to the transcript rather \ +tool to disambiguate technical names. When a dictated word is an obvious phonetic near-miss for a likely technical term \ +and the surrounding context clearly identifies the category, correct it to the canonical technical spelling instead of \ +echoing the miss. If multiple plausible interpretations remain similarly credible, stay close to the transcript rather \ than inventing a niche term. \ -If an edit intent says to replace or cancel previous wording, preserve that edit and do not keep the spoken correction \ -phrase itself unless the transcript clearly still intends it. Examples:\n\ +If an edit intent says to replace or cancel previous wording, preserve that edit when the utterance or same-session \ +context clearly supports it. Preserve utterance-initial courtesy or apology wording when the raw transcript still \ +clearly intends it. Examples:\n\ - raw: Hello there. Scratch that. Hi.\n correction-aware: Hi.\n final: Hi.\n\ - raw: I'll bring cookies, scratch that, brownies.\n correction-aware: I'll bring brownies.\n final: I'll bring brownies.\n\ - raw: My name is Notes, scratch that my name is Jonatan.\n correction-aware: My my name is Jonatan.\n aggressive correction-aware: My name is Jonatan.\n final: My name is Jonatan.\n\ @@ -149,6 +153,7 @@ phrase itself unless the transcript clearly still intends it. Examples:\n\ - raw: Let's meet tomorrow, or rather Friday.\n correction-aware: Let's meet Friday.\n final: Let's meet Friday.\n\ - raw: I'm currently using the window manager Hyperland.\n correction-aware: I'm currently using the window manager Hyperland.\n final: I'm currently using the window manager Hyprland.\n\ - raw: I'm switching from Sui to Hyperland.\n correction-aware: I'm switching from Sui to Hyperland.\n final: I'm switching from Sway to Hyprland.\n\ +- raw: I moved back to the window manager neary.\n correction-aware: I moved back to the window manager neary.\n final: I moved back to the window manager niri.\n\ - raw: I use type script for backend tooling.\n correction-aware: I use type script for backend tooling.\n final: I use TypeScript for backend tooling.\n\ - raw: I edit the config in neo vim.\n correction-aware: I edit the config in neo vim.\n final: I edit the config in Neovim."; @@ -156,8 +161,9 @@ phrase itself unless the transcript clearly still intends it. Examples:\n\ ResolvedRewriteProfile::Qwen => { "You clean up dictated speech into the final text the user meant to type. \ Return only the finished text. Do not explain anything. Do not emit reasoning, think tags, or XML wrappers. \ -Remove obvious disfluencies when natural. Use the correction-aware transcript as the primary source of truth unless \ -structured edit signals say the utterance may still be ambiguous. The raw transcript may still contain spoken editing \ +Remove obvious disfluencies when natural. Use the correction-aware transcript as strong heuristic evidence. When \ +structured edit signals are present, treat it as advisory rather than absolute and resolve ambiguity using the raw \ +transcript, session context, and candidates. The raw transcript may still contain spoken editing \ phrases or canceled wording. Never reintroduce text that was removed by an explicit spoken correction cue. Respect \ any structured edit intents provided alongside the transcript. If structured edit signals or edit hypotheses are \ present, use the candidate interpretations as bounded options, choose the best interpretation, and lightly refine it \ @@ -166,9 +172,10 @@ unless a user dictionary or explicit correction says otherwise. Do not normalize because they look familiar. When the utterance clearly refers to software, tools, APIs, libraries, Linux components, \ product names, or other technical concepts, prefer the most plausible intended technical term or proper name over a \ phonetically similar common word. Use nearby category words like window manager, editor, language, library, package \ -manager, shell, or terminal tool to disambiguate technical names. If the utterance remains genuinely ambiguous, stay \ -close to the transcript rather than inventing a niche term. If an edit intent says to replace or cancel previous wording, preserve that edit and do \ -not keep the spoken correction phrase itself unless the transcript clearly still intends it. Examples:\n\ +manager, shell, or terminal tool to disambiguate technical names. When a dictated word is an obvious phonetic near-miss \ +for a likely technical term and the surrounding context clearly identifies the category, correct it to the canonical \ +technical spelling instead of echoing the miss. If multiple plausible interpretations remain similarly credible, stay \ +close to the transcript rather than inventing a niche term. If an edit intent says to replace or cancel previous wording, preserve that edit when the utterance or same-session context clearly supports it. Preserve utterance-initial courtesy or apology wording when the raw transcript still clearly intends it. Examples:\n\ - raw: Hello there. Scratch that. Hi.\n correction-aware: Hi.\n final: Hi.\n\ - raw: I'll bring cookies, scratch that, brownies.\n correction-aware: I'll bring brownies.\n final: I'll bring brownies.\n\ - raw: My name is Notes, scratch that my name is Jonatan.\n correction-aware: My my name is Jonatan.\n aggressive correction-aware: My name is Jonatan.\n final: My name is Jonatan.\n\ @@ -177,6 +184,7 @@ not keep the spoken correction phrase itself unless the transcript clearly still - raw: Let's meet tomorrow, or rather Friday.\n correction-aware: Let's meet Friday.\n final: Let's meet Friday.\n\ - raw: I'm currently using the window manager Hyperland.\n correction-aware: I'm currently using the window manager Hyperland.\n final: I'm currently using the window manager Hyprland.\n\ - raw: I'm switching from Sui to Hyperland.\n correction-aware: I'm switching from Sui to Hyperland.\n final: I'm switching from Sway to Hyprland.\n\ +- raw: I moved back to the window manager neary.\n correction-aware: I moved back to the window manager neary.\n final: I moved back to the window manager niri.\n\ - raw: I use type script for backend tooling.\n correction-aware: I use type script for backend tooling.\n final: I use TypeScript for backend tooling.\n\ - raw: I edit the config in neo vim.\n correction-aware: I edit the config in neo vim.\n final: I edit the config in Neovim." } @@ -190,6 +198,7 @@ pub(crate) fn build_user_message(transcript: &RewriteTranscript) -> String { let raw = transcript.raw_text.trim(); let edit_intents = render_edit_intents(transcript); let edit_signals = render_edit_signals(transcript); + let edit_context = render_edit_context(transcript); let agentic_context = render_agentic_context(transcript); let route = rewrite_route(transcript); tracing::debug!( @@ -228,12 +237,15 @@ Active typing context:\n\ Recent dictation session:\n\ {recent_session_entries}\ {agentic_policy_context}\ +Structured cue context:\n\ +{edit_context}\ Session backtrack candidates:\n\ {session_candidates}\ {recommended_session_candidate}\ The user may be correcting the most recent prior dictation entry rather than appending new text.\n\ If the recommended session candidate says replace_last_entry, treat your final text as the replacement text for that previous dictation entry, not as newly appended text.\n\ Prefer the recommended session candidate unless another listed session candidate is clearly better.\n\ +If the utterance begins with a courtesy-prefixed correction cue and the session evidence is weak, preserve the courtesy wording instead of assuming replacement.\n\ {surface_guidance}\ Current utterance correction candidate:\n\ {correction_aware}\n\ @@ -251,9 +263,12 @@ Final text:" let recent_segments = render_recent_segments(transcript, 4); let aggressive_candidate = render_aggressive_candidate(transcript); let exact_cue_guidance = if has_strong_explicit_edit_cue(transcript) { - "A strong explicit spoken edit cue was detected. The literal raw transcript probably contains canceled wording. \ -Prefer a candidate interpretation that removes the cue and canceled wording unless doing so would clearly lose intended meaning. \ -If the cue is an exact strong match for phrases like scratch that, never mind, or wait no, do not keep the literal cue text in the final output.\n" + if opening_cue_requires_literal_bias(transcript) { + "A strong edit cue appears at the beginning of the utterance without strong same-session replacement evidence. \ +Do not assume it cancels earlier text. If the opening includes courtesy language such as sorry or my apologies, preserve that courtesy wording unless another candidate is clearly better.\n" + } else { + "A strong explicit spoken edit cue was detected. Prefer a candidate interpretation that preserves the intended edit when the cue clearly corrects earlier same-utterance wording or the most recent same-session dictation.\n" + } } else { "" }; @@ -268,12 +283,13 @@ Structured edit signals:\n\ {edit_signals}\ Structured edit intents:\n\ {edit_intents}\ +Structured cue context:\n\ +{edit_context}\ This utterance likely contains spoken self-corrections or restatements.\n\ Choose the best candidate interpretation and lightly refine it only when needed.\n\ {exact_cue_guidance}\ -When an exact strong edit cue is present, treat the non-literal candidates as more trustworthy than the literal transcript.\n\ -The candidate list is ordered from most likely to least likely by heuristics.\n\ -For exact strong edit cues, the first candidate is the heuristic best guess and should usually win unless another candidate is clearly better.\n\ +When an exact strong edit cue is present, treat non-literal candidates as evidence, not an automatic winner.\n\ +The candidate list is ordered heuristically and may be wrong for utterance-initial or courtesy-prefixed cues.\n\ Prefer the smallest replacement scope that yields a coherent result.\n\ Use span-level replacements when only a key phrase was corrected, clause-level replacements when the correction replaces the surrounding thought, and sentence-level replacements only when the whole sentence was canceled.\n\ Preserve literal wording when the cue is not actually an edit.\n\ @@ -299,6 +315,8 @@ Structured edit signals:\n\ {edit_signals}\ Structured edit intents:\n\ {edit_intents}\ +Structured cue context:\n\ +{edit_context}\ Self-corrections were already resolved before rewriting.\n\ Use this correction-aware transcript as the main source text. In agentic mode, you may still normalize likely \ technical terms or proper names when the utterance strongly supports them, even if the exact canonical spelling is not \ @@ -318,6 +336,8 @@ Structured edit signals:\n\ {edit_signals}\ Structured edit intents:\n\ {edit_intents}\ +Structured cue context:\n\ +{edit_context}\ Correction-aware transcript:\n\ {correction_aware}\n\ Treat the correction-aware transcript as authoritative for explicit spoken edits and overall meaning, but in agentic \ @@ -398,6 +418,18 @@ Glossary-backed candidates:\n\ .unwrap_or_default() } +fn render_edit_context(transcript: &RewriteTranscript) -> String { + let context = &transcript.edit_context; + format!( + "- cue_is_utterance_initial: {}\n- preceding_content_word_count: {}\n- courtesy_prefix_detected: {}\n- has_recent_same_focus_entry: {}\n- recommended_session_action_is_replace: {}\n", + context.cue_is_utterance_initial, + context.preceding_content_word_count, + context.courtesy_prefix_detected, + context.has_recent_same_focus_entry, + context.recommended_session_action_is_replace, + ) +} + fn render_edit_intents(transcript: &RewriteTranscript) -> String { if transcript.edit_intents.is_empty() { return "- none detected\n".to_string(); @@ -576,6 +608,15 @@ fn render_recommended_candidate(transcript: &RewriteTranscript) -> String { .unwrap_or_default() } +fn opening_cue_requires_literal_bias(transcript: &RewriteTranscript) -> bool { + transcript.edit_context.cue_is_utterance_initial + && transcript.edit_context.courtesy_prefix_detected + && !(transcript.edit_context.has_recent_same_focus_entry + && transcript + .edit_context + .recommended_session_action_is_replace) +} + fn render_typing_context(transcript: &RewriteTranscript) -> String { transcript .typing_context diff --git a/src/bin/whispers-rewrite-worker/rewrite_protocol.rs b/src/bin/whispers-rewrite-worker/rewrite_protocol.rs index edacdc4..33b13ff 100644 --- a/src/bin/whispers-rewrite-worker/rewrite_protocol.rs +++ b/src/bin/whispers-rewrite-worker/rewrite_protocol.rs @@ -18,6 +18,8 @@ pub struct RewriteTranscript { pub rewrite_candidates: Vec, pub recommended_candidate: Option, #[serde(default)] + pub edit_context: RewriteEditContext, + #[serde(default)] pub policy_context: RewritePolicyContext, } @@ -90,6 +92,15 @@ pub struct RewriteCandidate { pub text: String, } +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] +pub struct RewriteEditContext { + pub cue_is_utterance_initial: bool, + pub preceding_content_word_count: usize, + pub courtesy_prefix_detected: bool, + pub has_recent_same_focus_entry: bool, + pub recommended_session_action_is_replace: bool, +} + #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] pub struct RewritePolicyContext { pub correction_policy: RewriteCorrectionPolicy, diff --git a/src/cli.rs b/src/cli.rs index d431781..0874b52 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -62,7 +62,7 @@ pub enum Command { action: AsrModelAction, }, - /// Manage local rewrite models used by advanced post-processing + /// Manage local rewrite models used by rewrite post-processing RewriteModel { #[command(subcommand)] action: RewriteModelAction, @@ -80,7 +80,7 @@ pub enum Command { action: AppRuleAction, }, - /// Manage technical glossary entries for agentic rewrite + /// Manage technical glossary entries for rewrite post-processing Glossary { #[command(subcommand)] action: GlossaryAction, diff --git a/src/cloud.rs b/src/cloud.rs index fa04ea8..758a75b 100644 --- a/src/cloud.rs +++ b/src/cloud.rs @@ -679,7 +679,7 @@ mod tests { crate::test_support::set_env("OPENAI_API_KEY", "test-key"); let mut config = Config::default(); - config.postprocess.mode = crate::config::PostprocessMode::AdvancedLocal; + config.postprocess.mode = crate::config::PostprocessMode::Rewrite; config.rewrite.backend = RewriteBackend::Cloud; config.cloud.base_url = format!("{}/v1", server.base_url()); let service = CloudService::new(&config).expect("service"); @@ -701,6 +701,7 @@ mod tests { edit_hypotheses: Vec::new(), rewrite_candidates: Vec::new(), recommended_candidate: None, + edit_context: Default::default(), policy_context: crate::rewrite_protocol::RewritePolicyContext::default(), }, None, diff --git a/src/config.rs b/src/config.rs index 1b8354a..26436f0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -13,9 +13,9 @@ pub use edit::{ }; pub use paths::{data_dir, default_config_path, expand_tilde, resolve_config_path}; pub use schema::{ - AgenticRewriteConfig, AudioConfig, CleanupConfig, CleanupProfile, CloudConfig, - CloudLanguageMode, CloudProvider, CloudRewriteConfig, CloudSettingsUpdate, - CloudTranscriptionConfig, Config, FeedbackConfig, InjectConfig, PersonalizationConfig, - PostprocessConfig, PostprocessMode, RewriteBackend, RewriteConfig, RewriteFallback, - SessionConfig, TranscriptionBackend, TranscriptionConfig, TranscriptionFallback, + AudioConfig, CleanupConfig, CleanupProfile, CloudConfig, CloudLanguageMode, CloudProvider, + CloudRewriteConfig, CloudSettingsUpdate, CloudTranscriptionConfig, Config, FeedbackConfig, + InjectConfig, PersonalizationConfig, PostprocessConfig, PostprocessMode, RewriteBackend, + RewriteConfig, RewriteFallback, SessionConfig, TranscriptionBackend, TranscriptionConfig, + TranscriptionFallback, }; diff --git a/src/config/edit.rs b/src/config/edit.rs index 5ac4f03..1d6350f 100644 --- a/src/config/edit.rs +++ b/src/config/edit.rs @@ -2,7 +2,6 @@ use std::path::Path; use crate::error::{Result, WhsprError}; use crate::rewrite_profile::RewriteProfile; -use crate::rewrite_protocol::RewriteCorrectionPolicy; use super::{ CloudSettingsUpdate, PostprocessMode, RewriteBackend, RewriteFallback, TranscriptionBackend, @@ -46,7 +45,7 @@ flash_attn = true idle_timeout_ms = 120000 [postprocess] -# "raw" (default), "advanced_local", "agentic_rewrite", or "legacy_basic" for deprecated cleanup configs +# "raw" (default), "rewrite", or "legacy_basic" for deprecated cleanup configs mode = "raw" [session] @@ -72,7 +71,7 @@ snippet_trigger = "insert" backend = "local" # Cloud fallback behavior ("local" or "none") fallback = "local" -# Managed rewrite model name for advanced_local mode +# Managed rewrite model name for rewrite mode selected_model = "qwen-3.5-4b-q4_k_m" # Manual GGUF path override (empty = use selected managed model) # Custom rewrite models should be chat-capable GGUFs with an embedded @@ -90,11 +89,9 @@ idle_timeout_ms = 120000 max_output_chars = 1200 # Maximum tokens to generate for rewritten output max_tokens = 256 - -[agentic_rewrite] -# App-aware rewrite policy rules used by postprocess.mode = "agentic_rewrite" +# App-aware rewrite policy rules used by postprocess.mode = "rewrite" policy_path = "~/.local/share/whispers/app-rewrite-policy.toml" -# Technical glossary used by postprocess.mode = "agentic_rewrite" +# Technical glossary used by postprocess.mode = "rewrite" glossary_path = "~/.local/share/whispers/technical-glossary.toml" # Default correction policy ("conservative", "balanced", or "aggressive") default_correction_policy = "balanced" @@ -221,8 +218,8 @@ pub fn update_config_rewrite_selection(config_path: &Path, selected_model: &str) .and_then(|table| table.get("mode")) .and_then(|item| item.as_str()) { - Some("agentic_rewrite") => PostprocessMode::AgenticRewrite, - _ => PostprocessMode::AdvancedLocal, + Some("raw" | "legacy_basic") => PostprocessMode::Rewrite, + _ => PostprocessMode::Rewrite, }; doc["postprocess"]["mode"] = toml_edit::value(mode.as_str()); let rewrite_backend = doc["rewrite"] @@ -242,12 +239,6 @@ pub fn update_config_rewrite_selection(config_path: &Path, selected_model: &str) doc["rewrite"]["idle_timeout_ms"] = toml_edit::value(120000); doc["rewrite"]["max_output_chars"] = toml_edit::value(1200); doc["rewrite"]["max_tokens"] = toml_edit::value(256); - doc["agentic_rewrite"]["policy_path"] = - toml_edit::value(crate::agentic_rewrite::default_policy_path()); - doc["agentic_rewrite"]["glossary_path"] = - toml_edit::value(crate::agentic_rewrite::default_glossary_path()); - doc["agentic_rewrite"]["default_correction_policy"] = - toml_edit::value(RewriteCorrectionPolicy::Balanced.as_str()); std::fs::write(config_path, doc.to_string()) .map_err(|e| WhsprError::Config(format!("failed to write config: {e}")))?; @@ -335,7 +326,6 @@ fn ensure_standard_postprocess_tables(doc: &mut toml_edit::DocumentMut) { ensure_root_table(doc, "postprocess"); ensure_root_table(doc, "session"); ensure_root_table(doc, "rewrite"); - ensure_root_table(doc, "agentic_rewrite"); ensure_root_table(doc, "cloud"); ensure_nested_table(doc, "cloud", "transcription"); ensure_nested_table(doc, "cloud", "rewrite"); @@ -344,11 +334,8 @@ fn ensure_standard_postprocess_tables(doc: &mut toml_edit::DocumentMut) { fn normalize_postprocess_mode(doc: &mut toml_edit::DocumentMut) { let current = doc["postprocess"]["mode"].as_str().unwrap_or_default(); - if !matches!( - current, - "raw" | "advanced_local" | "agentic_rewrite" | "legacy_basic" - ) { - doc["postprocess"]["mode"] = toml_edit::value(PostprocessMode::AdvancedLocal.as_str()); + if !matches!(current, "raw" | "rewrite" | "legacy_basic") { + doc["postprocess"]["mode"] = toml_edit::value(PostprocessMode::Rewrite.as_str()); } } diff --git a/src/config/load.rs b/src/config/load.rs index abc5b79..d9848e4 100644 --- a/src/config/load.rs +++ b/src/config/load.rs @@ -26,6 +26,7 @@ impl Config { config.apply_legacy_transcription_migration(&contents, &config_path); config.apply_legacy_cleanup_migration(&contents, &config_path); + config.apply_legacy_agentic_rewrite_migration(&contents, &config_path); config.apply_cloud_sanitization(); Ok(config) } @@ -76,6 +77,41 @@ impl Config { } } + fn apply_legacy_agentic_rewrite_migration(&mut self, contents: &str, config_path: &Path) { + let legacy_present = section_present(contents, "agentic_rewrite"); + if !legacy_present { + return; + } + + let rewrite_has_policy = table_key_present(contents, "rewrite", "policy_path"); + let rewrite_has_glossary = table_key_present(contents, "rewrite", "glossary_path"); + let rewrite_has_default_policy = + table_key_present(contents, "rewrite", "default_correction_policy"); + + if !rewrite_has_policy { + self.rewrite.policy_path = self.legacy_agentic_rewrite.policy_path.clone(); + } + if !rewrite_has_glossary { + self.rewrite.glossary_path = self.legacy_agentic_rewrite.glossary_path.clone(); + } + if !rewrite_has_default_policy { + self.rewrite.default_correction_policy = + self.legacy_agentic_rewrite.default_correction_policy; + } + + if rewrite_has_policy || rewrite_has_glossary || rewrite_has_default_policy { + tracing::warn!( + "config {} contains deprecated [agentic_rewrite]; [rewrite] takes precedence", + config_path.display() + ); + } else { + tracing::warn!( + "config {} uses deprecated [agentic_rewrite]; mapping fields to [rewrite]", + config_path.display() + ); + } + } + fn apply_cloud_sanitization(&mut self) { if self.transcription.local_backend == TranscriptionBackend::Cloud { tracing::warn!( @@ -105,6 +141,14 @@ fn section_present(contents: &str, name: &str) -> bool { .is_some() } +fn table_key_present(contents: &str, table: &str, key: &str) -> bool { + toml::from_str::(contents) + .ok() + .and_then(|value| value.get(table).cloned()) + .and_then(|value| value.get(key).cloned()) + .is_some() +} + fn looks_like_cloud_api_key(value: &str) -> bool { let trimmed = value.trim(); trimmed.starts_with("sk-") diff --git a/src/config/paths.rs b/src/config/paths.rs index 0b07ec4..d415ae7 100644 --- a/src/config/paths.rs +++ b/src/config/paths.rs @@ -80,6 +80,14 @@ impl Config { .then(|| PathBuf::from(expand_tilde(&self.rewrite.instructions_path))) } + pub fn resolved_rewrite_policy_path(&self) -> PathBuf { + PathBuf::from(expand_tilde(&self.rewrite.policy_path)) + } + + pub fn resolved_rewrite_glossary_path(&self) -> PathBuf { + PathBuf::from(expand_tilde(&self.rewrite.glossary_path)) + } + pub fn resolved_dictionary_path(&self) -> PathBuf { PathBuf::from(expand_tilde(&self.personalization.dictionary_path)) } @@ -89,10 +97,10 @@ impl Config { } pub fn resolved_agentic_policy_path(&self) -> PathBuf { - PathBuf::from(expand_tilde(&self.agentic_rewrite.policy_path)) + self.resolved_rewrite_policy_path() } pub fn resolved_agentic_glossary_path(&self) -> PathBuf { - PathBuf::from(expand_tilde(&self.agentic_rewrite.glossary_path)) + self.resolved_rewrite_glossary_path() } } diff --git a/src/config/schema.rs b/src/config/schema.rs index 9ddff17..013b1fe 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -14,7 +14,8 @@ pub struct Config { pub session: SessionConfig, pub personalization: PersonalizationConfig, pub rewrite: RewriteConfig, - pub agentic_rewrite: AgenticRewriteConfig, + #[serde(default, rename = "agentic_rewrite")] + pub(crate) legacy_agentic_rewrite: LegacyAgenticRewriteConfig, pub cloud: CloudConfig, pub cleanup: CleanupConfig, pub inject: InjectConfig, @@ -80,8 +81,8 @@ pub struct PostprocessConfig { pub enum PostprocessMode { #[default] Raw, - AdvancedLocal, - AgenticRewrite, + #[serde(alias = "advanced_local", alias = "agentic_rewrite")] + Rewrite, LegacyBasic, } @@ -114,11 +115,14 @@ pub struct RewriteConfig { pub idle_timeout_ms: u64, pub max_output_chars: usize, pub max_tokens: usize, + pub policy_path: String, + pub glossary_path: String, + pub default_correction_policy: RewriteCorrectionPolicy, } #[derive(Debug, Clone, Deserialize, PartialEq, Eq)] #[serde(default)] -pub struct AgenticRewriteConfig { +pub(crate) struct LegacyAgenticRewriteConfig { pub policy_path: String, pub glossary_path: String, pub default_correction_policy: RewriteCorrectionPolicy, @@ -283,14 +287,13 @@ impl PostprocessMode { pub fn as_str(self) -> &'static str { match self { Self::Raw => "raw", - Self::AdvancedLocal => "advanced_local", - Self::AgenticRewrite => "agentic_rewrite", + Self::Rewrite => "rewrite", Self::LegacyBasic => "legacy_basic", } } pub fn uses_rewrite(self) -> bool { - matches!(self, Self::AdvancedLocal | Self::AgenticRewrite) + matches!(self, Self::Rewrite) } } @@ -363,11 +366,14 @@ impl Default for RewriteConfig { idle_timeout_ms: 120000, max_output_chars: 1200, max_tokens: 256, + policy_path: crate::agentic_rewrite::default_policy_path().into(), + glossary_path: crate::agentic_rewrite::default_glossary_path().into(), + default_correction_policy: RewriteCorrectionPolicy::Balanced, } } } -impl Default for AgenticRewriteConfig { +impl Default for LegacyAgenticRewriteConfig { fn default() -> Self { Self { policy_path: crate::agentic_rewrite::default_policy_path().into(), diff --git a/src/config/tests.rs b/src/config/tests.rs index 0b2a26f..a42f7b9 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -168,7 +168,7 @@ fn update_rewrite_selection_enables_advanced_mode() { .expect("select rewrite model"); let loaded = Config::load(Some(&config_path)).expect("load config"); - assert_eq!(loaded.postprocess.mode, PostprocessMode::AdvancedLocal); + assert_eq!(loaded.postprocess.mode, PostprocessMode::Rewrite); assert_eq!(loaded.rewrite.selected_model, "qwen-3.5-2b-q4_k_m"); assert!(loaded.rewrite.model_path.is_empty()); assert_eq!( @@ -183,6 +183,46 @@ fn update_rewrite_selection_enables_advanced_mode() { assert_eq!(loaded.rewrite.idle_timeout_ms, 120000); } +#[test] +fn update_rewrite_selection_preserves_policy_settings() { + let config_path = + crate::test_support::unique_temp_path("config-rewrite-policy-preserve", "toml"); + std::fs::write( + &config_path, + r#"[postprocess] +mode = "rewrite" + +[rewrite] +backend = "local" +fallback = "local" +selected_model = "qwen-3.5-4b-q4_k_m" +model_path = "" +instructions_path = "~/.local/share/whispers/rewrite-instructions.txt" +profile = "auto" +timeout_ms = 30000 +idle_timeout_ms = 120000 +max_output_chars = 1200 +max_tokens = 256 +policy_path = "/tmp/custom-policy.toml" +glossary_path = "/tmp/custom-glossary.toml" +default_correction_policy = "aggressive" +"#, + ) + .expect("write config"); + + update_config_rewrite_selection(&config_path, "qwen-3.5-2b-q4_k_m") + .expect("select rewrite model"); + + let loaded = Config::load(Some(&config_path)).expect("load config"); + assert_eq!(loaded.rewrite.selected_model, "qwen-3.5-2b-q4_k_m"); + assert_eq!(loaded.rewrite.policy_path, "/tmp/custom-policy.toml"); + assert_eq!(loaded.rewrite.glossary_path, "/tmp/custom-glossary.toml"); + assert_eq!( + loaded.rewrite.default_correction_policy, + crate::rewrite_protocol::RewriteCorrectionPolicy::Aggressive + ); +} + #[test] fn update_helpers_upgrade_legacy_configs_without_panicking() { let config_path = crate::test_support::unique_temp_path("config-legacy-upgrade", "toml"); @@ -215,7 +255,7 @@ language = "auto" TranscriptionBackend::WhisperCpp ); assert_eq!(loaded.transcription.selected_model, "large-v3-turbo"); - assert_eq!(loaded.postprocess.mode, PostprocessMode::AdvancedLocal); + assert_eq!(loaded.postprocess.mode, PostprocessMode::Rewrite); assert_eq!(loaded.rewrite.selected_model, "qwen-3.5-4b-q4_k_m"); let raw = std::fs::read_to_string(&config_path).expect("read upgraded config"); @@ -238,6 +278,47 @@ api_key_env = "sk-test-inline" assert_eq!(loaded.cloud.api_key_env, "OPENAI_API_KEY"); } +#[test] +fn load_deprecated_rewrite_modes_normalizes_to_rewrite() { + for mode in ["advanced_local", "agentic_rewrite"] { + let path = crate::test_support::unique_temp_path(&format!("config-mode-{mode}"), "toml"); + std::fs::write( + &path, + format!( + r#"[postprocess] +mode = "{mode}" +"# + ), + ) + .expect("write config"); + + let loaded = Config::load(Some(&path)).expect("load config"); + assert_eq!(loaded.postprocess.mode, PostprocessMode::Rewrite); + } +} + +#[test] +fn load_legacy_agentic_rewrite_section_maps_into_rewrite_fields() { + let path = crate::test_support::unique_temp_path("config-agentic-legacy", "toml"); + std::fs::write( + &path, + r#"[agentic_rewrite] +policy_path = "/tmp/policy.toml" +glossary_path = "/tmp/glossary.toml" +default_correction_policy = "aggressive" +"#, + ) + .expect("write config"); + + let loaded = Config::load(Some(&path)).expect("load config"); + assert_eq!(loaded.rewrite.policy_path, "/tmp/policy.toml"); + assert_eq!(loaded.rewrite.glossary_path, "/tmp/glossary.toml"); + assert_eq!( + loaded.rewrite.default_correction_policy, + crate::rewrite_protocol::RewriteCorrectionPolicy::Aggressive + ); +} + #[test] fn default_config_template_matches_example_file() { let example_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("config.example.toml"); diff --git a/src/personalization/rewrite.rs b/src/personalization/rewrite.rs index c6c5d75..4c4b455 100644 --- a/src/personalization/rewrite.rs +++ b/src/personalization/rewrite.rs @@ -1,10 +1,10 @@ use crate::cleanup; use crate::rewrite_protocol::{ - RewriteCandidate, RewriteCandidateKind, RewriteEditAction, RewriteEditHypothesis, - RewriteEditHypothesisMatchSource, RewriteEditIntent, RewriteEditSignal, RewriteEditSignalKind, - RewriteEditSignalScope, RewriteEditSignalStrength, RewriteIntentConfidence, - RewritePolicyContext, RewriteReplacementScope, RewriteTailShape, RewriteTranscript, - RewriteTranscriptSegment, + RewriteCandidate, RewriteCandidateKind, RewriteEditAction, RewriteEditContext, + RewriteEditHypothesis, RewriteEditHypothesisMatchSource, RewriteEditIntent, RewriteEditSignal, + RewriteEditSignalKind, RewriteEditSignalScope, RewriteEditSignalStrength, + RewriteIntentConfidence, RewritePolicyContext, RewriteReplacementScope, RewriteTailShape, + RewriteTranscript, RewriteTranscriptSegment, }; use crate::transcribe::Transcript; @@ -79,8 +79,12 @@ pub fn build_rewrite_transcript( &analysis.edit_hypotheses, rules, ); - let recommended_candidate = - recommended_candidate(&rewrite_candidates, &analysis.edit_hypotheses); + let edit_context = derive_edit_context(&transcript.raw_text, &analysis.edit_hypotheses); + let recommended_candidate = recommended_candidate( + &rewrite_candidates, + &analysis.edit_hypotheses, + &edit_context, + ); RewriteTranscript { raw_text, @@ -147,6 +151,7 @@ pub fn build_rewrite_transcript( edit_hypotheses, rewrite_candidates, recommended_candidate, + edit_context, policy_context: RewritePolicyContext::default(), } } @@ -312,7 +317,10 @@ fn build_rewrite_candidates( } } - if has_strong_explicit_hypothesis(edit_hypotheses) { + let edit_context = derive_edit_context(raw_text, edit_hypotheses); + if has_strong_explicit_hypothesis(edit_hypotheses) + && !(edit_context.cue_is_utterance_initial && edit_context.courtesy_prefix_detected) + { candidates.sort_by_key(|candidate| candidate_priority(candidate.kind)); } @@ -596,12 +604,70 @@ fn normalize_candidate_spacing(text: &str) -> Option { fn recommended_candidate( rewrite_candidates: &[RewriteCandidate], edit_hypotheses: &[cleanup::EditHypothesis], + edit_context: &RewriteEditContext, ) -> Option { + if edit_context.cue_is_utterance_initial && edit_context.courtesy_prefix_detected { + return None; + } + has_strong_explicit_hypothesis(edit_hypotheses) .then(|| rewrite_candidates.first().cloned()) .flatten() } +fn derive_edit_context( + raw_text: &str, + edit_hypotheses: &[cleanup::EditHypothesis], +) -> RewriteEditContext { + let spans = collect_word_spans(raw_text); + let Some(hypothesis) = earliest_strong_explicit_hypothesis(edit_hypotheses) else { + return RewriteEditContext::default(); + }; + + let prefix_words = spans + .get(..hypothesis.word_start) + .unwrap_or(&[]) + .iter() + .map(|span| span.normalized.as_str()) + .collect::>(); + let courtesy_prefix_word_count = courtesy_prefix_word_count(&prefix_words); + let preceding_content_word_count = prefix_words + .len() + .saturating_sub(courtesy_prefix_word_count); + + RewriteEditContext { + cue_is_utterance_initial: prefix_words.is_empty() || preceding_content_word_count == 0, + preceding_content_word_count, + courtesy_prefix_detected: courtesy_prefix_word_count > 0, + has_recent_same_focus_entry: false, + recommended_session_action_is_replace: false, + } +} + +fn earliest_strong_explicit_hypothesis( + edit_hypotheses: &[cleanup::EditHypothesis], +) -> Option<&cleanup::EditHypothesis> { + edit_hypotheses + .iter() + .filter(|hypothesis| { + hypothesis.strength == cleanup::EditSignalStrength::Strong + && matches!( + hypothesis.match_source, + cleanup::EditHypothesisMatchSource::Exact + | cleanup::EditHypothesisMatchSource::Alias + ) + }) + .min_by_key(|hypothesis| hypothesis.word_start) +} + +fn courtesy_prefix_word_count(words: &[&str]) -> usize { + match words { + ["my", "apologies"] => 2, + ["apologies"] | ["sorry"] => 1, + _ => 0, + } +} + fn has_strong_explicit_hypothesis(edit_hypotheses: &[cleanup::EditHypothesis]) -> bool { edit_hypotheses.iter().any(|hypothesis| { hypothesis.strength == cleanup::EditSignalStrength::Strong @@ -859,4 +925,26 @@ mod tests { Some(RewriteCandidateKind::SpanReplacement) ); } + + #[test] + fn courtesy_prefixed_opening_keeps_literal_bias_metadata() { + let transcript = Transcript { + raw_text: "my apologies i meant xxxyyyzzz".into(), + detected_language: Some("en".into()), + segments: Vec::new(), + }; + + let rewrite = build_rewrite_transcript(&transcript, &rules()); + assert!(rewrite.edit_context.cue_is_utterance_initial); + assert!(rewrite.edit_context.courtesy_prefix_detected); + assert_eq!(rewrite.edit_context.preceding_content_word_count, 0); + assert!(rewrite.recommended_candidate.is_none()); + assert_eq!( + rewrite + .rewrite_candidates + .first() + .map(|candidate| candidate.kind), + Some(RewriteCandidateKind::Literal) + ); + } } diff --git a/src/postprocess/finalize.rs b/src/postprocess/finalize.rs index c0a7747..a1fc431 100644 --- a/src/postprocess/finalize.rs +++ b/src/postprocess/finalize.rs @@ -63,7 +63,7 @@ pub async fn finalize_transcript( &rules, ) } - PostprocessMode::AdvancedLocal | PostprocessMode::AgenticRewrite => { + PostprocessMode::Rewrite => { finalize_rewrite_plan_or_fallback( config, rewrite_service, @@ -91,24 +91,6 @@ async fn finalize_rewrite_plan_or_fallback( rewrite_service: Option<&RewriteService>, plan: planning::RewritePlan, ) -> FinalizedTranscript { - if let Some(text) = plan.deterministic_replacement_text.clone() { - tracing::debug!( - output_len = text.len(), - mode = config.postprocess.mode.as_str(), - "using deterministic session replacement" - ); - return finalize_plain_text( - text, - SessionRewriteSummary { - had_edit_cues: plan.had_edit_cues, - rewrite_used: false, - recommended_candidate: plan.recommended_candidate.clone(), - }, - &plan.rules, - ) - .with_operation(plan.operation.clone()); - } - let local_rewrite_available = crate::rewrite::local_rewrite_available(); let local_backend_requested = config.rewrite.backend == RewriteBackend::Local; @@ -209,7 +191,7 @@ impl FinalizedTranscript { } fn rewrite_output_accepted( - config: &Config, + _config: &Config, rewrite_transcript: &RewriteTranscript, text: &str, ) -> bool { @@ -217,10 +199,6 @@ fn rewrite_output_accepted( return false; } - if config.postprocess.mode != PostprocessMode::AgenticRewrite { - return true; - } - match rewrite_transcript.policy_context.correction_policy { RewriteCorrectionPolicy::Conservative => { agentic_rewrite::conservative_output_allowed(rewrite_transcript, text) @@ -267,6 +245,7 @@ mod tests { text: "allowed rewrite".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }, custom_instructions: None, @@ -274,38 +253,13 @@ mod tests { operation: FinalizedOperation::Append, had_edit_cues: false, recommended_candidate: Some("allowed rewrite".into()), - deterministic_replacement_text: None, } } - #[tokio::test] - async fn deterministic_session_replacement_bypasses_rewrite_and_preserves_replace_operation() { - let config = plan_config(PostprocessMode::AdvancedLocal, RewriteBackend::Local); - let mut plan = rewrite_plan(); - plan.operation = FinalizedOperation::ReplaceLastEntry { - entry_id: 7, - delete_graphemes: 4, - }; - plan.deterministic_replacement_text = Some("deterministic replacement".into()); - plan.local_model_path = None; - - let finalized = finalize_rewrite_plan_or_fallback(&config, None, plan).await; - - assert_eq!(finalized.text, "deterministic replacement"); - assert_eq!( - finalized.operation, - FinalizedOperation::ReplaceLastEntry { - entry_id: 7, - delete_graphemes: 4, - } - ); - assert!(!finalized.rewrite_summary.rewrite_used); - } - #[tokio::test] #[cfg(not(feature = "local-rewrite"))] async fn local_rewrite_unavailable_build_falls_back_to_plain_text() { - let config = plan_config(PostprocessMode::AdvancedLocal, RewriteBackend::Local); + let config = plan_config(PostprocessMode::Rewrite, RewriteBackend::Local); let mut plan = rewrite_plan(); plan.operation = FinalizedOperation::ReplaceLastEntry { entry_id: 11, @@ -328,7 +282,7 @@ mod tests { #[tokio::test] #[cfg(feature = "local-rewrite")] async fn missing_local_model_falls_back_to_plain_text() { - let config = plan_config(PostprocessMode::AdvancedLocal, RewriteBackend::Local); + let config = plan_config(PostprocessMode::Rewrite, RewriteBackend::Local); let mut plan = rewrite_plan(); plan.local_model_path = None; @@ -340,7 +294,7 @@ mod tests { #[test] fn conservative_agentic_rejection_falls_back_to_precomputed_text() { - let mut config = plan_config(PostprocessMode::AgenticRewrite, RewriteBackend::Cloud); + let mut config = plan_config(PostprocessMode::Rewrite, RewriteBackend::Cloud); config.rewrite.fallback = RewriteFallback::None; let mut plan = rewrite_plan(); plan.rewrite_transcript.policy_context.correction_policy = diff --git a/src/postprocess/planning.rs b/src/postprocess/planning.rs index e802352..c1b23a8 100644 --- a/src/postprocess/planning.rs +++ b/src/postprocess/planning.rs @@ -21,7 +21,6 @@ pub(crate) struct RewritePlan { pub operation: FinalizedOperation, pub had_edit_cues: bool, pub recommended_candidate: Option, - pub deterministic_replacement_text: Option, } pub fn raw_text(transcript: &Transcript) -> String { @@ -53,14 +52,34 @@ pub(crate) fn build_rewrite_plan( recent_session: Option<&EligibleSessionEntry>, ) -> RewritePlan { let rules = load_runtime_rules(config); - let fallback_text = base_text(config, transcript); let local_model_path = resolve_rewrite_model_path(config); let mut rewrite_transcript = personalization::build_rewrite_transcript(transcript, &rules); rewrite_transcript.typing_context = typing_context.and_then(session::to_rewrite_typing_context); - if config.postprocess.mode == PostprocessMode::AgenticRewrite { - agentic_rewrite::apply_runtime_policy(config, &mut rewrite_transcript); - } + agentic_rewrite::apply_runtime_policy(config, &mut rewrite_transcript); let session_plan = session::build_backtrack_plan(&rewrite_transcript, recent_session); + let mut fallback_text = base_text(config, transcript); + if session_plan.recommended.as_ref().is_some_and(|candidate| { + matches!( + candidate.kind, + RewriteSessionBacktrackCandidateKind::ReplaceLastEntry + ) + }) { + if let Some(explicit_followup_text) = + cleanup::explicit_followup_replacement(&rewrite_transcript.raw_text) + { + fallback_text = explicit_followup_text; + } + } + rewrite_transcript.edit_context.has_recent_same_focus_entry = recent_session.is_some(); + rewrite_transcript + .edit_context + .recommended_session_action_is_replace = + session_plan.recommended.as_ref().is_some_and(|candidate| { + matches!( + candidate.kind, + RewriteSessionBacktrackCandidateKind::ReplaceLastEntry + ) + }); rewrite_transcript.recent_session_entries = session_plan.recent_entries.clone(); rewrite_transcript.session_backtrack_candidates = session_plan.candidates.clone(); rewrite_transcript.recommended_session_candidate = session_plan.recommended.clone(); @@ -93,7 +112,6 @@ pub(crate) fn build_rewrite_plan( .as_ref() .map(|candidate| candidate.text.clone()) }), - deterministic_replacement_text: session_plan.deterministic_replacement_text.clone(), rules, fallback_text, rewrite_transcript, @@ -103,9 +121,7 @@ pub(crate) fn build_rewrite_plan( fn base_text(config: &Config, transcript: &Transcript) -> String { match config.postprocess.mode { PostprocessMode::LegacyBasic => cleanup::clean_transcript(transcript, &config.cleanup), - PostprocessMode::AdvancedLocal | PostprocessMode::AgenticRewrite => { - cleanup::correction_aware_text(transcript) - } + PostprocessMode::Rewrite => cleanup::correction_aware_text(transcript), PostprocessMode::Raw => raw_text(transcript), } } @@ -126,3 +142,54 @@ fn recommended_operation(rewrite_transcript: &RewriteTranscript) -> FinalizedOpe }) .unwrap_or(FinalizedOperation::Append) } + +#[cfg(test)] +mod tests { + use super::build_rewrite_plan; + use crate::config::{Config, PostprocessMode}; + use crate::context::SurfaceKind; + use crate::postprocess::finalize::FinalizedOperation; + use crate::session::{EligibleSessionEntry, SessionEntry, SessionRewriteSummary}; + use crate::transcribe::Transcript; + + #[test] + fn build_rewrite_plan_uses_explicit_followup_replacement_for_replace_fallback() { + let mut config = Config::default(); + config.postprocess.mode = PostprocessMode::Rewrite; + + let transcript = Transcript { + raw_text: "srajvat, hi".into(), + detected_language: Some("en".into()), + segments: Vec::new(), + }; + let recent = EligibleSessionEntry { + entry: SessionEntry { + id: 7, + final_text: "Hello there".into(), + grapheme_len: 11, + injected_at_ms: 1, + focus_fingerprint: "hyprland:0x123".into(), + surface_kind: SurfaceKind::GenericText, + app_id: Some("firefox".into()), + window_title: Some("Example".into()), + rewrite_summary: SessionRewriteSummary { + had_edit_cues: false, + rewrite_used: true, + recommended_candidate: Some("Hello there".into()), + }, + }, + delete_graphemes: 11, + }; + + let plan = build_rewrite_plan(&config, &transcript, None, Some(&recent)); + assert_eq!(plan.fallback_text, "Hi"); + assert_eq!(plan.recommended_candidate.as_deref(), Some("Hi")); + assert_eq!( + plan.operation, + FinalizedOperation::ReplaceLastEntry { + entry_id: 7, + delete_graphemes: 11, + } + ); + } +} diff --git a/src/rewrite/prompt.rs b/src/rewrite/prompt.rs index bd9ca2c..5da5c69 100644 --- a/src/rewrite/prompt.rs +++ b/src/rewrite/prompt.rs @@ -131,8 +131,9 @@ fn agentic_latitude_contract( pub(crate) fn rewrite_instructions(profile: ResolvedRewriteProfile) -> &'static str { let base = "You clean up dictated speech into the final text the user meant to type. \ Return only the finished text. Do not explain anything. Remove obvious disfluencies when natural. \ -Use the correction-aware transcript as the primary source of truth unless structured edit signals say the \ -utterance may still be ambiguous. The raw transcript may still contain spoken editing phrases or canceled wording. \ +Use the correction-aware transcript as strong heuristic evidence. When structured edit signals are present, treat it \ +as advisory rather than absolute and resolve ambiguity using the raw transcript, session context, and candidates. The \ +raw transcript may still contain spoken editing phrases or canceled wording. \ Never reintroduce text that was removed by an explicit spoken correction cue. Respect any structured edit intents \ provided alongside the transcript. If structured edit signals or edit hypotheses are present, use the candidate \ interpretations as bounded options, choose the best interpretation, and lightly refine it only when needed for natural \ @@ -141,10 +142,13 @@ explicit correction says otherwise. Do not normalize names into more common spel When the utterance clearly refers to software, tools, APIs, libraries, Linux components, product names, or other \ technical concepts, prefer the most plausible intended technical term or proper name over a phonetically similar common \ word. Use nearby category words like window manager, editor, language, library, package manager, shell, or terminal \ -tool to disambiguate technical names. If the utterance remains genuinely ambiguous, stay close to the transcript rather \ +tool to disambiguate technical names. When a dictated word is an obvious phonetic near-miss for a likely technical term \ +and the surrounding context clearly identifies the category, correct it to the canonical technical spelling instead of \ +echoing the miss. If multiple plausible interpretations remain similarly credible, stay close to the transcript rather \ than inventing a niche term. \ -If an edit intent says to replace or cancel previous wording, preserve that edit and do not keep the spoken correction \ -phrase itself unless the transcript clearly still intends it. Examples:\n\ +If an edit intent says to replace or cancel previous wording, preserve that edit when the utterance or same-session \ +context clearly supports it. Preserve utterance-initial courtesy or apology wording when the raw transcript still \ +clearly intends it. Examples:\n\ - raw: Hello there. Scratch that. Hi.\n correction-aware: Hi.\n final: Hi.\n\ - raw: I'll bring cookies, scratch that, brownies.\n correction-aware: I'll bring brownies.\n final: I'll bring brownies.\n\ - raw: My name is Notes, scratch that my name is Jonatan.\n correction-aware: My my name is Jonatan.\n aggressive correction-aware: My name is Jonatan.\n final: My name is Jonatan.\n\ @@ -153,6 +157,7 @@ phrase itself unless the transcript clearly still intends it. Examples:\n\ - raw: Let's meet tomorrow, or rather Friday.\n correction-aware: Let's meet Friday.\n final: Let's meet Friday.\n\ - raw: I'm currently using the window manager Hyperland.\n correction-aware: I'm currently using the window manager Hyperland.\n final: I'm currently using the window manager Hyprland.\n\ - raw: I'm switching from Sui to Hyperland.\n correction-aware: I'm switching from Sui to Hyperland.\n final: I'm switching from Sway to Hyprland.\n\ +- raw: I moved back to the window manager neary.\n correction-aware: I moved back to the window manager neary.\n final: I moved back to the window manager niri.\n\ - raw: I use type script for backend tooling.\n correction-aware: I use type script for backend tooling.\n final: I use TypeScript for backend tooling.\n\ - raw: I edit the config in neo vim.\n correction-aware: I edit the config in neo vim.\n final: I edit the config in Neovim."; @@ -160,8 +165,9 @@ phrase itself unless the transcript clearly still intends it. Examples:\n\ ResolvedRewriteProfile::Qwen => { "You clean up dictated speech into the final text the user meant to type. \ Return only the finished text. Do not explain anything. Do not emit reasoning, think tags, or XML wrappers. \ -Remove obvious disfluencies when natural. Use the correction-aware transcript as the primary source of truth unless \ -structured edit signals say the utterance may still be ambiguous. The raw transcript may still contain spoken editing \ +Remove obvious disfluencies when natural. Use the correction-aware transcript as strong heuristic evidence. When \ +structured edit signals are present, treat it as advisory rather than absolute and resolve ambiguity using the raw \ +transcript, session context, and candidates. The raw transcript may still contain spoken editing \ phrases or canceled wording. Never reintroduce text that was removed by an explicit spoken correction cue. Respect \ any structured edit intents provided alongside the transcript. If structured edit signals or edit hypotheses are \ present, use the candidate interpretations as bounded options, choose the best interpretation, and lightly refine it \ @@ -170,9 +176,10 @@ unless a user dictionary or explicit correction says otherwise. Do not normalize because they look familiar. When the utterance clearly refers to software, tools, APIs, libraries, Linux components, \ product names, or other technical concepts, prefer the most plausible intended technical term or proper name over a \ phonetically similar common word. Use nearby category words like window manager, editor, language, library, package \ -manager, shell, or terminal tool to disambiguate technical names. If the utterance remains genuinely ambiguous, stay \ -close to the transcript rather than inventing a niche term. If an edit intent says to replace or cancel previous wording, preserve that edit and do \ -not keep the spoken correction phrase itself unless the transcript clearly still intends it. Examples:\n\ +manager, shell, or terminal tool to disambiguate technical names. When a dictated word is an obvious phonetic near-miss \ +for a likely technical term and the surrounding context clearly identifies the category, correct it to the canonical \ +technical spelling instead of echoing the miss. If multiple plausible interpretations remain similarly credible, stay \ +close to the transcript rather than inventing a niche term. If an edit intent says to replace or cancel previous wording, preserve that edit when the utterance or same-session context clearly supports it. Preserve utterance-initial courtesy or apology wording when the raw transcript still clearly intends it. Examples:\n\ - raw: Hello there. Scratch that. Hi.\n correction-aware: Hi.\n final: Hi.\n\ - raw: I'll bring cookies, scratch that, brownies.\n correction-aware: I'll bring brownies.\n final: I'll bring brownies.\n\ - raw: My name is Notes, scratch that my name is Jonatan.\n correction-aware: My my name is Jonatan.\n aggressive correction-aware: My name is Jonatan.\n final: My name is Jonatan.\n\ @@ -181,6 +188,7 @@ not keep the spoken correction phrase itself unless the transcript clearly still - raw: Let's meet tomorrow, or rather Friday.\n correction-aware: Let's meet Friday.\n final: Let's meet Friday.\n\ - raw: I'm currently using the window manager Hyperland.\n correction-aware: I'm currently using the window manager Hyperland.\n final: I'm currently using the window manager Hyprland.\n\ - raw: I'm switching from Sui to Hyperland.\n correction-aware: I'm switching from Sui to Hyperland.\n final: I'm switching from Sway to Hyprland.\n\ +- raw: I moved back to the window manager neary.\n correction-aware: I moved back to the window manager neary.\n final: I moved back to the window manager niri.\n\ - raw: I use type script for backend tooling.\n correction-aware: I use type script for backend tooling.\n final: I use TypeScript for backend tooling.\n\ - raw: I edit the config in neo vim.\n correction-aware: I edit the config in neo vim.\n final: I edit the config in Neovim." } @@ -194,6 +202,7 @@ pub(crate) fn build_user_message(transcript: &RewriteTranscript) -> String { let raw = transcript.raw_text.trim(); let edit_intents = render_edit_intents(transcript); let edit_signals = render_edit_signals(transcript); + let edit_context = render_edit_context(transcript); let agentic_context = render_agentic_context(transcript); let route = rewrite_route(transcript); tracing::debug!( @@ -232,12 +241,15 @@ Active typing context:\n\ Recent dictation session:\n\ {recent_session_entries}\ {agentic_policy_context}\ +Structured cue context:\n\ +{edit_context}\ Session backtrack candidates:\n\ {session_candidates}\ {recommended_session_candidate}\ The user may be correcting the most recent prior dictation entry rather than appending new text.\n\ If the recommended session candidate says replace_last_entry, treat your final text as the replacement text for that previous dictation entry, not as newly appended text.\n\ Prefer the recommended session candidate unless another listed session candidate is clearly better.\n\ +If the utterance begins with a courtesy-prefixed correction cue and the session evidence is weak, preserve the courtesy wording instead of assuming replacement.\n\ {surface_guidance}\ Current utterance correction candidate:\n\ {correction_aware}\n\ @@ -255,9 +267,12 @@ Final text:" let recent_segments = render_recent_segments(transcript, 4); let aggressive_candidate = render_aggressive_candidate(transcript); let exact_cue_guidance = if has_strong_explicit_edit_cue(transcript) { - "A strong explicit spoken edit cue was detected. The literal raw transcript probably contains canceled wording. \ -Prefer a candidate interpretation that removes the cue and canceled wording unless doing so would clearly lose intended meaning. \ -If the cue is an exact strong match for phrases like scratch that, never mind, or wait no, do not keep the literal cue text in the final output.\n" + if opening_cue_requires_literal_bias(transcript) { + "A strong edit cue appears at the beginning of the utterance without strong same-session replacement evidence. \ +Do not assume it cancels earlier text. If the opening includes courtesy language such as sorry or my apologies, preserve that courtesy wording unless another candidate is clearly better.\n" + } else { + "A strong explicit spoken edit cue was detected. Prefer a candidate interpretation that preserves the intended edit when the cue clearly corrects earlier same-utterance wording or the most recent same-session dictation.\n" + } } else { "" }; @@ -272,12 +287,13 @@ Structured edit signals:\n\ {edit_signals}\ Structured edit intents:\n\ {edit_intents}\ +Structured cue context:\n\ +{edit_context}\ This utterance likely contains spoken self-corrections or restatements.\n\ Choose the best candidate interpretation and lightly refine it only when needed.\n\ {exact_cue_guidance}\ -When an exact strong edit cue is present, treat the non-literal candidates as more trustworthy than the literal transcript.\n\ -The candidate list is ordered from most likely to least likely by heuristics.\n\ -For exact strong edit cues, the first candidate is the heuristic best guess and should usually win unless another candidate is clearly better.\n\ +When an exact strong edit cue is present, treat non-literal candidates as evidence, not an automatic winner.\n\ +The candidate list is ordered heuristically and may be wrong for utterance-initial or courtesy-prefixed cues.\n\ Prefer the smallest replacement scope that yields a coherent result.\n\ Use span-level replacements when only a key phrase was corrected, clause-level replacements when the correction replaces the surrounding thought, and sentence-level replacements only when the whole sentence was canceled.\n\ Preserve literal wording when the cue is not actually an edit.\n\ @@ -303,6 +319,8 @@ Structured edit signals:\n\ {edit_signals}\ Structured edit intents:\n\ {edit_intents}\ +Structured cue context:\n\ +{edit_context}\ Self-corrections were already resolved before rewriting.\n\ Use this correction-aware transcript as the main source text. In agentic mode, you may still normalize likely \ technical terms or proper names when the utterance strongly supports them, even if the exact canonical spelling is not \ @@ -322,6 +340,8 @@ Structured edit signals:\n\ {edit_signals}\ Structured edit intents:\n\ {edit_intents}\ +Structured cue context:\n\ +{edit_context}\ Correction-aware transcript:\n\ {correction_aware}\n\ Treat the correction-aware transcript as authoritative for explicit spoken edits and overall meaning, but in agentic \ @@ -402,6 +422,18 @@ Glossary-backed candidates:\n\ } } +fn render_edit_context(transcript: &RewriteTranscript) -> String { + let context = &transcript.edit_context; + format!( + "- cue_is_utterance_initial: {}\n- preceding_content_word_count: {}\n- courtesy_prefix_detected: {}\n- has_recent_same_focus_entry: {}\n- recommended_session_action_is_replace: {}\n", + context.cue_is_utterance_initial, + context.preceding_content_word_count, + context.courtesy_prefix_detected, + context.has_recent_same_focus_entry, + context.recommended_session_action_is_replace, + ) +} + fn render_edit_intents(transcript: &RewriteTranscript) -> String { if transcript.edit_intents.is_empty() { return "- none detected\n".to_string(); @@ -580,6 +612,15 @@ fn render_recommended_candidate(transcript: &RewriteTranscript) -> String { .unwrap_or_default() } +fn opening_cue_requires_literal_bias(transcript: &RewriteTranscript) -> bool { + transcript.edit_context.cue_is_utterance_initial + && transcript.edit_context.courtesy_prefix_detected + && !(transcript.edit_context.has_recent_same_focus_entry + && transcript + .edit_context + .recommended_session_action_is_replace) +} + fn render_typing_context(transcript: &RewriteTranscript) -> String { transcript .typing_context diff --git a/src/rewrite/tests.rs b/src/rewrite/tests.rs index c401459..cdb7930 100644 --- a/src/rewrite/tests.rs +++ b/src/rewrite/tests.rs @@ -72,6 +72,7 @@ fn correction_transcript() -> RewriteTranscript { kind: RewriteCandidateKind::Literal, text: "Hi there, this is a test. Wait, no. Hi there.".into(), }), + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), } } @@ -105,6 +106,7 @@ fn candidate_only_transcript() -> RewriteTranscript { }, ], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), } } @@ -141,6 +143,7 @@ fn fast_agentic_transcript() -> RewriteTranscript { }, ], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext { correction_policy: RewriteCorrectionPolicy::Balanced, matched_rule_names: vec!["baseline/global-default".into()], @@ -250,12 +253,10 @@ fn cue_prompt_includes_raw_candidate_and_signals() { assert!(prompt.contains("tail_shape: phrase")); assert!(prompt.contains("Candidate interpretations")); assert!(prompt.contains("A strong explicit spoken edit cue was detected")); - assert!( - prompt.contains( - "The candidate list is ordered from most likely to least likely by heuristics." - ) - ); - assert!(prompt.contains("the first candidate is the heuristic best guess")); + assert!(prompt.contains("The candidate list is ordered heuristically and may be wrong")); + assert!(prompt.contains( + "When an exact strong edit cue is present, treat non-literal candidates as evidence, not an automatic winner." + )); assert!(prompt.contains("Recommended interpretation:")); assert!(prompt.contains( "Use this as the default final text unless another candidate is clearly better." @@ -271,6 +272,7 @@ fn cue_prompt_includes_raw_candidate_and_signals() { assert!(prompt.contains("trigger: \"wait no\"")); assert!(prompt.contains("Structured edit intents")); assert!(prompt.contains("replace_previous_sentence")); + assert!(prompt.contains("Structured cue context")); assert!(prompt.contains("Choose the best candidate interpretation")); assert!(prompt.contains("Candidate interpretations:\n")); assert!(prompt.contains("Correction candidate:\nHi there.")); @@ -327,6 +329,7 @@ fn user_message_includes_recent_segments_when_correction_matches_raw() { text: "Hi there.".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; @@ -359,6 +362,7 @@ fn effective_max_tokens_scales_with_transcript_length() { text: "hi there".into(), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; assert_eq!(effective_max_tokens(256, &short), 48); @@ -381,6 +385,7 @@ fn effective_max_tokens_scales_with_transcript_length() { text: "word ".repeat(80), }], recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; assert_eq!(effective_max_tokens(256, &long), 184); @@ -448,6 +453,7 @@ fn session_prompt_includes_recent_entry_and_context() { kind: RewriteCandidateKind::SentenceReplacement, text: "Hi".into(), }), + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; diff --git a/src/rewrite_model.rs b/src/rewrite_model.rs index 5315f63..4388c97 100644 --- a/src/rewrite_model.rs +++ b/src/rewrite_model.rs @@ -27,7 +27,7 @@ pub const REWRITE_MODELS: &[RewriteModelInfo] = &[ name: "qwen-3.5-4b-q4_k_m", filename: "Qwen_Qwen3.5-4B-Q4_K_M.gguf", size: "~2.9 GB", - description: "Recommended balance for advanced_local mode", + description: "Recommended balance for rewrite mode", url: "https://huggingface.co/bartowski/Qwen_Qwen3.5-4B-GGUF/resolve/main/Qwen_Qwen3.5-4B-Q4_K_M.gguf", profile: RewriteProfile::Qwen, }, @@ -217,7 +217,7 @@ mod tests { select_model("qwen-3.5-2b-q4_k_m", Some(&config_path)).expect("select model"); let loaded = Config::load(Some(&config_path)).expect("load config"); - assert_eq!(loaded.postprocess.mode, PostprocessMode::AdvancedLocal); + assert_eq!(loaded.postprocess.mode, PostprocessMode::Rewrite); assert_eq!(loaded.rewrite.selected_model, "qwen-3.5-2b-q4_k_m"); } diff --git a/src/rewrite_protocol.rs b/src/rewrite_protocol.rs index edacdc4..33b13ff 100644 --- a/src/rewrite_protocol.rs +++ b/src/rewrite_protocol.rs @@ -18,6 +18,8 @@ pub struct RewriteTranscript { pub rewrite_candidates: Vec, pub recommended_candidate: Option, #[serde(default)] + pub edit_context: RewriteEditContext, + #[serde(default)] pub policy_context: RewritePolicyContext, } @@ -90,6 +92,15 @@ pub struct RewriteCandidate { pub text: String, } +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] +pub struct RewriteEditContext { + pub cue_is_utterance_initial: bool, + pub preceding_content_word_count: usize, + pub courtesy_prefix_detected: bool, + pub has_recent_same_focus_entry: bool, + pub recommended_session_action_is_replace: bool, +} + #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] pub struct RewritePolicyContext { pub correction_policy: RewriteCorrectionPolicy, diff --git a/src/session/mod.rs b/src/session/mod.rs index 031de20..00c6332 100644 --- a/src/session/mod.rs +++ b/src/session/mod.rs @@ -40,5 +40,4 @@ pub struct SessionBacktrackPlan { pub recent_entries: Vec, pub candidates: Vec, pub recommended: Option, - pub deterministic_replacement_text: Option, } diff --git a/src/session/planning.rs b/src/session/planning.rs index 1eeedbf..1956d2a 100644 --- a/src/session/planning.rs +++ b/src/session/planning.rs @@ -14,11 +14,12 @@ pub fn build_backtrack_plan( let Some(recent_entry) = recent_entry else { return SessionBacktrackPlan::default(); }; - if !should_offer_session_backtrack(transcript) { + let explicit_followup_text = cleanup::explicit_followup_replacement(&transcript.raw_text); + if !should_offer_session_backtrack(transcript, explicit_followup_text.as_deref()) { return SessionBacktrackPlan::default(); } - let append_text = preferred_current_text(transcript); + let append_text = preferred_current_text(transcript, explicit_followup_text.as_deref()); if append_text.is_empty() { return SessionBacktrackPlan::default(); } @@ -40,7 +41,6 @@ pub fn build_backtrack_plan( recent_entries: vec![to_rewrite_session_entry(&recent_entry.entry)], candidates: vec![replace_candidate.clone(), append_candidate], recommended: Some(replace_candidate), - deterministic_replacement_text: preferred_current_text_for_exact_followup(transcript), } } @@ -55,8 +55,11 @@ pub fn to_rewrite_typing_context(context: &TypingContext) -> Option bool { - if cleanup::explicit_followup_replacement(&transcript.raw_text).is_some() { +fn should_offer_session_backtrack( + transcript: &RewriteTranscript, + explicit_followup_text: Option<&str>, +) -> bool { + if explicit_followup_text.is_some() { return true; } @@ -98,12 +101,17 @@ fn should_offer_session_backtrack(transcript: &RewriteTranscript) -> bool { }) } -fn preferred_current_text(transcript: &RewriteTranscript) -> String { +fn preferred_current_text( + transcript: &RewriteTranscript, + explicit_followup_text: Option<&str>, +) -> String { transcript .recommended_candidate .as_ref() .map(|candidate| candidate.text.trim()) .filter(|text: &&str| !text.is_empty()) + .or(explicit_followup_text) + .filter(|text: &&str| !text.trim().is_empty()) .or_else(|| { Some(transcript.correction_aware_text.trim()).filter(|text: &&str| !text.is_empty()) }) @@ -112,52 +120,6 @@ fn preferred_current_text(transcript: &RewriteTranscript) -> String { .to_string() } -fn preferred_current_text_for_exact_followup(transcript: &RewriteTranscript) -> Option { - if let Some(text) = cleanup::explicit_followup_replacement(&transcript.raw_text) { - return Some(text); - } - - if !has_strong_explicit_followup_cue(transcript) { - return None; - } - - let raw_prefix = normalize_prefix(&transcript.raw_text); - if ![ - "scratch that", - "actually scratch that", - "never mind", - "nevermind", - "actually never mind", - "actually nevermind", - "oh wait never mind", - "oh wait nevermind", - "forget that", - ] - .iter() - .any(|cue| raw_prefix.starts_with(cue)) - { - return None; - } - - let preferred = preferred_current_text(transcript); - (!preferred.is_empty()).then_some(preferred) -} - -fn has_strong_explicit_followup_cue(transcript: &RewriteTranscript) -> bool { - transcript.edit_hypotheses.iter().any(|hypothesis| { - hypothesis.strength == crate::rewrite_protocol::RewriteEditSignalStrength::Strong - && matches!( - hypothesis.match_source, - crate::rewrite_protocol::RewriteEditHypothesisMatchSource::Exact - | crate::rewrite_protocol::RewriteEditHypothesisMatchSource::Alias - ) - && matches!( - hypothesis.cue_family.as_str(), - "scratch_that" | "never_mind" - ) - }) -} - fn normalize_prefix(text: &str) -> String { text.chars() .map(|ch| { @@ -237,6 +199,7 @@ mod tests { kind: RewriteCandidateKind::SentenceReplacement, text: "Hi".into(), }), + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; @@ -272,7 +235,6 @@ mod tests { .and_then(|candidate| candidate.entry_id), Some(7) ); - assert_eq!(plan.deterministic_replacement_text.as_deref(), Some("Hi")); } #[test] @@ -292,6 +254,54 @@ mod tests { edit_hypotheses: Vec::new(), rewrite_candidates: Vec::new(), recommended_candidate: None, + edit_context: Default::default(), + policy_context: RewritePolicyContext::default(), + }; + + let recent = EligibleSessionEntry { + entry: SessionEntry { + id: 7, + final_text: "Hello there".into(), + grapheme_len: 11, + injected_at_ms: 1, + focus_fingerprint: "hyprland:0x123".into(), + surface_kind: SurfaceKind::GenericText, + app_id: Some("firefox".into()), + window_title: Some("Example".into()), + rewrite_summary: SessionRewriteSummary { + had_edit_cues: false, + rewrite_used: true, + recommended_candidate: Some("Hello there".into()), + }, + }, + delete_graphemes: 11, + }; + + let plan = build_backtrack_plan(&transcript, Some(&recent)); + assert_eq!( + plan.recommended.as_ref().map(|candidate| candidate.kind), + Some(RewriteSessionBacktrackCandidateKind::ReplaceLastEntry) + ); + } + + #[test] + fn build_backtrack_plan_uses_explicit_followup_replacement_for_alias_fallbacks() { + let transcript = RewriteTranscript { + raw_text: "srajvat, hi".into(), + correction_aware_text: "srajvat, hi".into(), + aggressive_correction_text: None, + detected_language: None, + typing_context: None, + recent_session_entries: Vec::new(), + session_backtrack_candidates: Vec::new(), + recommended_session_candidate: None, + segments: Vec::new(), + edit_intents: Vec::new(), + edit_signals: Vec::new(), + edit_hypotheses: Vec::new(), + rewrite_candidates: Vec::new(), + recommended_candidate: None, + edit_context: Default::default(), policy_context: RewritePolicyContext::default(), }; @@ -319,6 +329,11 @@ mod tests { plan.recommended.as_ref().map(|candidate| candidate.kind), Some(RewriteSessionBacktrackCandidateKind::ReplaceLastEntry) ); - assert_eq!(plan.deterministic_replacement_text.as_deref(), Some("Hi")); + assert_eq!( + plan.recommended + .as_ref() + .map(|candidate| candidate.text.as_str()), + Some("Hi") + ); } } diff --git a/src/setup/select.rs b/src/setup/select.rs index 0c27bd2..9f5b5bb 100644 --- a/src/setup/select.rs +++ b/src/setup/select.rs @@ -54,16 +54,9 @@ pub(super) fn choose_rewrite_model( } pub(super) fn choose_rewrite_mode(ui: &SetupUi) -> Result { - let items = [ - "advanced_local: smart rewrite cleanup with current bounded-candidate behavior", - "agentic_rewrite: app-aware rewrite with policy and technical glossary support", - ]; - let selection = ui.select("Choose the rewrite mode", &items, 1)?; - Ok(if selection == 0 { - PostprocessMode::AdvancedLocal - } else { - PostprocessMode::AgenticRewrite - }) + let items = ["rewrite: unified rewrite with app-aware policy and glossary support"]; + let _selection = ui.select("Choose the rewrite mode", &items, 0)?; + Ok(PostprocessMode::Rewrite) } pub(super) fn configure_cloud( diff --git a/src/setup/side_effects.rs b/src/setup/side_effects.rs index fbeb369..67590cc 100644 --- a/src/setup/side_effects.rs +++ b/src/setup/side_effects.rs @@ -33,14 +33,14 @@ pub(super) fn maybe_create_agentic_starter_files( config_path: &Path, selections: &SetupSelections, ) -> Result<()> { - if selections.postprocess_mode != crate::config::PostprocessMode::AgenticRewrite { + if !selections.postprocess_mode.uses_rewrite() { return Ok(()); } let config = config::Config::load(Some(config_path))?; let created = crate::agentic_rewrite::ensure_starter_files(&config)?; for path in created { - ui.print_info(format!("Created agentic rewrite starter file: {}", path)); + ui.print_info(format!("Created rewrite starter file: {}", path)); } Ok(()) }