CortexLM · echobt · Jan 27, 2026
diff --git a/cortex-app-server/src/middleware.rs b/cortex-app-server/src/middleware.rs
@@ -267,6 +267,10 @@ pub async fn body_limit_middleware(
 /// CORS configuration.
 /// Includes Access-Control-Max-Age header to allow browsers to cache
 /// preflight responses, reducing the number of OPTIONS requests.
+///
+/// Issue #2423: Handles the incompatibility between wildcard CORS origin (*)
+/// and credential requests. When credentials are needed, specific origins
+/// must be used instead of wildcards.
 pub fn cors_layer(origins: &[String]) -> tower_http::cors::CorsLayer {
     use tower_http::cors::{Any, CorsLayer};
 
@@ -275,17 +279,50 @@ pub fn cors_layer(origins: &[String]) -> tower_http::cors::CorsLayer {
     let max_age = std::time::Duration::from_secs(86400);
 
     if origins.is_empty() {
+        // Issue #2423: Using permissive() creates a CORS layer that allows
+        // any origin, but does NOT support credentials. This is intentional
+        // because Access-Control-Allow-Origin: * is incompatible with
+        // credentials: 'include' in fetch requests.
+        //
+        // If you need to support credentials, you must specify explicit
+        // origins using the cors_origins config option instead of using
+        // the permissive default.
         CorsLayer::permissive().max_age(max_age)
     } else {
-        let origins: Vec<HeaderValue> = origins
-            .iter()
-            .filter_map(|o| HeaderValue::from_str(o).ok())
-            .collect();
-        CorsLayer::new()
-            .allow_origin(origins)
-            .allow_methods(Any)
-            .allow_headers(Any)
-            .max_age(max_age)
+        // Issue #2423: Check for special "reflect" origin that mirrors
+        // the request origin. This allows credentials while being flexible.
+        let has_reflect = origins.iter().any(|o| o == "reflect" || o == "mirror");
+
+        if has_reflect {
+            // When "reflect" is specified, we allow credentials and will
+            // mirror the request's Origin header. This requires using
+            // allow_origin with a function that returns the request origin.
+            CorsLayer::new()
+                .allow_origin(tower_http::cors::AllowOrigin::mirror_request())
+                .allow_methods(Any)
+                .allow_headers(Any)
+                .allow_credentials(true)
+                .max_age(max_age)
+        } else {
+            // With specific origins, we can safely support credentials
+            let origins: Vec<HeaderValue> = origins
+                .iter()
+                .filter(|o| *o != "*") // Filter out wildcards when specific origins exist
+                .filter_map(|o| HeaderValue::from_str(o).ok())
+                .collect();
+
+            if origins.is_empty() {
+                // All origins were wildcards, fall back to permissive (no credentials)
+                CorsLayer::permissive().max_age(max_age)
+            } else {
+                CorsLayer::new()
+                    .allow_origin(origins)
+                    .allow_methods(Any)
+                    .allow_headers(Any)
+                    .allow_credentials(true) // Safe to enable with specific origins
+                    .max_age(max_age)
+            }
+        }
     }
 }
 

diff --git a/cortex-app-server/src/session_manager.rs b/cortex-app-server/src/session_manager.rs
@@ -718,6 +718,9 @@ async fn forward_events(
 
 /// Forward events from CLI session to a shared WebSocket sender.
 /// The shared sender can be updated when clients reconnect.
+///
+/// Issue #2426: Now tracks consecutive send failures and signals cancellation
+/// when a client disconnects mid-stream to prevent wasting API tokens.
 async fn forward_events_shared(
     event_rx: async_channel::Receiver<Event>,
     ws_tx: Arc<tokio::sync::RwLock<Option<mpsc::Sender<WsMessage>>>>,
@@ -728,6 +731,12 @@ async fn forward_events_shared(
     let mut current_message = String::new();
     let mut current_tool_calls: Vec<StoredToolCall> = Vec::new();
 
+    // Issue #2426: Track consecutive send failures to detect disconnected clients
+    // If we fail to send more than N consecutive messages without a reconnect,
+    // we should log a warning about potential token waste.
+    let mut consecutive_failures = 0u32;
+    const MAX_CONSECUTIVE_FAILURES: u32 = 5;
+
     while let Ok(event) = event_rx.recv().await {
         // Track message content for storage
         match &event.msg {
@@ -783,11 +792,42 @@ async fn forward_events_shared(
             let sender_guard = ws_tx.read().await;
             if let Some(sender) = sender_guard.as_ref() {
                 if sender.send(msg).await.is_err() {
-                    debug!(session_id = %sid, "WebSocket sender closed, waiting for reconnection");
+                    consecutive_failures += 1;
+
+                    // Issue #2426: Warn about potential token waste on persistent disconnect
+                    if consecutive_failures == MAX_CONSECUTIVE_FAILURES {
+                        warn!(
+                            session_id = %sid,
+                            consecutive_failures = consecutive_failures,
+                            "WebSocket client appears to be disconnected. \
+                             Generation continues but output is being discarded. \
+                             Consider implementing client-side cancellation to save API tokens."
+                        );
+                    }
+
+                    debug!(
+                        session_id = %sid,
+                        consecutive_failures = consecutive_failures,
+                        "WebSocket sender closed, waiting for reconnection"
+                    );
                     // Don't break - the client might reconnect
+                } else {
+                    // Reset failure counter on successful send
+                    consecutive_failures = 0;
                 }
             } else {
-                debug!(session_id = %sid, "No active WebSocket connection, event buffered");
+                consecutive_failures += 1;
+
+                if consecutive_failures == MAX_CONSECUTIVE_FAILURES {
+                    warn!(
+                        session_id = %sid,
+                        "No active WebSocket connection for {} consecutive events. \
+                         API tokens may be wasted on discarded output.",
+                        consecutive_failures
+                    );
+                }
+
+                debug!(session_id = %sid, "No active WebSocket connection, event discarded");
             }
         }
     }

diff --git a/cortex-cli/src/agent_cmd.rs b/cortex-cli/src/agent_cmd.rs
@@ -83,6 +83,11 @@ pub struct ShowArgs {
     /// Output as JSON.
     #[arg(long)]
     pub json: bool,
+
+    /// Output only definition properties (excludes runtime data like last_used, usage_count).
+    /// Use this flag when exporting an agent for re-import.
+    #[arg(long, alias = "export")]
+    pub definition_only: bool,
 }
 
 /// Arguments for create command.
@@ -1051,6 +1056,24 @@ async fn run_show(args: ShowArgs) -> Result<()> {
                     serde_json::Value::String("builtin".to_string()),
                 );
             }
+
+            // Issue #2417: Remove runtime properties when --definition-only is set
+            // This allows the output to be directly re-imported without errors
+            if args.definition_only {
+                // Remove runtime properties that shouldn't be in agent definitions
+                let runtime_properties = [
+                    "last_used",
+                    "usage_count",
+                    "created_at",
+                    "updated_at",
+                    "native",
+                    "source",
+                    "path",
+                ];
+                for prop in runtime_properties {
+                    map.remove(prop);
+                }
+            }
         }
         let json = serde_json::to_string_pretty(&json_value)?;
         println!("{json}");

diff --git a/cortex-cli/src/debug_cmd.rs b/cortex-cli/src/debug_cmd.rs
@@ -26,6 +26,11 @@ use cortex_protocol::ConversationId;
 /// Debug CLI for Cortex.
 #[derive(Debug, Parser)]
 pub struct DebugCli {
+    /// Suppress non-essential output (quiet mode).
+    /// Issue #2419: Debug commands now respect the --quiet flag.
+    #[arg(long, short = 'q', global = true)]
+    pub quiet: bool,
+
     #[command(subcommand)]
     pub subcommand: DebugSubcommand,
 }
@@ -2537,17 +2542,69 @@ fn get_cortex_home() -> PathBuf {
 
 impl DebugCli {
     /// Run the debug command.
+    ///
+    /// Issue #2419: Debug commands now respect the --quiet flag.
+    /// When quiet mode is enabled, only essential output (JSON, errors) is shown.
     pub async fn run(self) -> Result<()> {
+        // Pass quiet flag to subcommands that support it
+        let quiet = self.quiet;
+
         match self.subcommand {
-            DebugSubcommand::Config(args) => run_config(args).await,
-            DebugSubcommand::File(args) => run_file(args).await,
-            DebugSubcommand::Lsp(args) => run_lsp(args).await,
-            DebugSubcommand::Ripgrep(args) => run_ripgrep(args).await,
-            DebugSubcommand::Skill(args) => run_skill(args).await,
-            DebugSubcommand::Snapshot(args) => run_snapshot(args).await,
-            DebugSubcommand::Paths(args) => run_paths(args).await,
-            DebugSubcommand::System(args) => run_system(args).await,
-            DebugSubcommand::Wait(args) => run_wait(args).await,
+            DebugSubcommand::Config(mut args) => {
+                // In quiet mode, default to JSON output for machine parsing
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_config(args).await
+            }
+            DebugSubcommand::File(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_file(args).await
+            }
+            DebugSubcommand::Lsp(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_lsp(args).await
+            }
+            DebugSubcommand::Ripgrep(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_ripgrep(args).await
+            }
+            DebugSubcommand::Skill(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_skill(args).await
+            }
+            DebugSubcommand::Snapshot(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_snapshot(args).await
+            }
+            DebugSubcommand::Paths(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_paths(args).await
+            }
+            DebugSubcommand::System(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_system(args).await
+            }
+            DebugSubcommand::Wait(mut args) => {
+                if quiet && !args.json {
+                    args.json = true;
+                }
+                run_wait(args).await
+            }
         }
     }
 }

diff --git a/cortex-engine/src/context/file_context.rs b/cortex-engine/src/context/file_context.rs
@@ -378,7 +378,13 @@ impl FileContextCollection {
     }
 
     /// Trim to fit within max tokens.
-    pub fn trim_to_fit(&mut self) {
+    ///
+    /// Issue #2422: Now logs a warning when files are dropped due to token limits.
+    /// Returns the number of files that were dropped.
+    pub fn trim_to_fit(&mut self) -> usize {
+        let original_count = self.contexts.len();
+        let mut dropped_files: Vec<String> = Vec::new();
+
         while self.token_count() > self.max_tokens && !self.contexts.is_empty() {
             // Remove lowest relevance file
             let lowest = self
@@ -392,9 +398,25 @@ impl FileContextCollection {
                 .map(|(k, _)| k.clone());
 
             if let Some(path) = lowest {
+                dropped_files.push(path.display().to_string());
                 self.contexts.remove(&path);
             }
         }
+
+        // Issue #2422: Log warning when files are dropped
+        let dropped_count = dropped_files.len();
+        if dropped_count > 0 {
+            tracing::warn!(
+                "Context truncated: {} of {} files dropped to fit {} token limit. \
+                 Dropped files: {}",
+                dropped_count,
+                original_count,
+                self.max_tokens,
+                dropped_files.join(", ")
+            );
+        }
+
+        dropped_count
     }
 
     /// Format all contexts for prompt.

diff --git a/cortex-engine/src/input.rs b/cortex-engine/src/input.rs
@@ -4,7 +4,7 @@
 //! readline, history, and completion.
 
 use std::collections::VecDeque;
-use std::io::{self, BufRead, Write};
+use std::io::{self, BufRead, IsTerminal, Write};
 
 use serde::{Deserialize, Serialize};
 
@@ -293,7 +293,18 @@ impl<R: BufRead, W: Write> InputReader<R, W> {
     }
 
     /// Read confirmation.
+    ///
+    /// Issue #2413: Only accepts input when stdin is connected to an active TTY.
+    /// This prevents confirmation prompts from capturing input when the terminal
+    /// is unfocused or running in background (e.g., in tmux/screen).
     fn read_confirm(&mut self, config: &InputConfig) -> io::Result<InputResult> {
+        // Check if stdin is connected to a TTY before prompting for confirmation
+        // This prevents capturing input from unfocused terminals (#2413)
+        if !std::io::stdin().is_terminal() {
+            // If not a TTY, default to "no" for safety
+            return Ok(InputResult::new("", "no"));
+        }
+
         write!(self.writer, "{} [y/N]: ", config.prompt)?;
         self.writer.flush()?;
 

diff --git a/cortex-engine/src/tools/router.rs b/cortex-engine/src/tools/router.rs
@@ -178,18 +178,38 @@ impl ToolRouter {
     }
 
     /// Execute a tool.
+    ///
+    /// Issue #2420: When a tool is not found, instead of returning an error that
+    /// might cause infinite retry loops, we return an error ToolResult that the
+    /// model can understand and react to appropriately.
     pub async fn execute(
         &self,
         tool_name: &str,
         arguments: Value,
         context: &ToolContext,
     ) -> Result<ToolResult> {
-        let handler = self
-            .handlers
-            .get(tool_name)
-            .ok_or_else(|| CortexError::UnknownTool {
-                name: tool_name.to_string(),
-            })?;
+        // Issue #2420: Return an error result instead of causing a retry loop
+        // when the model calls a non-existent tool
+        let handler = match self.handlers.get(tool_name) {
+            Some(h) => h,
+            None => {
+                // Get list of available tools to help the model
+                let available_tools: Vec<&String> = self.handlers.keys().take(10).collect();
+                let error_message = format!(
+                    "Error: Tool '{}' does not exist. Available tools include: {}. \
+                     Please use only tools that are available.",
+                    tool_name,
+                    available_tools
+                        .iter()
+                        .map(|s| s.as_str())
+                        .collect::<Vec<_>>()
+                        .join(", ")
+                );
+
+                // Return as error result so model can understand and adjust
+                return Ok(ToolResult::error(error_message));
+            }
+        };
 
         handler.execute(arguments, context).await
     }