From d0b4c161b348a9df9a880b9f9d5af5695aadc5a5 Mon Sep 17 00:00:00 2001 From: Bounty Bot Date: Tue, 27 Jan 2026 21:11:35 +0000 Subject: [PATCH] fix: batch fixes for issues #2906, 2909, 2911, 2913, 2915, 2919, 2921, 2924, 2927, 2928 [skip ci] - #2906: Add unified --mode filter flag to agent list command - #2909: Validate MCP server environment variables for dangerous patterns - #2911: Normalize file paths in debug file command to remove trailing dots - #2913: Return explicit error when --attach URL format is invalid - #2915: Add --quiet flag to login command for CI/CD scripting - #2919: Add JSON metadata output format for scrape command - #2921: Add --dry-run flag to mcp add command - #2924: Add capability filters (--vision, --tools, --json-mode) to models list - #2927: Include full error chain and OS error codes in MCP debug errors - #2928: Add agent test command for configuration validation --- cortex-cli/src/agent_cmd.rs | 260 ++++++++++++++++++++++++- cortex-cli/src/debug_cmd.rs | 42 ++++ cortex-cli/src/login.rs | 38 ++-- cortex-cli/src/main.rs | 17 +- cortex-cli/src/mcp_cmd.rs | 113 ++++++++++- cortex-cli/src/models_cmd.rs | 44 ++++- cortex-cli/src/run_cmd.rs | 39 ++-- cortex-cli/src/scrape_cmd.rs | 154 ++++++++++++++- cortex-gui/src-tauri/src/extensions.rs | 13 +- cortex-gui/src-tauri/src/fs.rs | 18 +- cortex-gui/src-tauri/src/remote.rs | 4 +- 11 files changed, 677 insertions(+), 65 deletions(-) diff --git a/cortex-cli/src/agent_cmd.rs b/cortex-cli/src/agent_cmd.rs index f41b2671..a6c06cb5 100644 --- a/cortex-cli/src/agent_cmd.rs +++ b/cortex-cli/src/agent_cmd.rs @@ -39,6 +39,9 @@ pub enum AgentSubcommand { /// Install an agent from the registry. Install(InstallArgs), + + /// Test and validate an agent configuration without running it (#2928). + Test(TestArgs), } /// Arguments for list command. @@ -56,6 +59,12 @@ pub struct ListArgs { #[arg(long)] pub subagents: bool, + /// Filter by agent mode: primary, subagent, or all. + /// Can specify multiple modes separated by commas (e.g., --mode primary,subagent). + /// This is a unified alternative to --primary and --subagents flags. + #[arg(long, value_delimiter = ',')] + pub mode: Vec, + /// Show all agents including hidden ones. #[arg(long)] pub all: bool, @@ -165,6 +174,17 @@ pub struct InstallArgs { pub registry: Option, } +/// Arguments for test command (#2928). +#[derive(Debug, Parser)] +pub struct TestArgs { + /// Name of the agent to test. + pub name: String, + + /// Output as JSON. + #[arg(long)] + pub json: bool, +} + /// Agent operation mode. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] #[serde(rename_all = "lowercase")] @@ -336,6 +356,7 @@ impl AgentCli { AgentSubcommand::Edit(args) => run_edit(args).await, AgentSubcommand::Remove(args) => run_remove(args).await, AgentSubcommand::Install(args) => run_install(args).await, + AgentSubcommand::Test(args) => run_test(args).await, } } } @@ -900,6 +921,22 @@ async fn run_list(args: ListArgs) -> Result<()> { let agents = load_all_agents()?; + // Parse --mode filter values into agent modes + let mode_filters: Vec = args + .mode + .iter() + .filter_map(|m| m.parse::().ok()) + .collect(); + + // Determine effective mode filters from both --mode flag and legacy --primary/--subagents flags + let filter_primary = + args.primary || mode_filters.iter().any(|m| matches!(m, AgentMode::Primary)); + let filter_subagents = args.subagents + || mode_filters + .iter() + .any(|m| matches!(m, AgentMode::Subagent)); + let filter_all_modes = mode_filters.iter().any(|m| matches!(m, AgentMode::All)); + // Filter agents let mut filtered: Vec<_> = agents .iter() @@ -908,12 +945,23 @@ async fn run_list(args: ListArgs) -> Result<()> { if !args.all && a.hidden { return false; } - // Filter by mode - if args.primary && !matches!(a.mode, AgentMode::Primary | AgentMode::All) { - return false; - } - if args.subagents && !matches!(a.mode, AgentMode::Subagent | AgentMode::All) { - return false; + // Filter by mode (using unified logic for --mode flag and legacy --primary/--subagents) + // If filter_all_modes is set, show all modes + // If both filter_primary and filter_subagents are set, show all + // If neither is set and no mode filters, show all + let show_all_modes = filter_all_modes + || (filter_primary && filter_subagents) + || (!filter_primary && !filter_subagents && mode_filters.is_empty()); + if !show_all_modes { + if filter_primary && !matches!(a.mode, AgentMode::Primary | AgentMode::All) { + return false; + } + if filter_subagents + && !filter_primary + && !matches!(a.mode, AgentMode::Subagent | AgentMode::All) + { + return false; + } } // Filter by pattern if let Some(ref pattern) = args.filter { @@ -1755,6 +1803,206 @@ async fn run_install(args: InstallArgs) -> Result<()> { Ok(()) } +/// Test and validate an agent configuration without running it (#2928). +async fn run_test(args: TestArgs) -> Result<()> { + let agents = load_all_agents()?; + + let agent = agents + .iter() + .find(|a| a.name == args.name) + .ok_or_else(|| anyhow::anyhow!("Agent '{}' not found", args.name))?; + + // Validation result structure + let mut errors: Vec = Vec::new(); + let mut warnings: Vec = Vec::new(); + + // Check for valid name + if agent.name.trim().is_empty() { + errors.push("Agent name is empty".to_string()); + } + + // Check for valid mode + // (mode is always valid if it was parsed, so we check consistency) + if agent.native && matches!(agent.mode, AgentMode::Subagent) { + warnings.push("Built-in agents as subagents cannot be modified".to_string()); + } + + // Check model reference if specified + if let Some(ref model) = agent.model { + if model.trim().is_empty() { + errors.push("Model name is specified but empty".to_string()); + } + // Validate model format + if !model.contains('/') + && !model + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' || c == ':') + { + warnings.push(format!( + "Model '{}' may not be in the expected format (provider/model or alias)", + model + )); + } + } + + // Check temperature if specified + if let Some(temp) = agent.temperature { + if !(0.0..=2.0).contains(&temp) { + errors.push(format!( + "Temperature {} is outside valid range (0.0-2.0)", + temp + )); + } + } + + // Check top_p if specified + if let Some(top_p) = agent.top_p { + if !(0.0..=1.0).contains(&top_p) { + errors.push(format!("Top-p {} is outside valid range (0.0-1.0)", top_p)); + } + } + + // Check for invalid tool references + let known_tools = [ + "Read", + "Create", + "Edit", + "MultiEdit", + "LS", + "Grep", + "Glob", + "Execute", + "FetchUrl", + "WebSearch", + "TodoWrite", + "TodoRead", + "Task", + "ApplyPatch", + "CodeSearch", + "ViewImage", + "LspDiagnostics", + "LspHover", + "LspSymbols", + ]; + + if let Some(ref allowed) = agent.allowed_tools { + for tool in allowed { + let tool_lower = tool.to_lowercase(); + if !known_tools.iter().any(|t| t.to_lowercase() == tool_lower) { + warnings.push(format!("Unknown tool in allowed_tools: '{}'", tool)); + } + } + } + + for tool in &agent.denied_tools { + let tool_lower = tool.to_lowercase(); + if !known_tools.iter().any(|t| t.to_lowercase() == tool_lower) { + warnings.push(format!("Unknown tool in denied_tools: '{}'", tool)); + } + } + + // Check for conflicting tool settings + if let Some(ref allowed) = agent.allowed_tools { + for tool in &agent.denied_tools { + if allowed + .iter() + .any(|t| t.to_lowercase() == tool.to_lowercase()) + { + errors.push(format!( + "Tool '{}' is in both allowed_tools and denied_tools", + tool + )); + } + } + } + + // Check prompt is not too short (potential misconfiguration) + if let Some(ref prompt) = agent.prompt { + if prompt.trim().len() < 10 && !agent.native { + warnings.push("System prompt is very short (less than 10 characters)".to_string()); + } + } + + // Check max_turns + if let Some(turns) = agent.max_turns { + if turns == 0 { + errors.push("max_turns is set to 0, agent cannot complete any turns".to_string()); + } else if turns > 1000 { + warnings.push(format!( + "max_turns is very high ({}), consider a lower limit", + turns + )); + } + } + + // Build result + let is_valid = errors.is_empty(); + + if args.json { + let result = serde_json::json!({ + "name": agent.name, + "valid": is_valid, + "errors": errors, + "warnings": warnings, + "source": agent.source.to_string(), + "path": agent.path.as_ref().map(|p| p.display().to_string()), + }); + println!("{}", serde_json::to_string_pretty(&result)?); + } else { + println!("Agent Test: {}", agent.name); + println!("{}", "=".repeat(50)); + println!(); + + if is_valid { + println!("Status: VALID"); + } else { + println!("Status: INVALID"); + } + println!("Source: {}", agent.source); + if let Some(ref path) = agent.path { + println!("Path: {}", path.display()); + } + + if !errors.is_empty() { + println!(); + println!("Errors:"); + for error in &errors { + println!(" {} {}", '\u{2717}', error); // X mark + } + } + + if !warnings.is_empty() { + println!(); + println!("Warnings:"); + for warning in &warnings { + println!(" {} {}", '\u{26A0}', warning); // Warning sign + } + } + + if errors.is_empty() && warnings.is_empty() { + println!(); + println!("No issues found. Agent configuration is valid."); + } + + println!(); + if is_valid { + println!( + "Agent '{}' can be used with 'cortex -a {}'", + agent.name, agent.name + ); + } else { + println!("Fix the errors above before using this agent."); + } + } + + // Return error if validation failed + if !is_valid { + bail!("Agent validation failed with {} error(s)", errors.len()); + } + + Ok(()) +} + /// Generate agent using AI. async fn run_generate(args: CreateArgs) -> Result<()> { use cortex_engine::agent::{AgentGenerator, GeneratedAgent}; diff --git a/cortex-cli/src/debug_cmd.rs b/cortex-cli/src/debug_cmd.rs index 06c3ac4d..d5cf82cc 100644 --- a/cortex-cli/src/debug_cmd.rs +++ b/cortex-cli/src/debug_cmd.rs @@ -463,6 +463,10 @@ async fn run_file(args: FileArgs) -> Result<()> { std::env::current_dir()?.join(&args.path) }; + // Normalize path to remove "." and ".." components (#2911) + // This prevents displaying paths like "C:\Users\Leo\." when user runs "cortex debug file ." + let path = normalize_path(&path); + let exists = path.exists(); // Detect special file types using stat() BEFORE attempting any reads @@ -857,6 +861,44 @@ fn detect_encoding_and_binary(path: &PathBuf) -> (Option, Option) (encoding, is_binary) } +/// Normalize a path by removing "." and ".." components. +/// This is similar to std::fs::canonicalize but doesn't require the path to exist +/// and doesn't resolve symlinks. +fn normalize_path(path: &std::path::Path) -> PathBuf { + use std::path::Component; + + let mut components = Vec::new(); + + for component in path.components() { + match component { + Component::CurDir => { + // Skip "." components + } + Component::ParentDir => { + // Go up one directory if possible + if !components.is_empty() + && !matches!(components.last(), Some(Component::ParentDir)) + { + components.pop(); + } else { + components.push(component); + } + } + _ => { + components.push(component); + } + } + } + + // Reconstruct the path + if components.is_empty() { + // If all components were removed (e.g., for "."), return the current directory + PathBuf::from(".") + } else { + components.iter().collect() + } +} + /// Format file size in human-readable format. fn format_size(bytes: u64) -> String { const KB: u64 = 1024; diff --git a/cortex-cli/src/login.rs b/cortex-cli/src/login.rs index d1313629..39f7885d 100644 --- a/cortex-cli/src/login.rs +++ b/cortex-cli/src/login.rs @@ -42,7 +42,11 @@ fn get_cortex_home() -> PathBuf { } /// Run login with API key. -pub async fn run_login_with_api_key(config_overrides: CliConfigOverrides, api_key: String) -> ! { +pub async fn run_login_with_api_key( + config_overrides: CliConfigOverrides, + api_key: String, + quiet: bool, +) -> ! { check_duplicate_config_overrides(&config_overrides); let cortex_home = get_cortex_home(); @@ -52,16 +56,22 @@ pub async fn run_login_with_api_key(config_overrides: CliConfigOverrides, api_ke // Use save_auth_with_fallback for automatic keyring -> encrypted file fallback match save_auth_with_fallback(&cortex_home, &data) { Ok(mode) => { - match mode { - CredentialsStoreMode::Keyring => { - print_success("Logged in successfully. Credentials stored in system keyring."); - } - CredentialsStoreMode::EncryptedFile => { - print_success("Logged in successfully. Credentials stored in encrypted file."); - print_dim("System keyring unavailable, using encrypted file storage."); - } - CredentialsStoreMode::File => { - print_success("Logged in successfully (legacy storage)."); + if !quiet { + match mode { + CredentialsStoreMode::Keyring => { + print_success( + "Logged in successfully. Credentials stored in system keyring.", + ); + } + CredentialsStoreMode::EncryptedFile => { + print_success( + "Logged in successfully. Credentials stored in encrypted file.", + ); + print_dim("System keyring unavailable, using encrypted file storage."); + } + CredentialsStoreMode::File => { + print_success("Logged in successfully (legacy storage)."); + } } } std::process::exit(0); @@ -202,7 +212,7 @@ pub async fn run_logout(config_overrides: CliConfigOverrides, skip_confirmation: } /// Read API key from stdin. -pub fn read_api_key_from_stdin() -> String { +pub fn read_api_key_from_stdin(quiet: bool) -> String { let mut stdin = std::io::stdin(); if stdin.is_terminal() { @@ -213,7 +223,9 @@ pub fn read_api_key_from_stdin() -> String { std::process::exit(1); } - print_info("Reading API key from stdin..."); + if !quiet { + print_info("Reading API key from stdin..."); + } let mut buffer = String::new(); if let Err(err) = stdin.read_to_string(&mut buffer) { diff --git a/cortex-cli/src/main.rs b/cortex-cli/src/main.rs index 28edb7b4..75ae4a59 100644 --- a/cortex-cli/src/main.rs +++ b/cortex-cli/src/main.rs @@ -536,6 +536,12 @@ struct LoginCommand { #[arg(long = "sso")] use_sso: bool, + /// Quiet mode - suppress all output on success. + /// Only errors will be printed. Exit code 0 indicates success. + /// Useful for scripting and CI/CD automation. + #[arg(short = 'q', long = "quiet")] + quiet: bool, + /// Override the OAuth issuer base URL (advanced) #[arg(long = "experimental_issuer", value_name = "URL", hide = true)] issuer_base_url: Option, @@ -939,12 +945,15 @@ async fn main() -> Result<()> { run_login_status(login_cli.config_overrides).await; } None => { + let quiet = login_cli.quiet; if let Some(token) = login_cli.token { // Direct token authentication for CI/CD - run_login_with_api_key(login_cli.config_overrides, token).await; + run_login_with_api_key(login_cli.config_overrides, token, quiet).await; } else if login_cli.use_sso { // Enterprise SSO authentication (uses device code flow with SSO issuer) - eprintln!("Starting enterprise SSO authentication..."); + if !quiet { + eprintln!("Starting enterprise SSO authentication..."); + } run_login_with_device_code( login_cli.config_overrides, login_cli.issuer_base_url, @@ -959,8 +968,8 @@ async fn main() -> Result<()> { ) .await; } else if login_cli.with_api_key { - let api_key = read_api_key_from_stdin(); - run_login_with_api_key(login_cli.config_overrides, api_key).await; + let api_key = read_api_key_from_stdin(quiet); + run_login_with_api_key(login_cli.config_overrides, api_key, quiet).await; } else { // Default: try device code auth run_login_with_device_code( diff --git a/cortex-cli/src/mcp_cmd.rs b/cortex-cli/src/mcp_cmd.rs index 4a8c666d..e3565fe3 100644 --- a/cortex-cli/src/mcp_cmd.rs +++ b/cortex-cli/src/mcp_cmd.rs @@ -206,6 +206,69 @@ fn validate_env_var_name(name: &str) -> Result<()> { Ok(()) } +/// Sensitive environment variable names that could enable security exploits. +/// These variables control how programs load code or find executables. +const SENSITIVE_ENV_VAR_NAMES: &[&str] = &[ + "PATH", + "LD_PRELOAD", + "LD_LIBRARY_PATH", + "DYLD_INSERT_LIBRARIES", + "DYLD_LIBRARY_PATH", + "PYTHONPATH", + "NODE_PATH", + "RUBYLIB", + "PERL5LIB", + "CLASSPATH", +]; + +/// Patterns in environment variable values that might indicate command injection attempts. +const DANGEROUS_VALUE_PATTERNS: &[&str] = &[ + ";", // Command separator + "&&", // Command chaining + "||", // Command chaining + "|", // Pipe + "`", // Command substitution + "$(", // Command substitution + "$((", // Arithmetic expansion + "\n", // Newline (potential injection) + "\r", // Carriage return + "cmd /c", // Windows command execution + "cmd.exe", // Windows command execution + "/bin/sh", // Shell execution + "/bin/bash", // Shell execution +]; + +/// Check if an environment variable value contains potentially dangerous patterns +/// and return a warning message if so. +fn check_dangerous_env_value(name: &str, value: &str) -> Option { + let name_upper = name.to_uppercase(); + let value_lower = value.to_lowercase(); + + // Check if this is a sensitive variable name + let is_sensitive_name = SENSITIVE_ENV_VAR_NAMES.iter().any(|s| name_upper == *s); + + // Check for dangerous patterns in the value + let dangerous_pattern = DANGEROUS_VALUE_PATTERNS + .iter() + .find(|p| value_lower.contains(&p.to_lowercase()) || value.contains(*p)); + + if is_sensitive_name { + Some(format!( + "Warning: Setting '{}' is a security-sensitive environment variable. \ + Modifying it could affect how MCP servers load code or find executables.", + name + )) + } else if let Some(pattern) = dangerous_pattern { + Some(format!( + "Warning: Environment variable '{}' contains potentially dangerous pattern '{}'. \ + This could enable command injection if the MCP server uses this value unsafely.", + name, pattern + )) + } else { + None + } +} + /// Validates environment variable value. fn validate_env_var_value(value: &str) -> Result<()> { if value.len() > MAX_ENV_VAR_VALUE_LENGTH { @@ -344,6 +407,11 @@ pub struct AddArgs { #[arg(long, short = 'f')] pub force: bool, + /// Preview what would be added without making changes. + /// Shows the configuration that would be written to config.toml. + #[arg(long)] + pub dry_run: bool, + #[command(flatten)] pub transport_args: AddMcpTransportArgs, } @@ -741,6 +809,7 @@ async fn run_add(args: AddArgs) -> Result<()> { let AddArgs { name, force, + dry_run, transport_args, } = args; @@ -827,9 +896,18 @@ async fn run_add(args: AddArgs) -> Result<()> { MAX_ENV_VARS ); } + let mut security_warnings = Vec::new(); for (key, value) in &stdio.env { validate_env_var_name(key)?; validate_env_var_value(value)?; + // Check for potentially dangerous environment variable values + if let Some(warning) = check_dangerous_env_value(key, value) { + security_warnings.push(warning); + } + } + // Print security warnings before proceeding + for warning in &security_warnings { + eprintln!("⚠️ {}", warning); } let mut command_parts = stdio.command.into_iter(); @@ -962,6 +1040,19 @@ url = "{url_escaped}" _ => bail!("exactly one of --command or --url must be provided"), }; + // Handle dry-run mode (#2921) + if dry_run { + println!("Dry-run mode - no changes will be made\n"); + println!( + "Would add the following configuration to {}:\n", + config_path.display() + ); + println!("{}", transport_toml.trim()); + println!(); + print!("[Preview] {}", success_msg); + return Ok(()); + } + // Append to config config_content.push_str(&transport_toml); @@ -1740,14 +1831,32 @@ async fn test_stdio_connection( _args.split_whitespace().collect() }; - // Spawn the process + // Spawn the process with full error chain (#2927) let mut child = Command::new(command) .args(&args_vec) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::piped()) .spawn() - .with_context(|| format!("Failed to spawn command: {}", command))?; + .map_err(|e| { + // Include the full error chain including OS error code + let mut error_msg = format!("Failed to spawn command: {}", command); + error_msg.push_str(&format!("\n Reason: {}", e)); + if let Some(os_error) = e.raw_os_error() { + error_msg.push_str(&format!(" (OS error code: {})", os_error)); + } + // Provide helpful hints based on error kind + match e.kind() { + std::io::ErrorKind::NotFound => { + error_msg.push_str("\n Hint: The command was not found in PATH. Check that the executable exists and is installed."); + } + std::io::ErrorKind::PermissionDenied => { + error_msg.push_str("\n Hint: Permission denied. Check that the file is executable and you have permission to run it."); + } + _ => {} + } + anyhow::anyhow!(error_msg) + })?; let mut stdin = child .stdin diff --git a/cortex-cli/src/models_cmd.rs b/cortex-cli/src/models_cmd.rs index 9f5b43f8..196d30b2 100644 --- a/cortex-cli/src/models_cmd.rs +++ b/cortex-cli/src/models_cmd.rs @@ -82,6 +82,18 @@ pub struct ListModelsArgs { /// Show full model IDs without truncation (Issue #1991) #[arg(long)] pub full: bool, + + /// Filter to models with vision capability (#2924) + #[arg(long)] + pub vision: bool, + + /// Filter to models with tool use capability (#2924) + #[arg(long)] + pub tools: bool, + + /// Filter to models with JSON mode capability (#2924) + #[arg(long)] + pub json_mode: bool, } /// Model information for display. @@ -124,12 +136,26 @@ impl ModelsCli { args.offset, &args.sort, args.full, + args.vision, + args.tools, + args.json_mode, ) .await } None => { // Default: list models with optional provider filter (no pagination) - run_list(self.provider, self.json, None, 0, "id", false).await + run_list( + self.provider, + self.json, + None, + 0, + "id", + false, + false, + false, + false, + ) + .await } } } @@ -450,6 +476,9 @@ async fn run_list( offset: usize, sort_by: &str, show_full: bool, + filter_vision: bool, + filter_tools: bool, + filter_json_mode: bool, ) -> Result<()> { let mut models = get_available_models(); @@ -471,7 +500,7 @@ async fn run_list( } // Filter by provider if specified - let filtered: Vec<_> = if let Some(ref provider) = provider_filter { + let mut filtered: Vec<_> = if let Some(ref provider) = provider_filter { let provider_lower = provider.to_lowercase(); models .into_iter() @@ -481,6 +510,17 @@ async fn run_list( models }; + // Filter by capabilities (#2924) + if filter_vision { + filtered.retain(|m| m.capabilities.vision); + } + if filter_tools { + filtered.retain(|m| m.capabilities.tools); + } + if filter_json_mode { + filtered.retain(|m| m.capabilities.json_mode); + } + let total_count = filtered.len(); // Apply pagination diff --git a/cortex-cli/src/run_cmd.rs b/cortex-cli/src/run_cmd.rs index cb937759..f0bf0573 100644 --- a/cortex-cli/src/run_cmd.rs +++ b/cortex-cli/src/run_cmd.rs @@ -633,25 +633,36 @@ impl RunCli { /// Run attached to a remote server. async fn run_attached( &self, - _server_url: &str, - message: &str, - attachments: &[FileAttachment], - session_mode: SessionMode, + server_url: &str, + _message: &str, + _attachments: &[FileAttachment], + _session_mode: SessionMode, ) -> Result<()> { - // For now, we'll implement a basic HTTP client approach - // In a full implementation, this would use a proper SDK client + // Validate URL format + if !server_url.starts_with("http://") && !server_url.starts_with("https://") { + bail!( + "Invalid server URL: '{}'. URL must start with http:// or https://", + server_url + ); + } if self.verbose { - eprintln!("Attaching to server: {_server_url}"); - eprintln!("Session mode: {session_mode:?}"); - eprintln!("Message length: {} chars", message.len()); - eprintln!("Attachments: {}", attachments.len()); + eprintln!("Attempting to attach to server: {}", server_url); } - // TODO: Implement full server attachment when cortex SDK client is available - // For now, fall back to local execution with a warning - print_warning("Server attachment not yet fully implemented. Running locally instead."); - self.run_local(message, attachments, session_mode).await + // The --attach flag requires a working server connection. + // Since server attachment is not yet implemented, we fail with a clear error + // instead of silently falling back to local execution (#2913). + bail!( + "Cannot attach to server at '{}': Server attachment is not yet implemented.\n\n\ + The --attach flag is reserved for connecting to a running Cortex server, \ + which is a feature currently under development.\n\n\ + To run locally without server attachment, remove the --attach flag:\n\ + \x20 cortex run \"your message here\"\n\n\ + If you need to run against a specific server, this feature will be available \ + in a future release.", + server_url + ); } /// Determine if streaming output is enabled. diff --git a/cortex-cli/src/scrape_cmd.rs b/cortex-cli/src/scrape_cmd.rs index 95309e58..ebafc84a 100644 --- a/cortex-cli/src/scrape_cmd.rs +++ b/cortex-cli/src/scrape_cmd.rs @@ -23,6 +23,8 @@ pub enum OutputFormat { Text, /// Cleaned HTML. Html, + /// JSON with structured metadata. + Json, } impl std::str::FromStr for OutputFormat { @@ -33,7 +35,8 @@ impl std::str::FromStr for OutputFormat { "markdown" | "md" => Ok(Self::Markdown), "text" | "txt" | "plain" => Ok(Self::Text), "html" => Ok(Self::Html), - _ => bail!("Invalid format: {s}. Use markdown, text, or html"), + "json" => Ok(Self::Json), + _ => bail!("Invalid format: {s}. Use markdown, text, html, or json"), } } } @@ -432,12 +435,161 @@ impl ScrapeCommand { } OutputFormat::Text => html_to_text(&content_html), OutputFormat::Html => clean_html(&content_html, self.no_images, self.no_links), + OutputFormat::Json => { + // Extract structured metadata from the HTML + let metadata = extract_page_metadata(&document, &self.url); + serde_json::to_string_pretty(&metadata) + .map_err(|e| anyhow::anyhow!("Failed to serialize JSON: {e}"))? + } }; Ok(output) } } +/// Metadata extracted from a web page for JSON output format (#2919). +#[derive(Debug, serde::Serialize)] +struct PageMetadata { + /// The original URL that was scraped. + url: String, + /// Page title from tag. + title: Option<String>, + /// Page description from meta description tag. + description: Option<String>, + /// The main text content in markdown format. + content: String, + /// Links found on the page. + links: Vec<LinkInfo>, + /// Images found on the page. + images: Vec<ImageInfo>, + /// Additional metadata from meta tags. + metadata: serde_json::Value, +} + +/// Information about a link on the page. +#[derive(Debug, serde::Serialize)] +struct LinkInfo { + /// Link URL (href attribute). + href: String, + /// Link text content. + text: String, +} + +/// Information about an image on the page. +#[derive(Debug, serde::Serialize)] +struct ImageInfo { + /// Image URL (src attribute). + src: String, + /// Alt text. + alt: Option<String>, +} + +/// Extract structured metadata from HTML for JSON output. +fn extract_page_metadata(document: &Html, url: &str) -> PageMetadata { + // Extract title + let title = Selector::parse("title") + .ok() + .and_then(|sel| document.select(&sel).next()) + .map(|el| el.text().collect::<String>().trim().to_string()); + + // Extract meta description + let description = Selector::parse("meta[name='description']") + .ok() + .and_then(|sel| document.select(&sel).next()) + .and_then(|el| el.value().attr("content")) + .map(|s| s.to_string()); + + // Extract main content as markdown + let content_html = extract_main_content(document); + let content = html_to_markdown(&content_html, false, false); + + // Extract links + let links = Selector::parse("a[href]") + .ok() + .map(|sel| { + document + .select(&sel) + .filter_map(|el| { + el.value().attr("href").map(|href| LinkInfo { + href: decode_html_entities(href).to_string(), + text: el.text().collect::<String>().trim().to_string(), + }) + }) + .filter(|link| !link.href.is_empty() && !link.href.starts_with('#')) + .collect() + }) + .unwrap_or_default(); + + // Extract images + let images = Selector::parse("img[src]") + .ok() + .map(|sel| { + document + .select(&sel) + .filter_map(|el| { + el.value().attr("src").map(|src| ImageInfo { + src: src.to_string(), + alt: el.value().attr("alt").map(|s| s.to_string()), + }) + }) + .filter(|img| !img.src.is_empty()) + .collect() + }) + .unwrap_or_default(); + + // Extract other metadata + let mut meta_map = serde_json::Map::new(); + + // Common meta tags + let meta_tags = [ + ("og:title", "og_title"), + ("og:description", "og_description"), + ("og:image", "og_image"), + ("og:type", "og_type"), + ("twitter:card", "twitter_card"), + ("twitter:title", "twitter_title"), + ("author", "author"), + ("keywords", "keywords"), + ("robots", "robots"), + ]; + + for (name, key) in meta_tags { + if let Some(value) = + Selector::parse(&format!("meta[name='{}'], meta[property='{}']", name, name)) + .ok() + .and_then(|sel| document.select(&sel).next()) + .and_then(|el| el.value().attr("content")) + { + meta_map.insert( + key.to_string(), + serde_json::Value::String(value.to_string()), + ); + } + } + + // Extract canonical URL if present + if let Some(canonical) = Selector::parse("link[rel='canonical']") + .ok() + .and_then(|sel| document.select(&sel).next()) + .and_then(|el| el.value().attr("href")) + { + meta_map.insert( + "canonical".to_string(), + serde_json::Value::String(canonical.to_string()), + ); + } + + PageMetadata { + url: url.to_string(), + title, + description, + content, + links, + images, + metadata: serde_json::Value::Object(meta_map), + } +} + /// Parse custom headers from command line arguments. /// Validates header length and warns about duplicate headers. fn parse_headers(headers: &[String]) -> Result<HashMap<String, String>> { diff --git a/cortex-gui/src-tauri/src/extensions.rs b/cortex-gui/src-tauri/src/extensions.rs index fdf6da4e..a114896c 100755 --- a/cortex-gui/src-tauri/src/extensions.rs +++ b/cortex-gui/src-tauri/src/extensions.rs @@ -1610,12 +1610,8 @@ pub async fn vscode_execute_builtin_command( // This should be handled by the frontend directly Ok(serde_json::Value::Null) } - "workbench.action.files.saveAll" => { - Ok(serde_json::Value::Null) - } - "workbench.action.closeActiveEditor" => { - Ok(serde_json::Value::Null) - } + "workbench.action.files.saveAll" => Ok(serde_json::Value::Null), + "workbench.action.closeActiveEditor" => Ok(serde_json::Value::Null), _ => { tracing::warn!( "VS Code builtin command '{}' is not implemented in Cortex", @@ -1644,10 +1640,7 @@ pub async fn vscode_execute_command( args.len() ); - tracing::warn!( - "Extension command '{}' execution not implemented", - command - ); + tracing::warn!("Extension command '{}' execution not implemented", command); Err(format!( "Extension command '{}' is not available. Extension command execution requires the VS Code extension host which is not yet fully implemented.", command diff --git a/cortex-gui/src-tauri/src/fs.rs b/cortex-gui/src-tauri/src/fs.rs index 258ab68e..2c2e4c01 100755 --- a/cortex-gui/src-tauri/src/fs.rs +++ b/cortex-gui/src-tauri/src/fs.rs @@ -2851,15 +2851,9 @@ pub struct TextEdit { #[tauri::command] pub async fn apply_workspace_edit(uri: String, edits: Vec<TextEdit>) -> Result<(), String> { // Convert file:// URI to path - let file_path = uri - .strip_prefix("file://") - .unwrap_or(&uri); - - tracing::debug!( - "Applying {} workspace edits to {}", - edits.len(), - file_path - ); + let file_path = uri.strip_prefix("file://").unwrap_or(&uri); + + tracing::debug!("Applying {} workspace edits to {}", edits.len(), file_path); // Read the file content let content = tokio::fs::read_to_string(file_path) @@ -2896,9 +2890,9 @@ pub async fn apply_workspace_edit(uri: String, edits: Vec<TextEdit>) -> Result<( if start_line == end_line { // Single line edit - let line = lines.get_mut(start_line).ok_or_else(|| { - format!("Invalid line index: {}", start_line) - })?; + let line = lines + .get_mut(start_line) + .ok_or_else(|| format!("Invalid line index: {}", start_line))?; let safe_start = start_char.min(line.len()); let safe_end = end_char.min(line.len()); diff --git a/cortex-gui/src-tauri/src/remote.rs b/cortex-gui/src-tauri/src/remote.rs index 9d7118b4..786fdbb3 100755 --- a/cortex-gui/src-tauri/src/remote.rs +++ b/cortex-gui/src-tauri/src/remote.rs @@ -1757,7 +1757,9 @@ pub mod commands { /// Load a devcontainer.json configuration file #[tauri::command] - pub async fn devcontainer_load_config(config_path: String) -> Result<serde_json::Value, String> { + pub async fn devcontainer_load_config( + config_path: String, + ) -> Result<serde_json::Value, String> { tracing::warn!( "DevContainer load_config called for '{}' but feature is not implemented", config_path