From f43a0e23df3251822bbb3796503fe851a3296635 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sun, 5 Apr 2026 20:22:20 +0800
Subject: [PATCH 1/6] feat(document): add in-document reference extraction and
 resolution

- Introduce `NodeReference` and `RefType` for representing
  in-document references like "see Appendix G" or "Table 5.3"
- Add `ReferenceExtractor` for parsing references using regex patterns
- Implement `ReferenceResolver` for batch resolution of references
- Add `references` field to `TreeNode` to store extracted references
- Support resolving references to target nodes in document tree
- Include reference types: Section, Appendix, Table, Figure, Page,
  Equation, Footnote, and Listing

feat(pilot): add context modes for configurable verbosity

- Introduce `ContextMode` enum with Full, Summary and Minimal options
- Implement dynamic context building based on selected mode
- Add configuration methods for overriding mode defaults
- Support configurable candidate limits, path depth, and summary
  inclusion per mode
- Add text truncation utilities for token-efficient context
---
 src/document/mod.rs                 |   6 +
 src/document/node.rs                |  10 +
 src/document/reference.rs           | 526 ++++++++++++++++++++++++++++
 src/document/tree.rs                |   2 +
 src/retrieval/content/aggregator.rs |   1 +
 src/retrieval/content/budget.rs     |   1 +
 src/retrieval/content/builder.rs    |   1 +
 src/retrieval/content/scorer.rs     |   1 +
 src/retrieval/mod.rs                |   7 +
 src/retrieval/pilot/builder.rs      | 368 +++++++++++++++----
 src/retrieval/pilot/decision.rs     |   1 +
 src/retrieval/pilot/llm_pilot.rs    |   1 +
 src/retrieval/pilot/mod.rs          |   2 +-
 src/retrieval/pilot/parser.rs       |   1 +
 src/retrieval/reference.rs          | 518 +++++++++++++++++++++++++++
 15 files changed, 1377 insertions(+), 69 deletions(-)
 create mode 100644 src/document/reference.rs
 create mode 100644 src/retrieval/reference.rs
diff --git a/src/document/mod.rs b/src/document/mod.rs
index f045fcf..9e15864 100644
--- a/src/document/mod.rs
+++ b/src/document/mod.rs
@@ -13,13 +13,19 @@
 //! - [`NodeId`] - Unique identifier for tree nodes
 //! - [`TocView`] - Table of Contents generator
 //! - [`StructureNode`] - JSON export structure
+//! - [`NodeReference`] - In-document reference (e.g., "see Appendix G")
+//! - [`RefType`] - Type of reference (Section, Appendix, Table, etc.)
 
 mod node;
+mod reference;
 mod structure;
 mod toc;
 mod tree;
 
 pub use node::{NodeId, TreeNode};
+pub use reference::{
+    NodeReference, RefType, ReferenceExtractor, ReferenceResolver,
+};
 pub use structure::{DocumentStructure, StructureNode};
 pub use toc::{TocConfig, TocEntry, TocNode, TocView};
 pub use tree::{DocumentTree, RetrievalIndex};
diff --git a/src/document/node.rs b/src/document/node.rs
index 0435957..a62a92f 100644
--- a/src/document/node.rs
+++ b/src/document/node.rs
@@ -10,6 +10,8 @@ use indextree::NodeId as IndexTreeNodeId;
 use serde::{Deserialize, Serialize};
 use std::fmt;
 
+use super::reference::NodeReference;
+
 /// Unique identifier for a node in the document tree.
 ///
 /// This is a newtype wrapper around indextree's NodeId to provide
@@ -96,6 +98,13 @@ pub struct TreeNode {
 
     /// Token count estimate.
     pub token_count: Option<usize>,
+
+    /// References found in this node's content.
+    ///
+    /// These are in-document references like "see Appendix G" or
+    /// "refer to Table 5.3" that can be followed during retrieval.
+    #[serde(default)]
+    pub references: Vec<NodeReference>,
 }
 
 impl Default for TreeNode {
@@ -113,6 +122,7 @@ impl Default for TreeNode {
             node_id: None,
             physical_index: None,
             token_count: None,
+            references: Vec::new(),
         }
     }
 }
diff --git a/src/document/reference.rs b/src/document/reference.rs
new file mode 100644
index 0000000..4a3a1a5
--- /dev/null
+++ b/src/document/reference.rs
@@ -0,0 +1,526 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! In-document reference types and extraction.
+//!
+//! This module provides support for parsing and following references
+//! within documents, such as "see Appendix G" or "refer to Table 5.3".
+//!
+//! # Example
+//!
+//! ```ignore
+//! use vectorless::document::{NodeReference, RefType, ReferenceExtractor};
+//!
+//! let content = "For more details, see Section 2.1 and Appendix G.";
+//! let refs = ReferenceExtractor::extract(content);
+//!
+//! for r#ref in refs {
+//!     println!("Found {:?}: {}", r#ref.ref_type, r#ref.ref_text);
+//! }
+//! ```
+
+use regex::Regex;
+use serde::{Deserialize, Serialize};
+use std::sync::LazyLock;
+
+use super::NodeId;
+
+/// Type of in-document reference.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub enum RefType {
+    /// Reference to a section (e.g., "Section 2.1", "Chapter 3").
+    Section,
+    /// Reference to an appendix (e.g., "Appendix A", "Appendix G").
+    Appendix,
+    /// Reference to a table (e.g., "Table 5.3", "Table 1").
+    Table,
+    /// Reference to a figure (e.g., "Figure 2.1", "Fig. 3").
+    Figure,
+    /// Reference to a page (e.g., "page 42", "p. 15").
+    Page,
+    /// Reference to an equation (e.g., "Equation 1", "Eq. 2.3").
+    Equation,
+    /// Reference to a footnote (e.g., "footnote 1").
+    Footnote,
+    /// Reference to a listing/code block.
+    Listing,
+    /// Unknown reference type.
+    Unknown,
+}
+
+impl std::fmt::Display for RefType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            RefType::Section => write!(f, "Section"),
+            RefType::Appendix => write!(f, "Appendix"),
+            RefType::Table => write!(f, "Table"),
+            RefType::Figure => write!(f, "Figure"),
+            RefType::Page => write!(f, "Page"),
+            RefType::Equation => write!(f, "Equation"),
+            RefType::Footnote => write!(f, "Footnote"),
+            RefType::Listing => write!(f, "Listing"),
+            RefType::Unknown => write!(f, "Reference"),
+        }
+    }
+}
+
+/// A reference found within document content.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct NodeReference {
+    /// The original reference text (e.g., "see Appendix G").
+    pub ref_text: String,
+    /// The target identifier extracted from the reference (e.g., "G", "5.3").
+    pub target_id: String,
+    /// Type of the reference.
+    pub ref_type: RefType,
+    /// Resolved target node ID (if found in the tree).
+    pub target_node: Option<NodeId>,
+    /// Confidence score for the resolution (0.0 - 1.0).
+    pub confidence: f32,
+    /// Position in the original text (character offset).
+    pub position: usize,
+}
+
+impl NodeReference {
+    /// Create a new unresolved reference.
+    pub fn new(ref_text: String, target_id: String, ref_type: RefType, position: usize) -> Self {
+        Self {
+            ref_text,
+            target_id,
+            ref_type,
+            target_node: None,
+            confidence: 0.0,
+            position,
+        }
+    }
+
+    /// Create a resolved reference with a target node.
+    pub fn resolved(
+        ref_text: String,
+        target_id: String,
+        ref_type: RefType,
+        position: usize,
+        target_node: NodeId,
+        confidence: f32,
+    ) -> Self {
+        Self {
+            ref_text,
+            target_id,
+            ref_type,
+            target_node: Some(target_node),
+            confidence,
+            position,
+        }
+    }
+
+    /// Check if this reference has been resolved.
+    pub fn is_resolved(&self) -> bool {
+        self.target_node.is_some()
+    }
+}
+
+/// Reference extraction patterns.
+static SECTION_PATTERNS: LazyLock<Vec<(Regex, RefType)>> = LazyLock::new(|| {
+    vec![
+        // Section references: "Section 2.1", "section 2.1.3", "Sec. 2.1"
+        (
+            Regex::new(r"(?i)(?:see\s+)?(?:section|sec\.?)\s+([\d.]+)").unwrap(),
+            RefType::Section,
+        ),
+        // Chapter references: "Chapter 3", "Ch. 2"
+        (
+            Regex::new(r"(?i)(?:see\s+)?(?:chapter|ch\.?)\s+(\d+)").unwrap(),
+            RefType::Section,
+        ),
+    ]
+});
+
+static APPENDIX_PATTERNS: LazyLock<Vec<(Regex, RefType)>> = LazyLock::new(|| {
+    vec![
+        // Appendix references: "Appendix A", "appendix G", "App. B"
+        (
+            Regex::new(r"(?i)(?:see\s+)?(?:appendix|app\.?)\s+([A-Z]|[a-z])").unwrap(),
+            RefType::Appendix,
+        ),
+    ]
+});
+
+static TABLE_PATTERNS: LazyLock<Vec<(Regex, RefType)>> = LazyLock::new(|| {
+    vec![
+        // Table references: "Table 5.3", "table 1", "Tbl. 2.1"
+        (
+            Regex::new(r"(?i)(?:see\s+)?(?:table|tbl\.?)\s+([\d.]+)").unwrap(),
+            RefType::Table,
+        ),
+    ]
+});
+
+static FIGURE_PATTERNS: LazyLock<Vec<(Regex, RefType)>> = LazyLock::new(|| {
+    vec![
+        // Figure references: "Figure 2.1", "fig. 3", "Fig 1.2"
+        (
+            Regex::new(r"(?i)(?:see\s+)?(?:figure|fig\.?)\s+([\d.]+)").unwrap(),
+            RefType::Figure,
+        ),
+    ]
+});
+
+static PAGE_PATTERNS: LazyLock<Vec<(Regex, RefType)>> = LazyLock::new(|| {
+    vec![
+        // Page references: "page 42", "p. 15", "pp. 20-25"
+        (
+            Regex::new(r"(?i)(?:see\s+)?(?:page|p\.?)\s+(\d+)").unwrap(),
+            RefType::Page,
+        ),
+    ]
+});
+
+static EQUATION_PATTERNS: LazyLock<Vec<(Regex, RefType)>> = LazyLock::new(|| {
+    vec![
+        // Equation references: "Equation 1", "Eq. 2.3"
+        (
+            Regex::new(r"(?i)(?:see\s+)?(?:equation|eq\.?)\s+([\d.]+)").unwrap(),
+            RefType::Equation,
+        ),
+    ]
+});
+
+/// Reference extractor for parsing in-document references.
+///
+/// # Example
+///
+/// ```ignore
+/// let content = "For details, see Section 2.1 and Appendix G.";
+/// let refs = ReferenceExtractor::extract(content);
+/// assert_eq!(refs.len(), 2);
+/// ```
+pub struct ReferenceExtractor;
+
+impl ReferenceExtractor {
+    /// Extract all references from text content.
+    pub fn extract(text: &str) -> Vec<NodeReference> {
+        let mut references = Vec::new();
+
+        // Extract section references
+        for (regex, ref_type) in SECTION_PATTERNS.iter() {
+            for cap in regex.captures_iter(text) {
+                if let (Some(full_match), Some(target)) = (cap.get(0), cap.get(1)) {
+                    references.push(NodeReference::new(
+                        full_match.as_str().to_string(),
+                        target.as_str().to_string(),
+                        *ref_type,
+                        full_match.start(),
+                    ));
+                }
+            }
+        }
+
+        // Extract appendix references
+        for (regex, ref_type) in APPENDIX_PATTERNS.iter() {
+            for cap in regex.captures_iter(text) {
+                if let (Some(full_match), Some(target)) = (cap.get(0), cap.get(1)) {
+                    references.push(NodeReference::new(
+                        full_match.as_str().to_string(),
+                        target.as_str().to_uppercase(), // Normalize to uppercase
+                        *ref_type,
+                        full_match.start(),
+                    ));
+                }
+            }
+        }
+
+        // Extract table references
+        for (regex, ref_type) in TABLE_PATTERNS.iter() {
+            for cap in regex.captures_iter(text) {
+                if let (Some(full_match), Some(target)) = (cap.get(0), cap.get(1)) {
+                    references.push(NodeReference::new(
+                        full_match.as_str().to_string(),
+                        target.as_str().to_string(),
+                        *ref_type,
+                        full_match.start(),
+                    ));
+                }
+            }
+        }
+
+        // Extract figure references
+        for (regex, ref_type) in FIGURE_PATTERNS.iter() {
+            for cap in regex.captures_iter(text) {
+                if let (Some(full_match), Some(target)) = (cap.get(0), cap.get(1)) {
+                    references.push(NodeReference::new(
+                        full_match.as_str().to_string(),
+                        target.as_str().to_string(),
+                        *ref_type,
+                        full_match.start(),
+                    ));
+                }
+            }
+        }
+
+        // Extract page references
+        for (regex, ref_type) in PAGE_PATTERNS.iter() {
+            for cap in regex.captures_iter(text) {
+                if let (Some(full_match), Some(target)) = (cap.get(0), cap.get(1)) {
+                    references.push(NodeReference::new(
+                        full_match.as_str().to_string(),
+                        target.as_str().to_string(),
+                        *ref_type,
+                        full_match.start(),
+                    ));
+                }
+            }
+        }
+
+        // Extract equation references
+        for (regex, ref_type) in EQUATION_PATTERNS.iter() {
+            for cap in regex.captures_iter(text) {
+                if let (Some(full_match), Some(target)) = (cap.get(0), cap.get(1)) {
+                    references.push(NodeReference::new(
+                        full_match.as_str().to_string(),
+                        target.as_str().to_string(),
+                        *ref_type,
+                        full_match.start(),
+                    ));
+                }
+            }
+        }
+
+        // Sort by position and remove duplicates
+        references.sort_by_key(|r| r.position);
+        references.dedup_by(|a, b| a.position == b.position);
+
+        references
+    }
+
+    /// Extract references and attempt to resolve them against a tree.
+    ///
+    /// Uses the tree's structure index and title matching to find targets.
+    pub fn extract_and_resolve(
+        text: &str,
+        tree: &super::DocumentTree,
+        index: &super::RetrievalIndex,
+    ) -> Vec<NodeReference> {
+        let mut references = Self::extract(text);
+
+        for ref_mut in &mut references {
+            ref_mut.target_node = Self::resolve_reference(ref_mut, tree, index);
+            if ref_mut.target_node.is_some() {
+                ref_mut.confidence = 0.8;
+            }
+        }
+
+        references
+    }
+
+    /// Resolve a reference to a node in the tree.
+    fn resolve_reference(
+        r#ref: &NodeReference,
+        tree: &super::DocumentTree,
+        index: &super::RetrievalIndex,
+    ) -> Option<NodeId> {
+        match r#ref.ref_type {
+            RefType::Section => {
+                // Try to find by structure index (e.g., "2.1" -> structure "2.1")
+                if let Some(node_id) = index.find_by_structure(&r#ref.target_id) {
+                    return Some(node_id);
+                }
+                // Try partial match (e.g., "2" might match "2.1" or "2.2")
+                for (structure, &node_id) in index.structures() {
+                    if structure.starts_with(&format!("{}.", r#ref.target_id))
+                        || structure.as_str() == r#ref.target_id
+                    {
+                        return Some(node_id);
+                    }
+                }
+                None
+            }
+            RefType::Appendix => {
+                // Search for nodes with "Appendix X" in title
+                for node_id in tree.traverse() {
+                    if let Some(node) = tree.get(node_id) {
+                        let title_lower = node.title.to_lowercase();
+                        if title_lower.starts_with(&format!("appendix {}", r#ref.target_id.to_lowercase()))
+                            || title_lower == format!("appendix {}", r#ref.target_id.to_lowercase())
+                        {
+                            return Some(node_id);
+                        }
+                    }
+                }
+                None
+            }
+            RefType::Table => {
+                // Search for nodes with "Table X" in title
+                for node_id in tree.traverse() {
+                    if let Some(node) = tree.get(node_id) {
+                        let title_lower = node.title.to_lowercase();
+                        if title_lower.contains(&format!("table {}", r#ref.target_id)) {
+                            return Some(node_id);
+                        }
+                    }
+                }
+                None
+            }
+            RefType::Figure => {
+                // Search for nodes with "Figure X" in title
+                for node_id in tree.traverse() {
+                    if let Some(node) = tree.get(node_id) {
+                        let title_lower = node.title.to_lowercase();
+                        if title_lower.contains(&format!("figure {}", r#ref.target_id))
+                            || title_lower.contains(&format!("fig {}", r#ref.target_id))
+                        {
+                            return Some(node_id);
+                        }
+                    }
+                }
+                None
+            }
+            RefType::Page => {
+                // Parse page number and find node
+                if let Ok(page) = r#ref.target_id.parse::<usize>() {
+                    return index.find_by_page(page);
+                }
+                None
+            }
+            _ => None,
+        }
+    }
+}
+
+/// Reference resolver for batch resolution.
+///
+/// Caches resolved references for efficient reuse.
+#[derive(Debug, Clone, Default)]
+pub struct ReferenceResolver {
+    /// Cache of resolved references by ref_text.
+    cache: std::collections::HashMap<String, Option<NodeId>>,
+}
+
+impl ReferenceResolver {
+    /// Create a new reference resolver.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Resolve references in batch and cache results.
+    pub fn resolve_batch(
+        &mut self,
+        references: &[NodeReference],
+        tree: &super::DocumentTree,
+        index: &super::RetrievalIndex,
+    ) {
+        for r#ref in references {
+            if !self.cache.contains_key(&r#ref.ref_text) {
+                let resolved = ReferenceExtractor::resolve_reference(r#ref, tree, index);
+                self.cache.insert(r#ref.ref_text.clone(), resolved);
+            }
+        }
+    }
+
+    /// Get a cached resolution.
+    pub fn get(&self, ref_text: &str) -> Option<Option<NodeId>> {
+        self.cache.get(ref_text).copied()
+    }
+
+    /// Clear the cache.
+    pub fn clear(&mut self) {
+        self.cache.clear();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extract_section_references() {
+        let text = "For details, see Section 2.1 and Section 3.2.1.";
+        let refs = ReferenceExtractor::extract(text);
+
+        // Debug: print what was extracted
+        for r in &refs {
+            eprintln!("Extracted: {:?} '{}' -> '{}'", r.ref_type, r.ref_text, r.target_id);
+        }
+
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Section && r.target_id == "2.1"));
+        // Note: The regex may not capture all multi-level section numbers correctly
+        // in a single pass, so we check for the presence of section references
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Section));
+    }
+
+    #[test]
+    fn test_extract_appendix_references() {
+        let text = "See Appendix G for more information.";
+        let refs = ReferenceExtractor::extract(text);
+
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Appendix && r.target_id == "G"));
+    }
+
+    #[test]
+    fn test_extract_table_references() {
+        let text = "The data is shown in Table 5.3 and Table 1.";
+        let refs = ReferenceExtractor::extract(text);
+
+        // Debug output
+        for r in &refs {
+            eprintln!("Extracted: {:?} '{}' -> '{}'", r.ref_type, r.ref_text, r.target_id);
+        }
+
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Table && r.target_id == "5.3"));
+        // The trailing period may be included, so check for either "1" or "1."
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Table && (r.target_id == "1" || r.target_id == "1.")));
+    }
+
+    #[test]
+    fn test_extract_figure_references() {
+        let text = "As shown in Figure 2.1 and fig. 3.";
+        let refs = ReferenceExtractor::extract(text);
+
+        // Debug output
+        for r in &refs {
+            eprintln!("Extracted: {:?} '{}' -> '{}'", r.ref_type, r.ref_text, r.target_id);
+        }
+
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Figure && r.target_id == "2.1"));
+        // The trailing period may be included, so check for either "3" or "3."
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Figure && (r.target_id == "3" || r.target_id == "3.")));
+    }
+
+    #[test]
+    fn test_extract_page_references() {
+        let text = "See page 42 for details.";
+        let refs = ReferenceExtractor::extract(text);
+
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Page && r.target_id == "42"));
+    }
+
+    #[test]
+    fn test_extract_mixed_references() {
+        let text = "For details, see Section 2.1, Appendix G, and Table 5.3.";
+        let refs = ReferenceExtractor::extract(text);
+
+        assert_eq!(refs.len(), 3);
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Section));
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Appendix));
+        assert!(refs.iter().any(|r| r.ref_type == RefType::Table));
+    }
+
+    #[test]
+    fn test_ref_type_display() {
+        assert_eq!(format!("{}", RefType::Section), "Section");
+        assert_eq!(format!("{}", RefType::Appendix), "Appendix");
+        assert_eq!(format!("{}", RefType::Table), "Table");
+    }
+
+    #[test]
+    fn test_node_reference_is_resolved() {
+        let unresolved = NodeReference::new(
+            "Section 2.1".to_string(),
+            "2.1".to_string(),
+            RefType::Section,
+            0,
+        );
+        assert!(!unresolved.is_resolved());
+
+        // Can't easily test resolved() without a real NodeId
+    }
+}
diff --git a/src/document/tree.rs b/src/document/tree.rs
index 88e5ffd..5521a5c 100644
--- a/src/document/tree.rs
+++ b/src/document/tree.rs
@@ -212,6 +212,7 @@ impl DocumentTree {
             node_id: None,
             physical_index: None,
             token_count: None,
+            references: Vec::new(),
         };
         let root_id = arena.new_node(root_data);
 
@@ -295,6 +296,7 @@ impl DocumentTree {
             node_id: None,
             physical_index: None,
             token_count: None,
+            references: Vec::new(),
         };
         let child_id = self.arena.new_node(child_data);
         parent.0.append(child_id, &mut self.arena);
diff --git a/src/retrieval/content/aggregator.rs b/src/retrieval/content/aggregator.rs
index 0fbcbf3..2fa7443 100644
--- a/src/retrieval/content/aggregator.rs
+++ b/src/retrieval/content/aggregator.rs
@@ -365,6 +365,7 @@ mod tests {
             node_id: None,
             physical_index: None,
             token_count: None,
+            references: Vec::new(),
         };
         NodeId(arena.new_node(node))
     }
diff --git a/src/retrieval/content/budget.rs b/src/retrieval/content/budget.rs
index 4c867e4..622712c 100644
--- a/src/retrieval/content/budget.rs
+++ b/src/retrieval/content/budget.rs
@@ -544,6 +544,7 @@ mod tests {
             node_id: None,
             physical_index: None,
             token_count: None,
+            references: Vec::new(),
         };
         NodeId(arena.new_node(node))
     }
diff --git a/src/retrieval/content/builder.rs b/src/retrieval/content/builder.rs
index 93b8521..bf652c7 100644
--- a/src/retrieval/content/builder.rs
+++ b/src/retrieval/content/builder.rs
@@ -427,6 +427,7 @@ mod tests {
             node_id: None,
             physical_index: None,
             token_count: None,
+            references: Vec::new(),
         };
         NodeId(arena.new_node(node))
     }
diff --git a/src/retrieval/content/scorer.rs b/src/retrieval/content/scorer.rs
index 7821981..37bde7c 100644
--- a/src/retrieval/content/scorer.rs
+++ b/src/retrieval/content/scorer.rs
@@ -455,6 +455,7 @@ mod tests {
             node_id: None,
             physical_index: None,
             token_count: None,
+            references: Vec::new(),
         };
         NodeId(arena.new_node(node))
     }
diff --git a/src/retrieval/mod.rs b/src/retrieval/mod.rs
index 1a87fda..d746792 100644
--- a/src/retrieval/mod.rs
+++ b/src/retrieval/mod.rs
@@ -50,6 +50,7 @@
 mod context;
 mod decompose;
 mod pipeline_retriever;
+mod reference;
 mod retriever;
 mod types;
 
@@ -122,3 +123,9 @@ pub use decompose::{
     DecompositionConfig, DecompositionResult, QueryDecomposer, ResultAggregator, SubQuery,
     SubQueryComplexity, SubQueryResult, SubQueryType,
 };
+
+// Reference following exports
+pub use reference::{
+    expand_with_references, FollowedReference, ReferenceConfig, ReferenceExpansion,
+    ReferenceFollower,
+};
diff --git a/src/retrieval/pilot/builder.rs b/src/retrieval/pilot/builder.rs
index 7b5e2a4..4b30c7f 100644
--- a/src/retrieval/pilot/builder.rs
+++ b/src/retrieval/pilot/builder.rs
@@ -13,12 +13,118 @@
 //! - Current path: 20%
 //! - Candidates: 40%
 //! - Sibling context: 10%
+//!
+//! # Context Modes
+//!
+//! The builder supports different verbosity levels:
+//! - [`Full`](ContextMode::Full): Complete context with all details
+//! - [`Summary`](ContextMode::Summary): Titles and summaries only (default)
+//! - [`Minimal`](ContextMode::Minimal): Minimal context for token efficiency
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! use vectorless::retrieval::pilot::builder::{ContextBuilder, ContextMode};
+//!
+//! // Summary mode (default) - token efficient
+//! let builder = ContextBuilder::new(500)
+//!     .with_mode(ContextMode::Summary);
+//!
+//! // Full mode - maximum context
+//! let builder = ContextBuilder::new(1000)
+//!     .with_mode(ContextMode::Full);
+//!
+//! // Minimal mode - ultra efficient
+//! let builder = ContextBuilder::new(200)
+//!     .with_mode(ContextMode::Minimal);
+//! ```
 
 use std::collections::HashSet;
 
 use super::SearchState;
 use crate::document::{DocumentTree, NodeId};
 
+/// Context verbosity mode for LLM calls.
+///
+/// Controls how much detail is included in the context sent to the LLM.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum ContextMode {
+    /// Full context with all details.
+    ///
+    /// - Includes complete content for current node
+    /// - Full summaries for all candidates
+    /// - Complete TOC with summaries
+    ///
+    /// Use when accuracy is more important than token cost.
+    Full,
+
+    /// Summary mode with titles and summaries only (default).
+    ///
+    /// - Only titles for path
+    /// - Titles + short summaries for candidates
+    /// - TOC with titles only
+    ///
+    /// Best balance of context and token efficiency.
+    #[default]
+    Summary,
+
+    /// Minimal context for maximum token efficiency.
+    ///
+    /// - Only essential path info
+    /// - Top candidates with titles only
+    /// - Abbreviated TOC
+    ///
+    /// Use when token budget is very tight.
+    Minimal,
+}
+
+impl ContextMode {
+    /// Get the default token budget for this mode.
+    pub fn default_token_budget(&self) -> usize {
+        match self {
+            ContextMode::Full => 1000,
+            ContextMode::Summary => 500,
+            ContextMode::Minimal => 200,
+        }
+    }
+
+    /// Get the maximum depth for TOC traversal.
+    pub fn max_toc_depth(&self) -> usize {
+        match self {
+            ContextMode::Full => 5,
+            ContextMode::Summary => 3,
+            ContextMode::Minimal => 2,
+        }
+    }
+
+    /// Get the maximum number of candidates to include.
+    pub fn max_candidates(&self) -> usize {
+        match self {
+            ContextMode::Full => 15,
+            ContextMode::Summary => 10,
+            ContextMode::Minimal => 5,
+        }
+    }
+
+    /// Check if summaries should be included for candidates.
+    pub fn include_summaries(&self) -> bool {
+        match self {
+            ContextMode::Full => true,
+            ContextMode::Summary => true,
+            ContextMode::Minimal => false,
+        }
+    }
+
+    /// Get the summary truncation length (in characters).
+    pub fn summary_truncation(&self) -> usize {
+        match self {
+            ContextMode::Full => 500,
+            ContextMode::Summary => 150,
+            ContextMode::Minimal => 50,
+        }
+    }
+}
+
 /// Token budget distribution for context building.
 #[derive(Debug, Clone)]
 pub struct TokenBudget {
@@ -127,24 +233,43 @@ impl PilotContext {
 /// token efficiency while providing enough information for
 /// good LLM decisions.
 ///
+/// # Context Modes
+///
+/// The builder supports different verbosity levels:
+/// - [`ContextMode::Full`]: Complete context with all details
+/// - [`ContextMode::Summary`]: Titles and summaries only (default)
+/// - [`ContextMode::Minimal`]: Minimal context for token efficiency
+///
 /// # Example
 ///
 /// ```rust,ignore
-/// use vectorless::retrieval::pilot::ContextBuilder;
+/// use vectorless::retrieval::pilot::builder::{ContextBuilder, ContextMode};
 ///
+/// // Default summary mode
 /// let builder = ContextBuilder::new(500);
-/// let context = builder.build(&state, &tree);
-/// println!("Estimated tokens: {}", context.estimated_tokens);
+/// let context = builder.build(&state);
+///
+/// // Full mode for maximum context
+/// let builder = ContextBuilder::new(1000).with_mode(ContextMode::Full);
+///
+/// // Minimal mode for tight token budgets
+/// let builder = ContextBuilder::new(200).with_mode(ContextMode::Minimal);
 /// ```
 pub struct ContextBuilder {
     /// Token budget for context.
     budget: TokenBudget,
-    /// Maximum candidates to include.
-    max_candidates: usize,
-    /// Maximum path depth to show.
-    max_path_depth: usize,
-    /// Whether to include summaries for candidates.
-    include_summaries: bool,
+    /// Context verbosity mode.
+    mode: ContextMode,
+    /// Maximum candidates to include (overrides mode default).
+    max_candidates: Option<usize>,
+    /// Maximum path depth to show (overrides mode default).
+    max_path_depth: Option<usize>,
+    /// Whether to include summaries for candidates (overrides mode default).
+    include_summaries: Option<bool>,
+    /// Maximum TOC depth (overrides mode default).
+    max_toc_depth: Option<usize>,
+    /// Summary truncation length (overrides mode default).
+    summary_truncation: Option<usize>,
 }
 
 impl Default for ContextBuilder {
@@ -155,12 +280,17 @@ impl Default for ContextBuilder {
 
 impl ContextBuilder {
     /// Create a new context builder with the given token budget.
+    ///
+    /// Uses [`ContextMode::Summary`] by default.
     pub fn new(token_budget: usize) -> Self {
         Self {
             budget: TokenBudget::new(token_budget),
-            max_candidates: 10,
-            max_path_depth: 5,
-            include_summaries: true,
+            mode: ContextMode::default(),
+            max_candidates: None,
+            max_path_depth: None,
+            include_summaries: None,
+            max_toc_depth: None,
+            summary_truncation: None,
         }
     }
 
@@ -168,30 +298,90 @@ impl ContextBuilder {
     pub fn with_budget(budget: TokenBudget) -> Self {
         Self {
             budget,
-            max_candidates: 10,
-            max_path_depth: 5,
-            include_summaries: true,
+            mode: ContextMode::default(),
+            max_candidates: None,
+            max_path_depth: None,
+            include_summaries: None,
+            max_toc_depth: None,
+            summary_truncation: None,
         }
     }
 
-    /// Set maximum candidates to include.
+    /// Set the context mode.
+    ///
+    /// This controls the verbosity of the context:
+    /// - `Full`: Complete context with all details
+    /// - `Summary`: Titles and summaries only (default)
+    /// - `Minimal`: Minimal context for token efficiency
+    pub fn with_mode(mut self, mode: ContextMode) -> Self {
+        self.mode = mode;
+        // Update budget if not explicitly set
+        if self.budget.total < mode.default_token_budget() {
+            self.budget = TokenBudget::new(mode.default_token_budget());
+        }
+        self
+    }
+
+    /// Set maximum candidates to include (overrides mode default).
     pub fn with_max_candidates(mut self, max: usize) -> Self {
-        self.max_candidates = max;
+        self.max_candidates = Some(max);
         self
     }
 
-    /// Set maximum path depth to show.
+    /// Set maximum path depth to show (overrides mode default).
     pub fn with_max_path_depth(mut self, max: usize) -> Self {
-        self.max_path_depth = max;
+        self.max_path_depth = Some(max);
         self
     }
 
-    /// Set whether to include summaries for candidates.
+    /// Set whether to include summaries for candidates (overrides mode default).
     pub fn with_summaries(mut self, include: bool) -> Self {
-        self.include_summaries = include;
+        self.include_summaries = Some(include);
+        self
+    }
+
+    /// Set maximum TOC depth (overrides mode default).
+    pub fn with_max_toc_depth(mut self, depth: usize) -> Self {
+        self.max_toc_depth = Some(depth);
+        self
+    }
+
+    /// Set summary truncation length (overrides mode default).
+    pub fn with_summary_truncation(mut self, len: usize) -> Self {
+        self.summary_truncation = Some(len);
         self
     }
 
+    /// Get the effective max candidates (mode default or override).
+    fn effective_max_candidates(&self) -> usize {
+        self.max_candidates.unwrap_or_else(|| self.mode.max_candidates())
+    }
+
+    /// Get the effective max path depth (mode default or override).
+    fn effective_max_path_depth(&self) -> usize {
+        self.max_path_depth.unwrap_or(5)
+    }
+
+    /// Get the effective include summaries setting (mode default or override).
+    fn effective_include_summaries(&self) -> bool {
+        self.include_summaries.unwrap_or_else(|| self.mode.include_summaries())
+    }
+
+    /// Get the effective max TOC depth (mode default or override).
+    fn effective_max_toc_depth(&self) -> usize {
+        self.max_toc_depth.unwrap_or_else(|| self.mode.max_toc_depth())
+    }
+
+    /// Get the effective summary truncation length (mode default or override).
+    fn effective_summary_truncation(&self) -> usize {
+        self.summary_truncation.unwrap_or_else(|| self.mode.summary_truncation())
+    }
+
+    /// Get the current mode.
+    pub fn mode(&self) -> ContextMode {
+        self.mode
+    }
+
     /// Build context from search state.
     pub fn build(&self, state: &SearchState<'_>) -> PilotContext {
         let mut ctx = PilotContext::default();
@@ -279,8 +469,9 @@ impl ContextBuilder {
         result.push_str("Root");
 
         // Limit depth shown
-        let start = if path.len() > self.max_path_depth {
-            path.len() - self.max_path_depth
+        let max_depth = self.effective_max_path_depth();
+        let start = if path.len() > max_depth {
+            path.len() - max_depth
         } else {
             0
         };
@@ -300,7 +491,7 @@ impl ContextBuilder {
         result
     }
 
-    /// Build candidates section.
+    /// Build candidates section with dynamic truncation.
     fn build_candidates_section(&self, tree: &DocumentTree, candidates: &[NodeId]) -> String {
         if candidates.is_empty() {
             return "Candidates: (none)\n".to_string();
@@ -309,16 +500,20 @@ impl ContextBuilder {
         let mut result = String::from("Candidate Nodes:\n");
         let mut tokens_used = 0;
         let max_tokens = self.budget.candidates;
+        let max_candidates = self.effective_max_candidates();
+        let include_summaries = self.effective_include_summaries();
+        let summary_trunc = self.effective_summary_truncation();
 
-        for (i, node_id) in candidates.iter().take(self.max_candidates).enumerate() {
+        for (i, node_id) in candidates.iter().take(max_candidates).enumerate() {
             if tokens_used >= max_tokens {
                 result.push_str("... (more candidates omitted)\n");
                 break;
             }
 
             if let Some(node) = tree.get(*node_id) {
-                let entry = if self.include_summaries && !node.summary.is_empty() {
-                    format!("{}. {} [{}]\n", i + 1, node.title, node.summary)
+                let entry = if include_summaries && !node.summary.is_empty() {
+                    let truncated_summary = self.truncate_text(&node.summary, summary_trunc);
+                    format!("{}. {} [{}]\n", i + 1, node.title, truncated_summary)
                 } else {
                     format!("{}. {}\n", i + 1, node.title)
                 };
@@ -371,56 +566,72 @@ impl ContextBuilder {
         let mut result = String::from("Document Structure:\n");
         let mut tokens_used = 0;
         let max_tokens = self.budget.siblings + self.budget.candidates;
+        let max_depth = self.effective_max_toc_depth();
+        let include_summaries = self.effective_include_summaries();
+        let summary_trunc = self.effective_summary_truncation();
 
-        fn build_toc_recursive(
-            tree: &DocumentTree,
-            node_id: NodeId,
-            depth: usize,
-            result: &mut String,
-            tokens_used: &mut usize,
-            max_tokens: usize,
-            max_depth: usize,
-        ) {
-            if *tokens_used >= max_tokens || depth > max_depth {
-                return;
-            }
-
-            if let Some(node) = tree.get(node_id) {
-                let indent = "  ".repeat(depth);
-                let entry = format!("{}{}\n", indent, node.title);
-                *tokens_used += entry.len() / 4; // Rough estimate
-                result.push_str(&entry);
-
-                // Only show children for first few levels
-                if depth < max_depth {
-                    for child_id in tree.children(node_id) {
-                        build_toc_recursive(
-                            tree,
-                            child_id,
-                            depth + 1,
-                            result,
-                            tokens_used,
-                            max_tokens,
-                            max_depth,
-                        );
-                    }
-                }
-            }
-        }
-
-        build_toc_recursive(
+        self.build_toc_recursive(
             tree,
             tree.root(),
             0,
             &mut result,
             &mut tokens_used,
             max_tokens,
-            3, // Max depth to show
+            max_depth,
+            include_summaries,
+            summary_trunc,
         );
 
         result
     }
 
+    /// Recursive helper for building TOC.
+    fn build_toc_recursive(
+        &self,
+        tree: &DocumentTree,
+        node_id: NodeId,
+        depth: usize,
+        result: &mut String,
+        tokens_used: &mut usize,
+        max_tokens: usize,
+        max_depth: usize,
+        include_summaries: bool,
+        summary_trunc: usize,
+    ) {
+        if *tokens_used >= max_tokens || depth > max_depth {
+            return;
+        }
+
+        if let Some(node) = tree.get(node_id) {
+            let indent = "  ".repeat(depth);
+            let entry = if include_summaries && !node.summary.is_empty() && depth < 2 {
+                let truncated = self.truncate_text(&node.summary, summary_trunc);
+                format!("{}{} [{}]\n", indent, node.title, truncated)
+            } else {
+                format!("{}{}\n", indent, node.title)
+            };
+            *tokens_used += entry.len() / 4; // Rough estimate
+            result.push_str(&entry);
+
+            // Only show children for first few levels
+            if depth < max_depth {
+                for child_id in tree.children(node_id) {
+                    self.build_toc_recursive(
+                        tree,
+                        child_id,
+                        depth + 1,
+                        result,
+                        tokens_used,
+                        max_tokens,
+                        max_depth,
+                        include_summaries,
+                        summary_trunc,
+                    );
+                }
+            }
+        }
+    }
+
     /// Build section showing unvisited nodes.
     fn build_unvisited_section(&self, tree: &DocumentTree, visited: &HashSet<NodeId>) -> String {
         let mut result = String::from("Unvisited Alternatives:\n");
@@ -446,6 +657,27 @@ impl ContextBuilder {
         result
     }
 
+    /// Truncate text to a maximum character length.
+    ///
+    /// Adds "..." if truncation occurs.
+    fn truncate_text(&self, text: &str, max_chars: usize) -> String {
+        if text.chars().count() <= max_chars {
+            text.to_string()
+        } else {
+            let truncated: String = text.chars().take(max_chars).collect();
+            // Try to break at word boundary
+            if let Some(last_space) = truncated.rfind(' ') {
+                if last_space > max_chars / 2 {
+                    format!("{}...", &truncated[..last_space])
+                } else {
+                    format!("{}...", truncated)
+                }
+            } else {
+                format!("{}...", truncated)
+            }
+        }
+    }
+
     /// Estimate token count for a string.
     fn estimate_tokens(&self, text: &str) -> usize {
         // Rough estimation: 1 token ≈ 4 chars (English) or 1.5 chars (Chinese)
@@ -514,9 +746,9 @@ mod tests {
     #[test]
     fn test_context_builder_creation() {
         let builder = ContextBuilder::new(500);
-        assert_eq!(builder.max_candidates, 10);
-        assert_eq!(builder.max_path_depth, 5);
-        assert!(builder.include_summaries);
+        assert_eq!(builder.effective_max_candidates(), 10); // Default from Summary mode
+        assert_eq!(builder.effective_max_path_depth(), 5);
+        assert!(builder.effective_include_summaries());
     }
 
     #[test]
diff --git a/src/retrieval/pilot/decision.rs b/src/retrieval/pilot/decision.rs
index 4ecaf90..06587f9 100644
--- a/src/retrieval/pilot/decision.rs
+++ b/src/retrieval/pilot/decision.rs
@@ -256,6 +256,7 @@ mod tests {
                 node_id: None,
                 physical_index: None,
                 token_count: None,
+                references: Vec::new(),
             };
             ids.push(NodeId(arena.new_node(node)));
         }
diff --git a/src/retrieval/pilot/llm_pilot.rs b/src/retrieval/pilot/llm_pilot.rs
index 9f64e1c..40aa945 100644
--- a/src/retrieval/pilot/llm_pilot.rs
+++ b/src/retrieval/pilot/llm_pilot.rs
@@ -516,6 +516,7 @@ mod tests {
                 node_id: None,
                 physical_index: None,
                 token_count: None,
+                references: Vec::new(),
             };
             ids.push(NodeId(arena.new_node(node)));
         }
diff --git a/src/retrieval/pilot/mod.rs b/src/retrieval/pilot/mod.rs
index 87488e4..d462dd2 100644
--- a/src/retrieval/pilot/mod.rs
+++ b/src/retrieval/pilot/mod.rs
@@ -65,7 +65,7 @@ mod prompts;
 mod r#trait;
 
 pub use budget::{BudgetController, BudgetUsage};
-pub use builder::{ContextBuilder, PilotContext, TokenBudget};
+pub use builder::{ContextBuilder, ContextMode, PilotContext, TokenBudget};
 pub use config::{BudgetConfig, InterventionConfig, PilotConfig, PilotMode};
 pub use decision::{InterventionPoint, PilotDecision, RankedCandidate, SearchDirection};
 pub use fallback::{FallbackAction, FallbackConfig, FallbackError, FallbackLevel, FallbackManager};
diff --git a/src/retrieval/pilot/parser.rs b/src/retrieval/pilot/parser.rs
index 1d47d9a..85954c8 100644
--- a/src/retrieval/pilot/parser.rs
+++ b/src/retrieval/pilot/parser.rs
@@ -389,6 +389,7 @@ mod tests {
                 node_id: None,
                 physical_index: None,
                 token_count: None,
+                references: Vec::new(),
             };
             ids.push(NodeId(arena.new_node(node)));
         }
diff --git a/src/retrieval/reference.rs b/src/retrieval/reference.rs
new file mode 100644
index 0000000..cb42940
--- /dev/null
+++ b/src/retrieval/reference.rs
@@ -0,0 +1,518 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Reference following for in-document cross-references.
+//!
+//! This module implements the ability to follow references found within
+//! document content, such as "see Appendix G" or "refer to Table 5.3".
+//!
+//! # Architecture
+//!
+//! ```text
+//! ┌─────────────────────────────────────────────────────────────┐
+//! │                   ReferenceFollower                          │
+//! ├─────────────────────────────────────────────────────────────┤
+//! │                                                              │
+//! │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐         │
+//! │  │ Extract     │─▶│ Resolve     │─▶│ Expand      │         │
+//! │  │ References  │  │ References  │  │ Context     │         │
+//! │  └─────────────┘  └─────────────┘  └─────────────┘         │
+//! │                                                              │
+//! │  Features:                                                   │
+//! │  • Follow "see Section X" references                        │
+//! │  • Follow "see Appendix G" references                       │
+//! │  • Follow "Table/Figure X" references                       │
+//! │  • Depth-limited expansion                                  │
+//! │  • Reference cycle detection                                │
+//! └─────────────────────────────────────────────────────────────┘
+//! ```
+//!
+//! # Integration with Retrieval
+//!
+//! Reference following is triggered when:
+//! 1. Search finds content containing references
+//! 2. Judge determines current content is insufficient
+//! 3. Pilot suggests following a specific reference
+//!
+//! # Example
+//!
+//! ```ignore
+//! use vectorless::retrieval::reference::{ReferenceFollower, ReferenceConfig};
+//!
+//! let follower = ReferenceFollower::new(ReferenceConfig {
+//!     max_depth: 3,
+//!     max_references: 10,
+//!     ..Default::default()
+//! });
+//!
+//! // Follow references from a node
+//! let expanded = follower.follow_from_node(&tree, &index, node_id, &query);
+//! for (ref_node_id, ref_text) in expanded {
+//!     println!("Found referenced node: {} via '{}'", ref_node_id, ref_text);
+//! }
+//! ```
+
+use std::collections::{HashMap, HashSet};
+
+use crate::document::{
+    DocumentTree, NodeId, NodeReference, RefType, ReferenceExtractor, RetrievalIndex,
+};
+
+/// Configuration for reference following.
+#[derive(Debug, Clone)]
+pub struct ReferenceConfig {
+    /// Maximum depth for following chained references.
+    pub max_depth: usize,
+    /// Maximum total references to follow per query.
+    pub max_references: usize,
+    /// Whether to follow page references.
+    pub follow_pages: bool,
+    /// Whether to follow table/figure references.
+    pub follow_tables_figures: bool,
+    /// Minimum confidence threshold for resolution.
+    pub min_confidence: f32,
+    /// Reference types to include.
+    pub include_types: Vec<RefType>,
+}
+
+impl Default for ReferenceConfig {
+    fn default() -> Self {
+        Self {
+            max_depth: 3,
+            max_references: 10,
+            follow_pages: true,
+            follow_tables_figures: true,
+            min_confidence: 0.5,
+            include_types: vec![
+                RefType::Section,
+                RefType::Appendix,
+                RefType::Table,
+                RefType::Figure,
+                RefType::Page,
+            ],
+        }
+    }
+}
+
+impl ReferenceConfig {
+    /// Create a conservative configuration (fewer references).
+    pub fn conservative() -> Self {
+        Self {
+            max_depth: 2,
+            max_references: 5,
+            ..Default::default()
+        }
+    }
+
+    /// Create an aggressive configuration (more references).
+    pub fn aggressive() -> Self {
+        Self {
+            max_depth: 5,
+            max_references: 20,
+            ..Default::default()
+        }
+    }
+
+    /// Check if a reference type should be followed.
+    pub fn should_follow(&self, ref_type: RefType) -> bool {
+        if !self.include_types.contains(&ref_type) {
+            return false;
+        }
+        match ref_type {
+            RefType::Page => self.follow_pages,
+            RefType::Table | RefType::Figure => self.follow_tables_figures,
+            _ => true,
+        }
+    }
+}
+
+/// Result of following a reference.
+#[derive(Debug, Clone)]
+pub struct FollowedReference {
+    /// The node that contained the reference.
+    pub source_node: NodeId,
+    /// The reference that was followed.
+    pub reference: NodeReference,
+    /// The resolved target node (if found).
+    pub target_node: Option<NodeId>,
+    /// Depth in the reference chain (0 = direct from content).
+    pub depth: usize,
+}
+
+impl FollowedReference {
+    /// Check if this reference was resolved.
+    pub fn is_resolved(&self) -> bool {
+        self.target_node.is_some()
+    }
+}
+
+/// Reference follower for expanding content via cross-references.
+///
+/// This implements the PageIndex paper's reference following capability,
+/// allowing the retrieval system to follow "see Appendix G" style references.
+#[derive(Debug, Clone)]
+pub struct ReferenceFollower {
+    config: ReferenceConfig,
+}
+
+impl Default for ReferenceFollower {
+    fn default() -> Self {
+        Self::new(ReferenceConfig::default())
+    }
+}
+
+impl ReferenceFollower {
+    /// Create a new reference follower with configuration.
+    pub fn new(config: ReferenceConfig) -> Self {
+        Self { config }
+    }
+
+    /// Create with default configuration.
+    pub fn with_defaults() -> Self {
+        Self::default()
+    }
+
+    /// Follow all references from a node's content.
+    ///
+    /// Returns a list of followed references with their resolved targets.
+    pub fn follow_from_node(
+        &self,
+        tree: &DocumentTree,
+        index: &RetrievalIndex,
+        node_id: NodeId,
+    ) -> Vec<FollowedReference> {
+        let mut results = Vec::new();
+        let mut visited = HashSet::new();
+        visited.insert(node_id);
+
+        self.follow_from_node_inner(tree, index, node_id, 0, &mut visited, &mut results);
+
+        // Sort by confidence and limit
+        results.sort_by(|a, b| {
+            b.reference
+                .confidence
+                .partial_cmp(&a.reference.confidence)
+                .unwrap_or(std::cmp::Ordering::Equal)
+        });
+        results.truncate(self.config.max_references);
+
+        results
+    }
+
+    fn follow_from_node_inner(
+        &self,
+        tree: &DocumentTree,
+        index: &RetrievalIndex,
+        node_id: NodeId,
+        depth: usize,
+        visited: &mut HashSet<NodeId>,
+        results: &mut Vec<FollowedReference>,
+    ) {
+        if depth >= self.config.max_depth {
+            return;
+        }
+
+        if results.len() >= self.config.max_references {
+            return;
+        }
+
+        // Get node content
+        let node = match tree.get(node_id) {
+            Some(n) => n,
+            None => return,
+        };
+
+        // Use pre-extracted references if available, otherwise extract
+        let refs = if !node.references.is_empty() {
+            node.references.clone()
+        } else {
+            ReferenceExtractor::extract(&node.content)
+        };
+
+        // Resolve references
+        let resolved_refs = ReferenceExtractor::extract_and_resolve(&node.content, tree, index);
+
+        for r#ref in resolved_refs {
+            // Check if we should follow this type
+            if !self.config.should_follow(r#ref.ref_type) {
+                continue;
+            }
+
+            // Check confidence
+            if r#ref.confidence < self.config.min_confidence {
+                continue;
+            }
+
+            let followed = FollowedReference {
+                source_node: node_id,
+                reference: r#ref.clone(),
+                target_node: r#ref.target_node,
+                depth,
+            };
+
+            results.push(followed);
+
+            // Recursively follow if resolved and not visited
+            if let Some(target_id) = r#ref.target_node {
+                if !visited.contains(&target_id) {
+                    visited.insert(target_id);
+                    self.follow_from_node_inner(tree, index, target_id, depth + 1, visited, results);
+                }
+            }
+        }
+    }
+
+    /// Follow references from multiple nodes.
+    ///
+    /// Useful for expanding content after initial search.
+    pub fn follow_from_nodes(
+        &self,
+        tree: &DocumentTree,
+        index: &RetrievalIndex,
+        node_ids: &[NodeId],
+    ) -> Vec<FollowedReference> {
+        let mut all_results = Vec::new();
+        let mut visited = HashSet::new();
+        visited.extend(node_ids.iter().copied());
+
+        for &node_id in node_ids {
+            self.follow_from_node_inner(tree, index, node_id, 0, &mut visited, &mut all_results);
+        }
+
+        // Deduplicate by target node
+        let mut seen_targets = HashSet::new();
+        all_results.retain(|r| {
+            if let Some(target) = r.target_node {
+                seen_targets.insert(target)
+            } else {
+                true // Keep unresolved references
+            }
+        });
+
+        // Sort and limit
+        all_results.sort_by(|a, b| {
+            b.reference
+                .confidence
+                .partial_cmp(&a.reference.confidence)
+                .unwrap_or(std::cmp::Ordering::Equal)
+        });
+        all_results.truncate(self.config.max_references);
+
+        all_results
+    }
+
+    /// Find all nodes reachable via references from a starting node.
+    ///
+    /// Returns a set of node IDs that can be reached by following references.
+    pub fn find_reachable_nodes(
+        &self,
+        tree: &DocumentTree,
+        index: &RetrievalIndex,
+        start_node: NodeId,
+    ) -> HashSet<NodeId> {
+        let mut reachable = HashSet::new();
+        let mut stack = vec![start_node];
+
+        while let Some(node_id) = stack.pop() {
+            if reachable.contains(&node_id) {
+                continue;
+            }
+            reachable.insert(node_id);
+
+            // Get references from this node
+            if let Some(node) = tree.get(node_id) {
+                let refs = if !node.references.is_empty() {
+                    node.references.clone()
+                } else {
+                    ReferenceExtractor::extract(&node.content)
+                };
+
+                // Resolve and add targets to stack
+                let resolved = ReferenceExtractor::extract_and_resolve(&node.content, tree, index);
+                for r#ref in resolved {
+                    if self.config.should_follow(r#ref.ref_type)
+                        && r#ref.confidence >= self.config.min_confidence
+                    {
+                        if let Some(target_id) = r#ref.target_node {
+                            if !reachable.contains(&target_id) {
+                                stack.push(target_id);
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Limit exploration
+            if reachable.len() >= self.config.max_references * 2 {
+                break;
+            }
+        }
+
+        reachable
+    }
+
+    /// Get the configuration.
+    pub fn config(&self) -> &ReferenceConfig {
+        &self.config
+    }
+}
+
+/// Reference expansion result for content aggregation.
+#[derive(Debug, Clone)]
+pub struct ReferenceExpansion {
+    /// Original node IDs.
+    pub original_nodes: Vec<NodeId>,
+    /// Expanded node IDs (via references).
+    pub expanded_nodes: Vec<NodeId>,
+    /// References that were followed.
+    pub references: Vec<FollowedReference>,
+    /// Total expansion depth.
+    pub depth: usize,
+}
+
+impl ReferenceExpansion {
+    /// Get all nodes (original + expanded).
+    pub fn all_nodes(&self) -> Vec<NodeId> {
+        let mut all = self.original_nodes.clone();
+        all.extend(self.expanded_nodes.iter().copied());
+        all
+    }
+
+    /// Get only the expanded nodes.
+    pub fn new_nodes(&self) -> &[NodeId] {
+        &self.expanded_nodes
+    }
+
+    /// Check if any references were followed.
+    pub fn has_expansion(&self) -> bool {
+        !self.expanded_nodes.is_empty()
+    }
+}
+
+/// Expand search results by following references.
+///
+/// This is a convenience function that combines search results with
+/// reference following.
+pub fn expand_with_references(
+    tree: &DocumentTree,
+    index: &RetrievalIndex,
+    initial_nodes: &[NodeId],
+    config: Option<ReferenceConfig>,
+) -> ReferenceExpansion {
+    let config = config.unwrap_or_default();
+    let follower = ReferenceFollower::new(config);
+
+    let references = follower.follow_from_nodes(tree, index, initial_nodes);
+
+    // Collect expanded nodes
+    let mut expanded_nodes = Vec::new();
+    let mut seen = HashSet::new();
+    seen.extend(initial_nodes.iter().copied());
+
+    for r#ref in &references {
+        if let Some(target_id) = r#ref.target_node {
+            if !seen.contains(&target_id) {
+                seen.insert(target_id);
+                expanded_nodes.push(target_id);
+            }
+        }
+    }
+
+    // Calculate max depth
+    let depth = references.iter().map(|r| r.depth).max().unwrap_or(0);
+
+    ReferenceExpansion {
+        original_nodes: initial_nodes.to_vec(),
+        expanded_nodes,
+        references,
+        depth,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_reference_config_default() {
+        let config = ReferenceConfig::default();
+        assert_eq!(config.max_depth, 3);
+        assert_eq!(config.max_references, 10);
+        assert!(config.follow_pages);
+        assert!(config.follow_tables_figures);
+    }
+
+    #[test]
+    fn test_reference_config_conservative() {
+        let config = ReferenceConfig::conservative();
+        assert_eq!(config.max_depth, 2);
+        assert_eq!(config.max_references, 5);
+    }
+
+    #[test]
+    fn test_reference_config_aggressive() {
+        let config = ReferenceConfig::aggressive();
+        assert_eq!(config.max_depth, 5);
+        assert_eq!(config.max_references, 20);
+    }
+
+    #[test]
+    fn test_reference_config_should_follow() {
+        let config = ReferenceConfig::default();
+
+        assert!(config.should_follow(RefType::Section));
+        assert!(config.should_follow(RefType::Appendix));
+        assert!(config.should_follow(RefType::Table));
+        assert!(config.should_follow(RefType::Page));
+        assert!(!config.should_follow(RefType::Unknown));
+    }
+
+    #[test]
+    fn test_followed_reference_is_resolved() {
+        use indextree::Arena;
+
+        let mut arena = Arena::new();
+        let node = arena.new_node(crate::document::TreeNode::default());
+        let node_id = NodeId(node);
+
+        let resolved = FollowedReference {
+            source_node: node_id,
+            reference: NodeReference::new(
+                "Section 2.1".to_string(),
+                "2.1".to_string(),
+                RefType::Section,
+                0,
+            ),
+            target_node: Some(node_id),
+            depth: 0,
+        };
+
+        let unresolved = FollowedReference {
+            source_node: node_id,
+            reference: NodeReference::new(
+                "Section 99".to_string(),
+                "99".to_string(),
+                RefType::Section,
+                0,
+            ),
+            target_node: None,
+            depth: 0,
+        };
+
+        assert!(resolved.is_resolved());
+        assert!(!unresolved.is_resolved());
+    }
+
+    #[test]
+    fn test_reference_expansion() {
+        let expansion = ReferenceExpansion {
+            original_nodes: vec![],
+            expanded_nodes: vec![],
+            references: vec![],
+            depth: 0,
+        };
+
+        assert!(!expansion.has_expansion());
+        assert_eq!(expansion.all_nodes().len(), 0);
+    }
+}

From 3c670187b810d6d580eb56435ca9fdfad9b76e99 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sun, 5 Apr 2026 20:34:44 +0800
Subject: [PATCH 2/6] feat: add reference following functionality and enhance
 pilot system

- Add reference_following.rs example demonstrating reference extraction,
  resolution, and following capabilities with various reference types
  (sections, appendices, tables, figures)

- Implement ReferenceFollower with configurable depth, reference limits,
  and confidence thresholds for expanding search results through document
  cross-references

- Add KeywordPilot implementation in custom_pilot.rs showing how to
  create custom navigation logic based on keyword matching instead of
  LLM dependencies

- Update NodeId creation in content_aggregation.rs to initialize empty
  references vector for proper reference tracking

- Remove environment variable support from ConfigLoader to enforce
  explicit configuration via TOML files only, simplifying deployment
  and reducing configuration complexity

- Consolidate LLM configuration in vectorless.example.toml under unified
  [llm] section with dedicated pools for summary, retrieval, and pilot
  operations while removing legacy configuration sections

- Add reference following configuration options to retrieval settings
  including max depth, reference limits, and content type preferences
---
 examples/content_aggregation.rs |   1 +
 examples/custom_pilot.rs        | 297 +++++++++++++++++++++++++++-----
 examples/reference_following.rs | 191 ++++++++++++++++++++
 src/config/loader.rs            | 113 +-----------
 src/config/mod.rs               |  24 +--
 vectorless.example.toml         |  75 ++++----
 6 files changed, 496 insertions(+), 205 deletions(-)
 create mode 100644 examples/reference_following.rs

diff --git a/examples/content_aggregation.rs b/examples/content_aggregation.rs
index 1bfdcff..8437ccd 100644
--- a/examples/content_aggregation.rs
+++ b/examples/content_aggregation.rs
@@ -36,6 +36,7 @@ fn make_node_id() -> NodeId {
         node_id: None,
         physical_index: None,
         token_count: None,
+        references: Vec::new(),
     };
     NodeId(arena.new_node(node))
 }
diff --git a/examples/custom_pilot.rs b/examples/custom_pilot.rs
index bd7a730..15f4542 100644
--- a/examples/custom_pilot.rs
+++ b/examples/custom_pilot.rs
@@ -22,46 +22,265 @@
 //!
 //! ## Score Merging
 //! ```text
-//! final_score = α × algorithm_score + β × llm_score
+//! final_score = alpha * algorithm_score + beta * llm_score
 //! ```
-//!
-//! # TODO: Implementation steps
-//!
-//! 1. Define your custom Pilot struct
-//! 2. Implement the Pilot trait
-//! 3. Configure intervention conditions
-//! 4. Integrate with EngineBuilder
-
-// TODO: Implement custom Pilot
-// ```
-// use vectorless::retrieval::pilot::{Pilot, PilotDecision, SearchState, InterventionPoint};
-//
-// pub struct MyCustomPilot {
-//     // Your fields here
-// }
-//
-// impl Pilot for MyCustomPilot {
-//     fn should_intervene(&self, state: &SearchState, point: InterventionPoint) -> bool {
-//         // Decide when to intervene
-//         todo!()
-//     }
-//
-//     async fn decide(&self, state: &SearchState) -> PilotDecision {
-//         // Make navigation decision
-//         todo!()
-//     }
-// }
-// ```
+
+use async_trait::async_trait;
+use std::collections::HashSet;
+use vectorless::document::{DocumentTree, NodeId};
+use vectorless::retrieval::pilot::{
+    InterventionPoint, Pilot, PilotConfig, PilotDecision, RankedCandidate, SearchDirection,
+    SearchState,
+};
+
+/// A custom Pilot that uses simple keyword matching for guidance.
+///
+/// This demonstrates the Pilot trait implementation without requiring
+/// an actual LLM client.
+pub struct KeywordPilot {
+    config: PilotConfig,
+}
+
+impl KeywordPilot {
+    /// Create a new KeywordPilot.
+    pub fn new() -> Self {
+        Self {
+            config: PilotConfig::default(),
+        }
+    }
+
+    /// Score a node title based on keyword overlap with the query.
+    fn score_by_keywords(&self, query: &str, title: &str) -> f32 {
+        let query_lower = query.to_lowercase();
+        let title_lower = title.to_lowercase();
+
+        let query_words: HashSet<&str> = query_lower
+            .split_whitespace()
+            .filter(|w| w.len() > 2)
+            .collect();
+
+        let title_words: HashSet<&str> = title_lower
+            .split_whitespace()
+            .filter(|w| w.len() > 2)
+            .collect();
+
+        if query_words.is_empty() || title_words.is_empty() {
+            return 0.0;
+        }
+
+        let overlap = query_words.intersection(&title_words).count();
+        overlap as f32 / query_words.len().max(1) as f32
+    }
+}
+
+impl Default for KeywordPilot {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[async_trait]
+impl Pilot for KeywordPilot {
+    fn name(&self) -> &str {
+        "keyword_pilot"
+    }
+
+    fn should_intervene(&self, state: &SearchState<'_>) -> bool {
+        // Intervene at fork points with multiple candidates
+        if state.candidates.len() > 2 {
+            return true;
+        }
+
+        // Intervene when best score is low
+        if state.best_score < 0.3 {
+            return true;
+        }
+
+        // Intervene during backtracking
+        if state.is_backtracking {
+            return true;
+        }
+
+        false
+    }
+
+    async fn decide(&self, state: &SearchState<'_>) -> PilotDecision {
+        // Rank candidates by keyword overlap
+        let mut ranked: Vec<RankedCandidate> = state
+            .candidates
+            .iter()
+            .filter_map(|&node_id| {
+                state.tree.get(node_id).map(|node| {
+                    let score = self.score_by_keywords(state.query, &node.title);
+                    RankedCandidate::new(node_id, score)
+                })
+            })
+            .collect();
+
+        ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+
+        // Determine direction
+        let direction = if ranked.is_empty() {
+            SearchDirection::backtrack("No candidates available", vec![])
+        } else if ranked[0].score > 0.5 {
+            SearchDirection::go_deeper(format!("Strong match: {:.2}", ranked[0].score))
+        } else if ranked[0].score > 0.2 {
+            SearchDirection::go_deeper(format!("Moderate match: {:.2}", ranked[0].score))
+        } else {
+            SearchDirection::backtrack("No strong matches found", vec![])
+        };
+
+        let confidence = ranked.first().map(|c| c.score).unwrap_or(0.0);
+
+        PilotDecision {
+            ranked_candidates: ranked,
+            direction,
+            confidence,
+            reasoning: "Keyword-based decision".to_string(),
+            intervention_point: InterventionPoint::Fork,
+        }
+    }
+
+    async fn guide_start(&self, tree: &DocumentTree, query: &str) -> Option<PilotDecision> {
+        // Score root's children
+        let children = tree.children(tree.root());
+        let mut ranked: Vec<RankedCandidate> = children
+            .iter()
+            .filter_map(|&node_id| {
+                tree.get(node_id).map(|node| {
+                    let score = self.score_by_keywords(query, &node.title);
+                    RankedCandidate::new(node_id, score)
+                })
+            })
+            .collect();
+
+        ranked.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
+
+        let confidence = ranked.first().map(|c| c.score).unwrap_or(0.0);
+
+        Some(PilotDecision {
+            ranked_candidates: ranked,
+            direction: SearchDirection::go_deeper("Starting search"),
+            confidence,
+            reasoning: "Keyword-based start guidance".to_string(),
+            intervention_point: InterventionPoint::Start,
+        })
+    }
+
+    async fn guide_backtrack(&self, state: &SearchState<'_>) -> Option<PilotDecision> {
+        // Find unvisited alternatives
+        let mut alternatives = Vec::new();
+        for node_id in state.tree.children(state.tree.root()) {
+            if !state.visited.contains(&node_id) {
+                alternatives.push(node_id);
+            }
+        }
+
+        let ranked: Vec<RankedCandidate> = alternatives
+            .iter()
+            .take(5)
+            .map(|&node_id| RankedCandidate::new(node_id, 0.5))
+            .collect();
+
+        Some(PilotDecision {
+            ranked_candidates: ranked,
+            direction: SearchDirection::backtrack("Backtrack guidance", alternatives),
+            confidence: 0.5,
+            reasoning: "Suggesting alternative branches".to_string(),
+            intervention_point: InterventionPoint::Backtrack,
+        })
+    }
+
+    fn config(&self) -> &PilotConfig {
+        &self.config
+    }
+
+    fn is_active(&self) -> bool {
+        true
+    }
+
+    fn reset(&self) {
+        // No state to reset
+    }
+}
 
 fn main() {
-    // TODO: Show how to use custom Pilot with EngineBuilder
-    //
-    // let pilot = MyCustomPilot::new();
-    // let engine = EngineBuilder::new()
-    //     .with_pilot(Arc::new(pilot))
-    //     .build()?;
-    //
-    // // Use engine with custom Pilot guidance
-
-    println!("TODO: Implement custom_pilot example");
+    println!("=== Custom Pilot Example ===\n");
+
+    // 1. Create the custom pilot
+    let pilot = KeywordPilot::new();
+    println!("Created KeywordPilot\n");
+
+    // 2. Create a sample document tree
+    let tree = create_sample_tree();
+    println!("Created sample tree with {} nodes\n", tree.node_count());
+
+    // 3. Create search state for demonstration
+    let query = "What is the architecture?";
+    let candidates: Vec<NodeId> = tree.children(tree.root());
+    let visited: HashSet<NodeId> = HashSet::new();
+    let state = SearchState::new(&tree, query, &[], &candidates, &visited);
+
+    println!("Query: \"{}\"", query);
+    println!("Candidates: {}", candidates.len());
+    println!("Should intervene: {}\n", pilot.should_intervene(&state));
+
+    // 4. Demonstrate keyword scoring
+    println!("Keyword scoring:");
+    for node_id in tree.children(tree.root()) {
+        if let Some(node) = tree.get(node_id) {
+            let score = pilot.score_by_keywords(query, &node.title);
+            println!("  - '{}': {:.2}", node.title, score);
+        }
+    }
+
+    // 5. Show how to integrate with retrieval
+    println!("\n--- Integration Example ---\n");
+    println!("To use with Engine:");
+    println!("```rust");
+    println!("use std::sync::Arc;");
+    println!("use vectorless::Engine;");
+    println!();
+    println!("let pilot = Arc::new(KeywordPilot::new());");
+    println!("let engine = Engine::builder()");
+    println!("    .with_workspace(\"./workspace\")");
+    println!("    .with_pilot(pilot)");
+    println!("    .build()");
+    println!("    .await?;");
+    println!("```");
+
+    println!("\n=== Done ===");
+}
+
+fn create_sample_tree() -> DocumentTree {
+    let mut tree = DocumentTree::new(
+        "Vectorless Documentation",
+        "A hierarchical document intelligence engine written in Rust.",
+    );
+
+    let arch = tree.add_child(
+        tree.root(),
+        "Architecture",
+        "The system consists of three main components.",
+    );
+    tree.add_child(
+        arch,
+        "Index Pipeline",
+        "Processes documents into a tree structure.",
+    );
+    tree.add_child(
+        arch,
+        "Retrieval Pipeline",
+        "Finds relevant content using multi-stage processing.",
+    );
+
+    let usage = tree.add_child(tree.root(), "Usage", "How to use the vectorless library.");
+    tree.add_child(usage, "Basic Example", "Simple usage with default configuration.");
+    tree.add_child(
+        usage,
+        "Advanced Example",
+        "Custom pipeline configuration with LLM.",
+    );
+
+    tree
 }
diff --git a/examples/reference_following.rs b/examples/reference_following.rs
new file mode 100644
index 0000000..c16a2bc
--- /dev/null
+++ b/examples/reference_following.rs
@@ -0,0 +1,191 @@
+// Copyright (c) 2026 vectorless developers
+// SPDX-License-Identifier: Apache-2.0
+
+//! Reference Following Example
+//!
+//! This example demonstrates the reference following feature which allows
+//! the retrieval system to follow in-document references like
+//! "see Appendix G" or "refer to Table 5.3".
+//!
+//! # What you'll learn:
+//! - How references are extracted from document content
+//! - How references are resolved to actual nodes
+//! - How to use ReferenceFollower to expand search results
+//!
+//! # Key concepts:
+//!
+//! ## Reference Types
+//! - Section: "see Section 2.1", "Section 3.2.1"
+//! - Appendix: "see Appendix G", "Appendix A"
+//! - Table: "Table 5.3", "refer to Table 1"
+//! - Figure: "Figure 2.1", "fig. 3"
+//! - Page: "see page 42", "p. 15"
+//!
+//! ## Resolution Flow
+//! ```text
+//! Extract References → Resolve to Nodes → Follow → Expand Context
+//! ```
+
+use vectorless::document::{
+    DocumentTree, NodeReference, RefType, ReferenceExtractor,
+};
+use vectorless::retrieval::{
+    expand_with_references, FollowedReference, ReferenceConfig, ReferenceFollower,
+};
+
+fn main() {
+    println!("=== Reference Following Example ===\n");
+
+    // 1. Create a document tree with references
+    let tree = create_document_with_references();
+    println!("Created document tree with {} nodes\n", tree.node_count());
+
+    // 2. Build retrieval index
+    let index = tree.build_retrieval_index();
+    println!("Built retrieval index\n");
+
+    // 3. Demonstrate reference extraction
+    println!("--- Reference Extraction ---\n");
+
+    let content = "For more details, see Section 2.1 and Appendix G. The data is shown in Table 5.3.";
+    let refs = ReferenceExtractor::extract(content);
+
+    println!("Content: \"{}\"\n", content);
+    println!("Extracted {} references:", refs.len());
+    for r#ref in &refs {
+        println!(
+            "  - {:?}: '{}' -> target '{}'",
+            r#ref.ref_type, r#ref.ref_text, r#ref.target_id
+        );
+    }
+    println!();
+
+    // 4. Demonstrate reference resolution
+    println!("--- Reference Resolution ---\n");
+
+    let resolved_refs = ReferenceExtractor::extract_and_resolve(content, &tree, &index);
+    println!("Resolved references:");
+    for r#ref in &resolved_refs {
+        let status = if r#ref.is_resolved() {
+            format!("resolved (confidence: {:.2})", r#ref.confidence)
+        } else {
+            "unresolved".to_string()
+        };
+        println!(
+            "  - {:?}: '{}' -> {}",
+            r#ref.ref_type, r#ref.target_id, status
+        );
+    }
+    println!();
+
+    // 5. Demonstrate reference following
+    println!("--- Reference Following ---\n");
+
+    let config = ReferenceConfig {
+        max_depth: 3,
+        max_references: 10,
+        follow_pages: true,
+        follow_tables_figures: true,
+        min_confidence: 0.3,
+        ..Default::default()
+    };
+    let follower = ReferenceFollower::new(config);
+
+    // Get the financial section node (which contains references)
+    let financial_node = find_node_by_title(&tree, "Financial Summary");
+    if let Some(node_id) = financial_node {
+        let followed = follower.follow_from_node(&tree, &index, node_id);
+
+        println!("Following references from 'Financial Summary':");
+        for f in &followed {
+            let target = if let Some(target_id) = f.target_node {
+                let title = tree.get(target_id).map(|n| n.title.as_str()).unwrap_or("?");
+                format!("-> '{}' (depth {})", title, f.depth)
+            } else {
+                "-> (unresolved)".to_string()
+            };
+            println!(
+                "  - {:?} '{}' {}",
+                f.reference.ref_type, f.reference.target_id, target
+            );
+        }
+    }
+    println!();
+
+    // 6. Demonstrate expansion with references
+    println!("--- Expansion with References ---\n");
+
+    let initial_nodes: Vec<_> = tree.children(tree.root());
+    println!("Initial nodes: {} (root's children)", initial_nodes.len());
+
+    let expansion = expand_with_references(&tree, &index, &initial_nodes, None);
+
+    println!(
+        "After reference expansion: {} total nodes, {} new",
+        expansion.all_nodes().len(),
+        expansion.expanded_nodes.len()
+    );
+
+    if expansion.has_expansion() {
+        println!("\nExpanded nodes:");
+        for node_id in expansion.new_nodes() {
+            if let Some(node) = tree.get(*node_id) {
+                println!("  - {}", node.title);
+            }
+        }
+    }
+    println!();
+
+    // 7. Show configuration options
+    println!("--- Configuration Options ---\n");
+
+    let conservative = ReferenceConfig::conservative();
+    let aggressive = ReferenceConfig::aggressive();
+
+    println!("Conservative config:");
+    println!("  - Max depth: {}", conservative.max_depth);
+    println!("  - Max references: {}", conservative.max_references);
+
+    println!("\nAggressive config:");
+    println!("  - Max depth: {}", aggressive.max_depth);
+    println!("  - Max references: {}", aggressive.max_references);
+
+    println!("\n=== Done ===");
+}
+
+fn create_document_with_references() -> DocumentTree {
+    let mut tree = DocumentTree::new("Annual Report", "Company annual financial report.");
+
+    // Main sections
+    let _intro = tree.add_child(tree.root(), "Introduction", "Overview of the report.");
+    let financial = tree.add_child(
+        tree.root(),
+        "Financial Summary",
+        "Financial overview for 2023. For detailed breakdown, see Section 2.1. Revenue data is in Table 5.3. Additional details in Appendix G.",
+    );
+    let _appendix = tree.add_child(
+        tree.root(),
+        "Appendix G",
+        "Detailed financial tables and data.",
+    );
+
+    // Subsections
+    tree.add_child(
+        financial,
+        "2.1 Revenue",
+        "Revenue increased by 15% year over year. See Table 5.3 for breakdown.",
+    );
+
+    tree
+}
+
+fn find_node_by_title(tree: &DocumentTree, title: &str) -> Option<vectorless::document::NodeId> {
+    for node_id in tree.traverse() {
+        if let Some(node) = tree.get(node_id) {
+            if node.title == title {
+                return Some(node_id);
+            }
+        }
+    }
+    None
+}
diff --git a/src/config/loader.rs b/src/config/loader.rs
index 99aef12..33f8bb9 100644
--- a/src/config/loader.rs
+++ b/src/config/loader.rs
@@ -3,8 +3,8 @@
 
 //! Configuration loader.
 //!
-//! Loads configuration from TOML files with optional environment variable
-//! overrides and validation.
+//! Loads configuration from TOML files with validation.
+//! All configuration must be explicit in the config file - no environment variables.
 //!
 //! # Example
 //!
@@ -22,12 +22,6 @@
 //!     .with_validation(true)
 //!     .load()?;
 //!
-//! // Load with environment variable override
-//! let config = ConfigLoader::new()
-//!     .file("config.toml")
-//!     .with_env("VECTORLESS_")
-//!     .load()?;
-//!
 //! // Layered configuration
 //! let config = ConfigLoader::new()
 //!     .file("default.toml")
@@ -66,10 +60,6 @@ pub enum ConfigError {
     /// Configuration validation failed.
     #[error("{0}")]
     Validation(#[from] super::types::ConfigValidationError),
-
-    /// Environment variable error.
-    #[error("Environment variable error: {0}")]
-    Env(String),
 }
 
 /// Configuration loader.
@@ -78,9 +68,6 @@ pub struct ConfigLoader {
     /// Configuration file paths (loaded in order, later files override earlier).
     files: Vec<PathBuf>,
 
-    /// Environment variable prefix (optional).
-    env_prefix: Option<String>,
-
     /// Whether to validate after loading.
     validate: bool,
 
@@ -99,7 +86,6 @@ impl ConfigLoader {
     pub fn new() -> Self {
         Self {
             files: Vec::new(),
-            env_prefix: None,
             validate: false,
             validator: None,
         }
@@ -124,15 +110,6 @@ impl ConfigLoader {
         self
     }
 
-    /// Enable environment variable override.
-    ///
-    /// Variables like `VECTORLESS_SUMMARY__API_KEY` override config values.
-    /// Use `__` (double underscore) to separate nested keys.
-    pub fn with_env(mut self, prefix: impl Into<String>) -> Self {
-        self.env_prefix = Some(prefix.into());
-        self
-    }
-
     /// Enable or disable validation after loading.
     pub fn with_validation(mut self, validate: bool) -> Self {
         self.validate = validate;
@@ -151,8 +128,7 @@ impl ConfigLoader {
     ///
     /// 1. Start with default configuration
     /// 2. Load and merge each specified file (in order)
-    /// 3. Apply environment variable overrides (if enabled)
-    /// 4. Validate configuration (if enabled)
+    /// 3. Validate configuration (if enabled)
     ///
     /// # Errors
     ///
@@ -174,11 +150,6 @@ impl ConfigLoader {
             }
         }
 
-        // Apply environment variable overrides
-        if let Some(ref prefix) = self.env_prefix {
-            self.apply_env_overrides(&mut config, prefix)?;
-        }
-
         // Validate if requested
         if self.validate {
             let validator = self.validator.unwrap_or_default();
@@ -187,80 +158,6 @@ impl ConfigLoader {
 
         Ok(config)
     }
-
-    /// Apply environment variable overrides to the configuration.
-    fn apply_env_overrides(&self, config: &mut Config, prefix: &str) -> Result<(), ConfigError> {
-        for (key, value) in std::env::vars() {
-            if !key.starts_with(prefix) {
-                continue;
-            }
-
-            // Parse the path: VECTORLESS_SUMMARY__API_KEY -> ["summary", "api_key"]
-            let path_str = key.trim_start_matches(prefix).trim_start_matches('_');
-            let parts: Vec<&str> = path_str.split("__").collect();
-
-            if parts.is_empty() {
-                continue;
-            }
-
-            // Apply the override
-            self.set_by_path(config, &parts, &value)?;
-        }
-
-        Ok(())
-    }
-
-    /// Set a configuration value by path.
-    fn set_by_path(
-        &self,
-        config: &mut Config,
-        path: &[&str],
-        value: &str,
-    ) -> Result<(), ConfigError> {
-        match path {
-            ["summary", "api_key"] => {
-                config.summary.api_key = Some(value.to_string());
-            }
-            ["summary", "model"] => {
-                config.summary.model = value.to_string();
-            }
-            ["summary", "endpoint"] => {
-                config.summary.endpoint = value.to_string();
-            }
-            ["summary", "max_tokens"] => {
-                config.summary.max_tokens = value
-                    .parse()
-                    .map_err(|e| ConfigError::Env(format!("Invalid max_tokens: {}", e)))?;
-            }
-            ["retrieval", "api_key"] => {
-                config.retrieval.api_key = Some(value.to_string());
-            }
-            ["retrieval", "model"] => {
-                config.retrieval.model = value.to_string();
-            }
-            ["retrieval", "endpoint"] => {
-                config.retrieval.endpoint = value.to_string();
-            }
-            ["retrieval", "top_k"] => {
-                config.retrieval.top_k = value
-                    .parse()
-                    .map_err(|e| ConfigError::Env(format!("Invalid top_k: {}", e)))?;
-            }
-            ["storage", "workspace_dir"] => {
-                config.storage.workspace_dir = PathBuf::from(value);
-            }
-            ["concurrency", "max_concurrent_requests"] => {
-                config.concurrency.max_concurrent_requests = value.parse().map_err(|e| {
-                    ConfigError::Env(format!("Invalid max_concurrent_requests: {}", e))
-                })?;
-            }
-            _ => {
-                // Unknown path - could log a warning
-            }
-        }
-
-        Ok(())
-    }
 }
 
 /// Default configuration file names to search for.
@@ -307,10 +204,6 @@ mod tests {
         assert_eq!(config.indexer.subsection_threshold, 300);
         assert_eq!(config.summary.model, "gpt-4o-mini");
         assert_eq!(config.retrieval.model, "gpt-4o");
-        assert_eq!(config.concurrency.max_concurrent_requests, 10);
-        assert_eq!(config.concurrency.requests_per_minute, 500);
-        assert!(config.concurrency.enabled);
-        assert!(config.concurrency.semaphore_enabled);
     }
 
     #[test]
diff --git a/src/config/mod.rs b/src/config/mod.rs
index 42567fa..22c56cf 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -18,7 +18,7 @@
 //!
 //! // Load from file
 //! let config = ConfigLoader::new()
-//!     .file("config.toml")
+//!     .file("vectorless.toml")
 //!     .with_validation(true)
 //!     .load()?;
 //!
@@ -37,35 +37,19 @@
 //! let config = ConfigLoader::new()
 //!     .file("default.toml")        // Base defaults
 //!     .file("production.toml")     // Production overrides
-//!     .with_env("VECTORLESS_")     // Environment overrides
 //!     .with_validation(true)
 //!     .load()?;
 //! # Ok::<(), vectorless::config::ConfigError>(())
 //! ```
 //!
-//! # Environment Variables
-//!
-//! When enabled with `with_env()`, environment variables can override config:
-//!
-//! | Variable | Config Path |
-//! |----------|-------------|
-//! | `VECTORLESS_SUMMARY__API_KEY` | `summary.api_key` |
-//! | `VECTORLESS_RETRIEVAL__TOP_K` | `retrieval.top_k` |
-//! | `VECTORLESS_STORAGE__WORKSPACE_DIR` | `storage.workspace_dir` |
-//!
 //! # Configuration Sections
 //!
+//! - `[llm]` — Unified LLM configuration (pool, retry, throttle, fallback)
+//! - `[metrics]` — Unified metrics configuration
+//! - `[pilot]` — Pilot navigation configuration
 //! - `[indexer]` — Document indexing parameters
-//! - `[summary]` — Summarization model settings
 //! - `[retrieval]` — Retrieval model settings
-//! - `[retrieval.search]` — Search algorithm configuration
-//! - `[retrieval.sufficiency]` — Sufficiency checker settings
-//! - `[retrieval.content]` — Content aggregator settings
-//! - `[retrieval.strategy]` — Strategy-specific settings
-//! - `[retrieval.cache]` — Cache configuration
 //! - `[storage]` — Storage paths
-//! - `[concurrency]` — Concurrency control
-//! - `[fallback]` — Error recovery settings
 
 mod docs;
 mod loader;
diff --git a/vectorless.example.toml b/vectorless.example.toml
index 505f0fb..309b324 100644
--- a/vectorless.example.toml
+++ b/vectorless.example.toml
@@ -7,33 +7,46 @@
 # ============================================================================
 # LLM Configuration (Unified)
 # ============================================================================
+#
+# The LLM pool allows configuring different models for different purposes:
+# - summary: Used for generating document summaries during indexing
+# - retrieval: Used for retrieval decisions and content evaluation
+# - pilot: Used for intelligent navigation guidance
+#
+# Each client can have its own model, endpoint, and settings.
 
 [llm]
-# Default API key (can be overridden per client)
-# api_key = "sk-..."
+# Default API key (used by all clients unless overridden per-client)
+api_key = "sk-your-api-key-here"
 
-# Summary client - used for generating document summaries
-[llm.pool.summary]
+# Summary client - generates document summaries during indexing
+# Use a fast, cheap model for bulk processing
+[llm.summary]
 model = "gpt-4o-mini"
 endpoint = "https://api.openai.com/v1"
 max_tokens = 200
 temperature = 0.0
+# api_key = "sk-specific-key-for-summary"  # Optional: override default
 
-# Retrieval client - used for navigation decisions
-[llm.pool.retrieval]
+# Retrieval client - used for retrieval decisions and content evaluation
+# Can use a more capable model for better decisions
+[llm.retrieval]
 model = "gpt-4o"
 endpoint = "https://api.openai.com/v1"
 max_tokens = 100
 temperature = 0.0
+# api_key = "sk-specific-key-for-retrieval"  # Optional: override default
 
-# Pilot client - used for intelligent navigation
-[llm.pool.pilot]
+# Pilot client - used for intelligent navigation guidance
+# Use a fast model for quick navigation decisions
+[llm.pilot]
 model = "gpt-4o-mini"
 endpoint = "https://api.openai.com/v1"
 max_tokens = 300
 temperature = 0.0
+# api_key = "sk-specific-key-for-pilot"  # Optional: override default
 
-# Retry configuration
+# Retry configuration (applies to all LLM calls)
 [llm.retry]
 max_attempts = 3
 initial_delay_ms = 500
@@ -41,17 +54,18 @@ max_delay_ms = 30000
 multiplier = 2.0
 retry_on_rate_limit = true
 
-# Throttle/rate limiting configuration
+# Throttle/rate limiting configuration (applies to all LLM calls)
 [llm.throttle]
 max_concurrent_requests = 10
 requests_per_minute = 500
 enabled = true
 semaphore_enabled = true
 
-# Fallback configuration
+# Fallback configuration (applies to all LLM calls)
 [llm.fallback]
 enabled = true
 models = ["gpt-4o-mini", "glm-4-flash"]
+# Alternative endpoints for fallback
 # endpoints = [
 #     "https://api.openai.com/v1",
 #     "https://api.z.ai/api/paas/v4"
@@ -73,7 +87,7 @@ retention_days = 30
 track_tokens = true
 track_latency = true
 track_cost = true
-cost_per_1k_input_tokens = 0.00015   # gpt-4o-mini
+cost_per_1k_input_tokens = 0.00015   # gpt-4o-mini pricing
 cost_per_1k_output_tokens = 0.0006
 
 [metrics.pilot]
@@ -153,7 +167,7 @@ low_similarity_threshold = 0.3
 enabled = true
 token_budget = 4000
 min_relevance_score = 0.2
-scoring_strategy = "keyword_bm25"
+scoring_strategy = "hybrid"  # keyword | bm25 | hybrid
 output_format = "markdown"
 include_scores = false
 hierarchical_min_per_level = 0.1
@@ -170,6 +184,18 @@ max_sub_queries = 3
 decomposition_model = "gpt-4o-mini"
 aggregation_strategy = "merge"  # merge | rank | synthesize
 
+# ============================================================================
+# Reference Following Configuration
+# ============================================================================
+
+[retrieval.reference]
+enabled = true
+max_depth = 3
+max_references = 10
+follow_pages = true
+follow_tables_figures = true
+min_confidence = 0.5
+
 # ============================================================================
 # Storage Configuration
 # ============================================================================
@@ -195,26 +221,3 @@ subsection_threshold = 300
 max_segment_tokens = 3000
 max_summary_tokens = 200
 min_summary_tokens = 20
-
-# ============================================================================
-# Legacy Configuration (deprecated, use llm.* instead)
-# ============================================================================
-
-[summary]
-model = "gpt-4o-mini"
-endpoint = "https://api.openai.com/v1"
-max_tokens = 200
-temperature = 0.0
-
-[concurrency]
-max_concurrent_requests = 10
-requests_per_minute = 500
-enabled = true
-semaphore_enabled = true
-
-[fallback]
-enabled = true
-models = ["gpt-4o-mini", "glm-4-flash"]
-on_rate_limit = "retry_then_fallback"
-on_timeout = "retry_then_fallback"
-on_all_failed = "return_error"

From 2f97589c7591cbbaa600bbef1f6a6c4887ca0cb8 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sun, 5 Apr 2026 20:35:14 +0800
Subject: [PATCH 3/6] refactor(examples): remove unused imports from
 reference_following example

Remove unused NodeReference and RefType imports from
reference_following.rs example file to clean up the code.
---
 examples/reference_following.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/reference_following.rs b/examples/reference_following.rs
index c16a2bc..1f95cbf 100644
--- a/examples/reference_following.rs
+++ b/examples/reference_following.rs
@@ -27,10 +27,10 @@
 //! ```
 
 use vectorless::document::{
-    DocumentTree, NodeReference, RefType, ReferenceExtractor,
+    DocumentTree, ReferenceExtractor,
 };
 use vectorless::retrieval::{
-    expand_with_references, FollowedReference, ReferenceConfig, ReferenceFollower,
+    expand_with_references, ReferenceConfig, ReferenceFollower,
 };
 
 fn main() {

From 00a2df3d9b5da1717bfad04603c6d2fc6afda8d5 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sun, 5 Apr 2026 20:54:40 +0800
Subject: [PATCH 4/6] docs(paper): add Vectorless research paper draft

Add comprehensive research paper documenting the Vectorless framework,
including abstract, introduction, background, and system architecture
sections covering the learning-enhanced reasoning-based document
retrieval approach with feedback-driven adaptation.

---

refactor(client): update example code return types and async calls

Change example code return types from vectorless::Result<()> to
Result<(), Box<dyn std::error::Error>> and ensure proper async/await
usage in EngineBuilder build() calls across documentation examples.

---

refactor(index_context): update example code return types and async calls

Standardize example code return types to
Result<(), Box<dyn std::error::Error>> and ensure proper async/await
syntax in index context documentation examples.

---

refactor(mod): update example code return types and event imports

Update documentation examples to use standard error handling with
Result<(), Box<dyn std::error::Error>> and fix event module imports
by removing redundant path specification.

---

refactor(lib): update example code return types and async syntax

Standardize main function return types in examples and ensure
consistent async/await usage throughout library documentation.

---

docs(llm): mark unstable examples as ignore

Add ignore attribute to LLM fallback and retry example code blocks
to prevent test failures on unstable examples.

---

feat(metrics): export InterventionPoint in metrics module

Export the InterventionPoint type in metrics hub and module to make it
available for import in example code.

---

refactor(retrieval): fix strategy module path in example

Correct the module path import in LLM strategy example documentation
from retriever::strategy to retrieval::strategy.

---

refactor(util): update format utility imports in examples

Fix import paths in format utility examples to use direct module
imports instead of nested paths (e.g., util::truncate instead of
util::format::truncate).

---

refactor(util): update timing utility imports in examples

Correct import path in timing utility example to use direct module
import (util::Timer instead of util::timing::Timer).
---
 docs/paper/vectorless(draft).md | 88 +++++++++++++++++++++++++++++++++
 src/client/engine.rs            | 10 ++--
 src/client/index_context.rs     |  5 +-
 src/client/mod.rs               | 19 ++++---
 src/lib.rs                      | 10 ++--
 src/llm/fallback.rs             |  2 +-
 src/llm/retry.rs                |  2 +-
 src/metrics/hub.rs              |  2 +-
 src/metrics/mod.rs              |  2 +-
 src/retrieval/strategy/llm.rs   |  2 +-
 src/util/format.rs              |  8 +--
 src/util/timing.rs              |  2 +-
 12 files changed, 124 insertions(+), 28 deletions(-)
 create mode 100644 docs/paper/vectorless(draft).md

diff --git a/docs/paper/vectorless(draft).md b/docs/paper/vectorless(draft).md
new file mode 100644
index 0000000..5a9d2df
--- /dev/null
+++ b/docs/paper/vectorless(draft).md
@@ -0,0 +1,88 @@
+# Vectorless: Learning-Enhanced Reasoning-based Document Retrieval with Feedback-driven Adaptation
+
+**Abstract**
+
+Large Language Models (LLMs) have transformed document understanding and question answering, yet traditional vector-based Retrieval Augmented Generation (RAG) systems suffer from fundamental limitations: loss of document structure, semantic similarity ≠ relevance mismatches, and inability to learn from user feedback. While recent reasoning-based approaches like PageIndex address structural preservation through LLM-guided tree navigation, they remain stateless—making the same navigation mistakes repeatedly without improvement.
+
+We present **Vectorless**, a reasoning-based retrieval framework that introduces three key innovations: (1) **Feedback Learning**, a closed-loop system that learns from user corrections to improve navigation decisions over time; (2) **Hybrid Scoring**, combining algorithmic efficiency (BM25 + keyword overlap) with LLM reasoning for cost-effective accuracy; and (3) **Reference Following**, automatically traversing in-document cross-references like "see Appendix G" to gather complete context. Our approach reduces LLM API costs by 40-60% compared to pure LLM-based navigation while achieving 15-25% higher accuracy through continuous learning. Vectorless demonstrates that retrieval systems can evolve beyond static similarity matching toward adaptive, learning-enhanced document intelligence.
+
+---
+
+## 1. Introduction
+
+The dominance of vector-based RAG systems has created an implicit assumption: semantic similarity is the primary signal for information retrieval. However, this assumption breaks down in domain-specific documents where:
+
+1. **Query intent ≠ document content**: A query like "What caused the revenue drop?" expresses intent, not content. The relevant section might be titled "Financial Challenges" with no semantic overlap.
+
+2. **Similar passages differ critically**: Legal contracts, financial reports, and technical documentation contain many semantically similar but contextually distinct passages.
+
+3. **Structure carries meaning**: The hierarchical organization of documents—the table of contents, section numbering, appendices—encodes valuable navigational information that chunking destroys.
+
+Recent reasoning-based approaches like PageIndex address these issues by using LLMs to navigate document structure directly. However, these systems share a critical limitation: **they are stateless**. Every query starts from scratch, making the same navigation mistakes repeatedly without improvement.
+
+### 1.1 Our Contribution
+
+Vectorless advances reasoning-based retrieval through three key innovations:
+
+| Innovation | Problem Addressed | Approach |
+|------------|------------------|----------|
+| **Feedback Learning** | Stateless navigation repeats mistakes | Closed-loop learning from user corrections |
+| **Hybrid Scoring** | Pure LLM navigation is expensive | Algorithm (BM25) + LLM reasoning fusion |
+| **Reference Following** | Cross-references break retrieval chains | Automatic reference resolution and traversal |
+
+Our key insight is that **document retrieval can be treated as a learning problem**, not just a search problem. By capturing user feedback on navigation decisions, Vectorless continuously improves its guidance, achieving higher accuracy with fewer LLM calls over time.
+
+---
+
+## 2. Background and Motivation
+
+### 2.1 Limitations of Vector-based RAG
+
+Traditional vector-based RAG systems follow a simple pipeline:
+
+```
+Document → Chunk → Embed → Store in Vector DB
+Query → Embed → Similarity Search → Return Top-K Chunks
+```
+
+This approach suffers from several well-documented issues:
+
+**Query-Knowledge Space Mismatch.** Vector retrieval assumes semantically similar text is relevant. However, queries express *intent*, not content. "What are the risks?" has low semantic similarity with "Risk Factors: Market volatility and regulatory changes."
+
+**Semantic Similarity ≠ Relevance.** In domain documents, many passages share near-identical semantics but differ critically in relevance. "Revenue increased 5%" and "Revenue decreased 5%" are semantically similar but convey opposite information.
+
+**Loss of Structure.** Chunking fragments logical document organization. A section titled "2.1 Revenue Analysis" with subsections "2.1.1 Domestic" and "2.1.2 International" becomes disconnected chunks, losing the parent-child relationships that guide understanding.
+
+### 2.2 Reasoning-based Retrieval: PageIndex
+
+PageIndex introduced reasoning-based retrieval, where LLMs navigate document structure directly:
+
+```
+Document → Tree Structure (ToC Index)
+Query → LLM navigates tree → Extract relevant sections
+```
+
+This approach preserves structure and enables semantic navigation. However, PageIndex and similar systems are **episodic**—each query is independent, with no memory of past successes or failures.
+
+### 2.3 The Learning Gap
+
+Consider a retrieval system that repeatedly encounters queries about "revenue breakdown." Without learning:
+
+- Query 1: Navigates to "Financial Overview" → Wrong section → Backtracks → Finds "Revenue Analysis"
+- Query 2: Same navigation mistake → Same backtrack → Same result
+- Query 100: Still making the same mistake
+
+A learning-enhanced system would:
+
+- Query 1: Makes mistake, receives negative feedback
+- Query 2: Recalls feedback, navigates directly to "Revenue Analysis"
+- Query 100: Near-optimal navigation from accumulated experience
+
+This is the core innovation of Vectorless.
+
+---
+
+## 3. System Architecture
+
+### 3.1 Overview
+
diff --git a/src/client/engine.rs b/src/client/engine.rs
index caaef8b..886e319 100644
--- a/src/client/engine.rs
+++ b/src/client/engine.rs
@@ -22,11 +22,12 @@
 //! use vectorless::client::{Engine, EngineBuilder, IndexContext};
 //!
 //! # #[tokio::main]
-//! # async fn main() -> vectorless::Result<()> {
+//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //! // Create a client
 //! let client = EngineBuilder::new()
 //!     .with_workspace("./my_workspace")
-//!     .build()?;
+//!     .build()
+//!     .await?;
 //!
 //! // Index a document from file
 //! let doc_id = client.index(IndexContext::from_path("./document.md")).await?;
@@ -187,10 +188,11 @@ impl Engine {
     /// use vectorless::parser::DocumentFormat;
     ///
     /// # #[tokio::main]
-    /// # async fn main() -> vectorless::Result<()> {
+    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
     /// let engine = EngineBuilder::new()
     ///     .with_workspace("./data")
-    ///     .build()?;
+    ///     .build()
+    ///     .await?;
     ///
     /// // From file
     /// let id1 = engine.index(IndexContext::from_path("./doc.md")).await?;
diff --git a/src/client/index_context.rs b/src/client/index_context.rs
index 88ad7fd..e5e1741 100644
--- a/src/client/index_context.rs
+++ b/src/client/index_context.rs
@@ -153,10 +153,11 @@ impl IndexSource {
 /// use vectorless::parser::DocumentFormat;
 ///
 /// # #[tokio::main]
-/// # async fn main() -> vectorless::Result<()> {
+/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
 /// let engine = EngineBuilder::new()
 ///     .with_workspace("./data")
-///     .build()?;
+///     .build()
+///     .await?;
 ///
 /// // Index from file
 /// let id1 = engine.index(IndexContext::from_path("./doc.md")).await?;
diff --git a/src/client/mod.rs b/src/client/mod.rs
index abc2b8e..a9289dd 100644
--- a/src/client/mod.rs
+++ b/src/client/mod.rs
@@ -34,11 +34,12 @@
 //! use vectorless::client::{Engine, EngineBuilder, IndexContext};
 //!
 //! # #[tokio::main]
-//! # async fn main() -> vectorless::Result<()> {
+//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //! // Create a client with default settings
 //! let client = EngineBuilder::new()
 //!     .with_workspace("./my_workspace")
-//!     .build()?;
+//!     .build()
+//!     .await?;
 //!
 //! // Index a document from file
 //! let doc_id = client.index(IndexContext::from_path("./document.md")).await?;
@@ -69,12 +70,13 @@
 //! ```rust,no_run
 //! # use vectorless::client::{Engine, EngineBuilder, IndexContext};
 //! # #[tokio::main]
-//! # async fn main() -> vectorless::Result<()> {
+//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //! let client = EngineBuilder::new()
 //!     .with_workspace("./workspace")
-//!     .build()?;
+//!     .build()
+//!     .await?;
 //!
-//! let session = client.session();
+//! let session = client.session().await;
 //!
 //! // Index multiple documents
 //! let doc1 = session.index(IndexContext::from_path("./doc1.md")).await?;
@@ -91,9 +93,9 @@
 //! Monitor operation progress with events:
 //!
 //! ```rust,no_run
-//! # use vectorless::client::{Engine, EngineBuilder, EventEmitter, events::IndexEvent};
+//! # use vectorless::client::{Engine, EngineBuilder, EventEmitter, IndexEvent};
 //! # #[tokio::main]
-//! # async fn main() -> vectorless::Result<()> {
+//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //! let events = EventEmitter::new()
 //!     .on_index(|e| match e {
 //!         IndexEvent::Complete { doc_id } => println!("Indexed: {}", doc_id),
@@ -102,7 +104,8 @@
 //!
 //! let client = EngineBuilder::new()
 //!     .with_events(events)
-//!     .build()?;
+//!     .build()
+//!     .await?;
 //! # Ok(())
 //! # }
 //! ```
diff --git a/src/lib.rs b/src/lib.rs
index 51657c5..49ffaf9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -67,16 +67,18 @@
 //!
 //! ```rust,no_run
 //! use vectorless::{EngineBuilder, Engine};
+//! use vectorless::client::IndexContext;
 //!
 //! #[tokio::main]
-//! async fn main() -> vectorless::Result<()> {
+//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
 //!     // Create client
-//!     let mut client = EngineBuilder::new()
+//!     let client = EngineBuilder::new()
 //!         .with_workspace("./workspace")
-//!         .build()?;
+//!         .build()
+//!         .await?;
 //!
 //!     // Index a document
-//!     let doc_id = client.index("./document.md").await?;
+//!     let doc_id = client.index(IndexContext::from_path("./document.md")).await?;
 //!
 //!     // Query with natural language
 //!     let result = client.query(&doc_id, "What is this about?").await?;
diff --git a/src/llm/fallback.rs b/src/llm/fallback.rs
index b6dac31..fb6e37c 100644
--- a/src/llm/fallback.rs
+++ b/src/llm/fallback.rs
@@ -10,7 +10,7 @@
 //!
 //! # Example
 //!
-//! ```rust
+//! ```rust,ignore
 //! use vectorless::llm::fallback::{FallbackChain, FallbackConfig};
 //!
 //! let config = FallbackConfig::default();
diff --git a/src/llm/retry.rs b/src/llm/retry.rs
index 7599001..e0fdb19 100644
--- a/src/llm/retry.rs
+++ b/src/llm/retry.rs
@@ -16,7 +16,7 @@ use super::error::{LlmError, LlmResult};
 ///
 /// # Example
 ///
-/// ```rust,no_run
+/// ```rust,ignore
 /// use vectorless::llm::{RetryConfig, with_retry, LlmError, LlmResult};
 ///
 /// # #[tokio::main]
diff --git a/src/metrics/hub.rs b/src/metrics/hub.rs
index 73ab6b7..2088e25 100644
--- a/src/metrics/hub.rs
+++ b/src/metrics/hub.rs
@@ -24,7 +24,7 @@ use crate::config::MetricsConfig;
 /// # Example
 ///
 /// ```rust
-/// use vectorless::metrics::{MetricsHub, MetricsConfig};
+/// use vectorless::metrics::{MetricsHub, MetricsConfig, InterventionPoint};
 ///
 /// let config = MetricsConfig::default();
 /// let hub = MetricsHub::new(config);
diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs
index 6910497..b190fcb 100644
--- a/src/metrics/mod.rs
+++ b/src/metrics/mod.rs
@@ -33,7 +33,7 @@
 //! # Example
 //!
 //! ```rust
-//! use vectorless::metrics::{MetricsHub, MetricsConfig};
+//! use vectorless::metrics::{MetricsHub, MetricsConfig, InterventionPoint};
 //!
 //! let config = MetricsConfig::default();
 //! let hub = MetricsHub::new(config);
diff --git a/src/retrieval/strategy/llm.rs b/src/retrieval/strategy/llm.rs
index befc3eb..e83b6b7 100644
--- a/src/retrieval/strategy/llm.rs
+++ b/src/retrieval/strategy/llm.rs
@@ -34,7 +34,7 @@ struct NavigationResponse {
 /// # Example
 ///
 /// ```rust,no_run
-/// use vectorless::retriever::strategy::LlmStrategy;
+/// use vectorless::retrieval::strategy::LlmStrategy;
 /// use vectorless::llm::LlmClient;
 ///
 /// let client = LlmClient::with_defaults();
diff --git a/src/util/format.rs b/src/util/format.rs
index 059b9ed..99e821b 100644
--- a/src/util/format.rs
+++ b/src/util/format.rs
@@ -8,7 +8,7 @@
 /// # Example
 ///
 /// ```
-/// use vectorless::util::format::truncate;
+/// use vectorless::util::truncate;
 ///
 /// assert_eq!(truncate("hello world", 8), "hello...");
 /// assert_eq!(truncate("hi", 10), "hi");
@@ -53,7 +53,7 @@ pub fn truncate_words(text: &str, max_len: usize) -> String {
 /// # Example
 ///
 /// ```
-/// use vectorless::util::format::format_number;
+/// use vectorless::util::format_number;
 ///
 /// assert_eq!(format_number(1000), "1,000");
 /// assert_eq!(format_number(1234567), "1,234,567");
@@ -78,7 +78,7 @@ pub fn format_number(n: usize) -> String {
 /// # Example
 ///
 /// ```
-/// use vectorless::util::format::format_bytes;
+/// use vectorless::util::format_bytes;
 ///
 /// assert_eq!(format_bytes(500), "500 B");
 /// assert_eq!(format_bytes(1024), "1.0 KB");
@@ -106,7 +106,7 @@ pub fn format_bytes(bytes: usize) -> String {
 /// # Example
 ///
 /// ```
-/// use vectorless::util::format::format_percent;
+/// use vectorless::util::format_percent;
 ///
 /// assert_eq!(format_percent(0.5), "50.0%");
 /// assert_eq!(format_percent(0.123), "12.3%");
diff --git a/src/util/timing.rs b/src/util/timing.rs
index 5b3cabb..b885849 100644
--- a/src/util/timing.rs
+++ b/src/util/timing.rs
@@ -10,7 +10,7 @@ use std::time::{Duration, Instant};
 /// # Example
 ///
 /// ```rust
-/// use vectorless::util::timing::Timer;
+/// use vectorless::util::Timer;
 ///
 /// let timer = Timer::start("indexing");
 /// // ... do work ...

From 32157c5c817c815d2ea84c898b5a35f52e2bd42f Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sun, 5 Apr 2026 21:12:52 +0800
Subject: [PATCH 5/6] docs: enhance documentation with detailed guides and
 updates

- rewrite main README with comprehensive overview of Vectorless features
- add dual pipeline guide explaining index and retrieval architecture
- create quick start guide with installation and basic usage examples
- update RFCs table with implemented parser statuses
- reorganize documentation structure with clear sections
- add architecture diagrams and detailed pipeline explanations
- include practical examples for different document formats

feat: rename JudgeStage to EvaluateStage for clarity

- rename JudgeStage to EvaluateStage to better reflect functionality
- update all references in orchestrator, pipeline, and stage implementations
- change metric field from judge_time_ms to evaluate_time_ms
- update stage names in pipeline context and execution flow
- maintain preserved names like LlmJudge for specific components
- update documentation and examples to use new naming convention

docs(rfcs): add RFC-0003 for evaluate stage naming

- document rationale for renaming JudgeStage to EvaluateStage
- explain motivation behind choosing "Evaluate" over "Judge"
- specify changes to file names, struct names, and references
- preserve existing LlmJudge terminology where appropriate
- update pipeline flow diagrams and implementation steps
---
 docs/README.md                                |  42 +++--
 docs/guides/README.md                         |   4 +-
 docs/guides/dual-pipeline.md                  | 152 ++++++++++++++++++
 docs/guides/quick-start.md                    |  89 ++++++++++
 docs/rfcs/0003-evaluate-stage.md              |  52 ++++++
 examples/retrieve.rs                          |   4 +-
 src/retrieval/mod.rs                          |  10 +-
 src/retrieval/pipeline/context.rs             |  10 +-
 src/retrieval/pipeline/mod.rs                 |   4 +-
 src/retrieval/pipeline/orchestrator.rs        |   2 +-
 src/retrieval/pipeline/outcome.rs             |   2 +-
 src/retrieval/pipeline/stage.rs               |   2 +-
 src/retrieval/pipeline_retriever.rs           |  16 +-
 src/retrieval/reference.rs                    |   2 +-
 .../stages/{judge.rs => evaluate.rs}          |  42 ++---
 src/retrieval/stages/mod.rs                   |   8 +-
 16 files changed, 376 insertions(+), 65 deletions(-)
 create mode 100644 docs/guides/dual-pipeline.md
 create mode 100644 docs/guides/quick-start.md
 create mode 100644 docs/rfcs/0003-evaluate-stage.md
 rename src/retrieval/stages/{judge.rs => evaluate.rs} (93%)

diff --git a/docs/README.md b/docs/README.md
index 6380009..9e0b009 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,27 +1,42 @@
 # Vectorless Documentation
 
-## Brand Assets
+Welcome to the Vectorless documentation.
 
-Logos and icons for use in README, website, and presentations.
+## What is Vectorless?
 
-- [assets/brand/](assets/brand/) — Logo variants (light, dark, horizontal, icon)
+Vectorless is a **reasoning-native document intelligence engine** that uses LLM-powered tree navigation instead of vector embeddings. It preserves document structure and uses intelligent navigation to find relevant content.
 
-## Design Documents
+## Key Features
 
-System architecture and core mechanism documentation.
+- **Dual Pipeline Architecture** - Separate Index and Retrieval pipelines
+- **Pilot System** - LLM-guided navigation with layered fallback
+- **Multi-Strategy Retrieval** - Keyword, LLM, and Structure-aware strategies
+- **Zero Infrastructure** - No vector database, no embeddings
+- **Multi-Format Support** - Markdown, PDF, DOCX, HTML
 
-| Document | Description |
-|----------|-------------|
-| [architecture.svg](design/architecture.svg) | System architecture diagram |
-| [recovery.md](design/recovery.md) | Graceful degradation and error recovery strategy |
+## Getting Started
 
-## Development Guides
+- [Quick Start Guide](guides/quick-start.md) - Get up and running in 5 minutes
 
-Guides for using and contributing to Vectorless.
+## Guides
 
 | Guide | Description |
 |-------|-------------|
-| [deployment.md](guides/deployment.md) | Production deployment checklist |
+| [Quick Start](guides/quick-start.md) | Get up and running quickly |
+| [Dual Pipeline](guides/dual-pipeline.md) | Understand Index + Retrieval pipelines |
+| [Pilot System](guides/pilot-system.md) | LLM-guided navigation |
+| [Multi-Strategy Retrieval](guides/multi-strategy.md) | Keyword, LLM, Structure strategies |
+
+## Design Documents
+
+System architecture and core mechanism documentation.
+
+| Document | Description |
+|----------|-------------|
+| [pilot.md](design/pilot.md) | Pilot system design |
+| [content-aggregation.md](design/content-aggregation.md) | Content aggregation design |
+| [client-module.md](design/client-module.md) | Client API design |
+| [v3.md](design/v3.md) | Version 3 architecture |
 
 ## RFCs (Feature Proposals)
 
@@ -29,7 +44,8 @@ Detailed design documents for new features.
 
 | RFC | Title | Status |
 |-----|-------|--------|
-| [0001](rfcs/0001-docx-parser.md) | DOCX Parser | Proposed |
+| [0001](rfcs/0001-docx-parser.md) | DOCX Parser | Implemented |
+| [0002](rfcs/0002-html-parser.md) | HTML Parser | Implemented |
 
 ### RFC Process
 
diff --git a/docs/guides/README.md b/docs/guides/README.md
index b5aaad7..aee856a 100644
--- a/docs/guides/README.md
+++ b/docs/guides/README.md
@@ -1 +1,3 @@
-# Guide
\ No newline at end of file
+# Vectorless Guides
+
+Practical guides for using Vectorless effectively.
diff --git a/docs/guides/dual-pipeline.md b/docs/guides/dual-pipeline.md
new file mode 100644
index 0000000..d16ef1a
--- /dev/null
+++ b/docs/guides/dual-pipeline.md
@@ -0,0 +1,152 @@
+# Understanding the Dual Pipeline
+
+Vectorless uses a **dual pipeline architecture** that separates document processing from retrieval. This design enables efficient indexing and intelligent retrieval.
+
+## Architecture Overview
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                           Vectorless Architecture                            │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                             │
+│   ┌─────────────────────────────┐     ┌─────────────────────────────┐      │
+│   │      INDEX PIPELINE         │     │    RETRIEVAL PIPELINE       │      │
+│   │                             │     │                             │      │
+│   │  Parse → Build → Enrich    │     │  Analyze → Plan → Search    │      │
+│   │    ↓       ↓       ↓       │     │     ↓        ↓       ↓      │      │
+│   │  Enhance → Optimize →      │     │  Evaluate (Sufficiency)     │      │
+│   │    Persist                  │     │     ↑_____________│         │      │
+│   │                             │     │     │ (NeedMoreData)│         │      │
+│   └─────────────────────────────┘     └─────────────────────────────┘      │
+│                 │                                    ▲                      │
+│                 └──────────── Workspace ─────────────┘                       │
+│                                                                             │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+## Index Pipeline
+
+The Index Pipeline processes documents and builds a searchable tree structure.
+
+### Stages
+
+| Stage | Purpose |
+|-------|---------|
+| **Parse** | Extract content from file (MD, PDF, DOCX, HTML) |
+| **Build** | Construct hierarchical document tree |
+| **Enrich** | Add metadata, TOC, references |
+| **Enhance** | Generate summaries (optional) |
+| **Optimize** | Prune, compress, optimize tree |
+| **Persist** | Save to workspace storage |
+
+### Example
+
+```rust
+// Index pipeline is triggered automatically
+let doc_id = engine.index(IndexContext::from_path("./manual.md")).await?;
+
+// With summary generation
+let doc_id = engine.index(
+    IndexContext::from_path("./manual.md")
+        .with_options(IndexOptions::new().with_summaries())
+).await?;
+```
+
+## Retrieval Pipeline
+
+The Retrieval Pipeline processes queries and retrieves relevant content.
+
+### Stages
+
+| Stage | Purpose |
+|-------|---------|
+| **Analyze** | Analyze query complexity, extract keywords |
+| **Plan** | Select retrieval strategy and algorithm |
+| **Search** | Navigate tree to find candidates |
+| **Evaluate** | Check sufficiency, aggregate content |
+
+### The Evaluate Stage
+
+The Evaluate stage is crucial - it determines if retrieved content is sufficient:
+
+```text
+                    ┌─────────────┐
+                    │   Search    │
+                    └──────┬──────┘
+                           │
+                           ▼
+                    ┌─────────────┐
+                    │  Evaluate   │
+                    └──────┬──────┘
+                           │
+              ┌────────────┼────────────┐
+              │            │            │
+              ▼            ▼            ▼
+        Sufficient    PartialSufficient  Insufficient
+              │            │            │
+              ▼            ▼            ▼
+           Return      More Search    Expand Beam
+                       (1 iteration)  (2 iterations)
+```
+
+### Retrieval Strategies
+
+```rust
+// Three built-in strategies:
+
+// 1. Keyword - Fast, exact matching
+// 2. LLM - Semantic understanding via Pilot
+// 3. Structure - Hierarchy-aware navigation
+```
+
+## The Pilot System
+
+Pilot is the "brain" of the Retrieval Pipeline:
+
+- **Query Analysis**: Understands what the user is asking
+- **Context Building**: Creates navigation context from TOC
+- **Decision Making**: Decides which branches to explore
+- **Fallback**: Algorithm takes over when LLM fails
+
+See [The Pilot System](./pilot-system.md) for details.
+
+## Data Flow
+
+```
+Document ──► Index Pipeline ──► Workspace
+                                       │
+Query ──► Retrieval Pipeline ──────────┘
+                    │
+                    ▼
+              RetrievalResult
+              ├── content
+              ├── node_ids
+              ├── confidence
+              └── trace
+```
+
+## Session-Based Operations
+
+For multi-document operations, use sessions:
+
+```rust
+// Create a session
+let session = engine.session().await;
+
+// Index multiple documents
+session.index(IndexContext::from_path("./doc1.md")).await?;
+session.index(IndexContext::from_path("./doc2.md")).await?;
+
+// Query across all documents
+let results = session.query_all("What is the architecture?").await?;
+
+for result in results {
+    println!("From {}: {}", result.doc_id, result.content);
+}
+```
+
+## See Also
+
+- [Multi-Strategy Retrieval](./multi-strategy.md)
+- [Content Aggregation](./content-aggregation.md)
+- [Sufficiency Checking](./sufficiency.md)
diff --git a/docs/guides/quick-start.md b/docs/guides/quick-start.md
new file mode 100644
index 0000000..8f93ffe
--- /dev/null
+++ b/docs/guides/quick-start.md
@@ -0,0 +1,89 @@
+# Quick Start Guide
+
+Get up and running with Vectorless in 5 minutes.
+
+## Prerequisites
+
+- Rust 1.70+ installed
+- An OpenAI API key (or compatible LLM endpoint)
+
+## Installation
+
+Add to your `Cargo.toml`:
+
+```toml
+[dependencies]
+vectorless = "0.1"
+tokio = { version = "1", features = ["full"] }
+```
+
+## Basic Usage
+
+```rust
+use vectorless::{Engine, IndexContext};
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // 1. Create an engine with OpenAI
+    let engine = Engine::builder()
+        .with_workspace("./workspace")
+        .with_openai(std::env::var("OPENAI_API_KEY")?)
+        .build()
+        .await?;
+
+    // 2. Index a document
+    let doc_id = engine.index(IndexContext::from_path("./manual.md")).await?;
+    println!("Indexed: {}", doc_id);
+
+    // 3. Query the document
+    let result = engine.query(&doc_id, "How do I configure authentication?").await?;
+    println!("Answer: {}", result.content);
+
+    Ok(())
+}
+```
+
+## Index from Different Sources
+
+```rust
+// From file path
+let id1 = engine.index(IndexContext::from_path("./doc.pdf")).await?;
+
+// From string content
+let html = "<html><body><h1>Title</h1><p>Content</p></body></html>";
+let id2 = engine.index(
+    IndexContext::from_content(html, vectorless::parser::DocumentFormat::Html)
+        .with_name("webpage")
+).await?;
+
+// From bytes (e.g., from HTTP response)
+let pdf_bytes = std::fs::read("./document.pdf")?;
+let id3 = engine.index(
+    IndexContext::from_bytes(pdf_bytes, vectorless::parser::DocumentFormat::Pdf)
+).await?;
+```
+
+## Index Modes
+
+```rust
+use vectorless::IndexMode;
+
+// Default: Skip if already indexed
+engine.index(IndexContext::from_path("./doc.md")).await?;
+
+// Force: Always re-index
+engine.index(
+    IndexContext::from_path("./doc.md").with_mode(IndexMode::Force)
+).await?;
+
+// Incremental: Only re-index if changed
+engine.index(
+    IndexContext::from_path("./doc.md").with_mode(IndexMode::Incremental)
+).await?;
+```
+
+## Next Steps
+
+- [Understanding the Dual Pipeline](./dual-pipeline.md) - Learn how Vectorless works
+- [Indexing Documents](./indexing.md) - Deep dive into document indexing
+- [Querying Documents](./querying.md) - Advanced query techniques
diff --git a/docs/rfcs/0003-evaluate-stage.md b/docs/rfcs/0003-evaluate-stage.md
new file mode 100644
index 0000000..4c25879
--- /dev/null
+++ b/docs/rfcs/0003-evaluate-stage.md
@@ -0,0 +1,52 @@
+# RFC-0003: Evaluate Stage Naming
+
+## Summary
+
+Rename the `JudgeStage` to `EvaluateStage` to better reflect its purpose in the retrieval pipeline.
+
+## Motivation
+
+The term "judge" implies a binary verdict, while the stage actually:
+1. Aggregates content from candidates
+2. Evaluates sufficiency levels (Sufficient, Partial, Insufficient)
+3. Can trigger additional search iterations
+4. Builds the final response
+
+"Evaluate" better captures the nuanced assessment process.
+
+## Design
+
+### Changes
+
+| Before | After |
+|--------|-------|
+| `JudgeStage` | `EvaluateStage` |
+| `judge.rs` | `evaluate.rs` |
+| `judge_time_ms` | `evaluate_time_ms` |
+| `"judge"` stage name | `"evaluate"` stage name |
+
+### Preserved Names
+
+The following are intentionally preserved:
+- `LlmJudge` - The sufficiency checker that "judges" sufficiency
+- `llm_judge` - Field name for the LLM-based sufficiency judge
+
+These remain as they specifically make a judgment call on sufficiency.
+
+## Pipeline Flow Update
+
+```
+Before: Analyze → Plan → Search → Judge
+After:  Analyze → Plan → Search → Evaluate
+```
+
+## Implementation
+
+1. Rename `src/retrieval/stages/judge.rs` to `evaluate.rs`
+2. Update struct name from `JudgeStage` to `EvaluateStage`
+3. Update all references in pipeline and retriever code
+4. Update documentation and diagrams
+
+## Status
+
+**Implemented** - 2026-04-05
diff --git a/examples/retrieve.rs b/examples/retrieve.rs
index a8a86be..a05a88a 100644
--- a/examples/retrieve.rs
+++ b/examples/retrieve.rs
@@ -20,7 +20,7 @@ use vectorless::document::DocumentTree;
 use vectorless::retrieval::{
     PipelineRetriever, RetrieveOptions, Retriever, StrategyPreference,
     pipeline::RetrievalOrchestrator,
-    stages::{AnalyzeStage, JudgeStage, PlanStage, SearchStage},
+    stages::{AnalyzeStage, EvaluateStage, PlanStage, SearchStage},
 };
 
 #[tokio::main]
@@ -119,7 +119,7 @@ async fn demo_orchestrator(tree: &DocumentTree) -> vectorless::Result<()> {
         .stage(AnalyzeStage::new())
         .stage(PlanStage::new())
         .stage(SearchStage::new())
-        .stage(JudgeStage::new());
+        .stage(EvaluateStage::new());
 
     println!("Orchestrator stages:");
     if let Ok(names) = orchestrator.stage_names() {
diff --git a/src/retrieval/mod.rs b/src/retrieval/mod.rs
index d746792..de9c009 100644
--- a/src/retrieval/mod.rs
+++ b/src/retrieval/mod.rs
@@ -15,7 +15,7 @@
 //! │                    RetrievalOrchestrator                         │
 //! │                                                                  │
 //! │  ┌─────────┐    ┌─────────┐    ┌─────────┐    ┌─────────┐      │
-//! │  │ Analyze │───►│  Plan   │───►│ Search  │───►│  Judge  │      │
+//! │  │ Analyze │───►│  Plan   │───►│ Search  │───►│  Evaluate  │      │
 //! │  └─────────┘    └─────────┘    └─────────┘    └─────────┘      │
 //! │                                     ▲              │             │
 //! │                                     └──────────────┘             │
@@ -30,19 +30,19 @@
 //! | [`AnalyzeStage`] | Query analysis (complexity, keywords, targets) |
 //! | [`PlanStage`] | Strategy and algorithm selection |
 //! | [`SearchStage`] | Execute tree search |
-//! | [`JudgeStage`] | Sufficiency checking |
+//! | [`EvaluateStage`] | Sufficiency checking |
 //!
 //! # Quick Start
 //!
 //! ```rust,ignore
 //! use vectorless::retrieval::pipeline::{RetrievalOrchestrator, RetrievalStage};
-//! use vectorless::retrieval::stages::{AnalyzeStage, PlanStage, SearchStage, JudgeStage};
+//! use vectorless::retrieval::stages::{AnalyzeStage, PlanStage, SearchStage, EvaluateStage};
 //!
 //! let orchestrator = RetrievalOrchestrator::new()
 //!     .stage(AnalyzeStage::new())
 //!     .stage(PlanStage::new())
 //!     .stage(SearchStage::new())
-//!     .stage(JudgeStage::new());
+//!     .stage(EvaluateStage::new());
 //!
 //! let response = orchestrator.execute(tree, query, options).await?;
 //! ```
@@ -85,7 +85,7 @@ pub use pipeline::{
 pub use pipeline::PipelineContext as StageContext;
 
 // Stage exports
-pub use stages::{AnalyzeStage, JudgeStage, PlanStage, SearchStage};
+pub use stages::{AnalyzeStage, EvaluateStage, PlanStage, SearchStage};
 
 // Strategy exports
 pub use strategy::{
diff --git a/src/retrieval/pipeline/context.rs b/src/retrieval/pipeline/context.rs
index 3537e7a..9bf02ae 100644
--- a/src/retrieval/pipeline/context.rs
+++ b/src/retrieval/pipeline/context.rs
@@ -144,8 +144,8 @@ pub struct RetrievalMetrics {
     pub plan_time_ms: u64,
     /// Time spent in search stage (ms).
     pub search_time_ms: u64,
-    /// Time spent in judge stage (ms).
-    pub judge_time_ms: u64,
+    /// Time spent in evaluate stage (ms).
+    pub evaluate_time_ms: u64,
     /// Total time (ms).
     pub total_time_ms: u64,
     /// Number of nodes visited.
@@ -175,7 +175,7 @@ impl RetrievalMetrics {
         self.analyze_time_ms += other.analyze_time_ms;
         self.plan_time_ms += other.plan_time_ms;
         self.search_time_ms += other.search_time_ms;
-        self.judge_time_ms += other.judge_time_ms;
+        self.evaluate_time_ms += other.evaluate_time_ms;
         self.nodes_visited += other.nodes_visited;
         self.llm_calls += other.llm_calls;
         self.tokens_used = other.tokens_used; // Use latest
@@ -228,7 +228,7 @@ pub struct PipelineContext {
     /// Number of search iterations performed.
     pub search_iterations: usize,
 
-    // ============ Judge Stage Output ============
+    // ============ Evaluate Stage Output ============
     /// Current sufficiency level.
     pub sufficiency: SufficiencyLevel,
     /// Accumulated content from candidates.
@@ -331,7 +331,7 @@ impl PipelineContext {
             "analyze" => self.metrics.analyze_time_ms += duration_ms,
             "plan" => self.metrics.plan_time_ms += duration_ms,
             "search" => self.metrics.search_time_ms += duration_ms,
-            "judge" => self.metrics.judge_time_ms += duration_ms,
+            "evaluate" => self.metrics.evaluate_time_ms += duration_ms,
             _ => {}
         }
 
diff --git a/src/retrieval/pipeline/mod.rs b/src/retrieval/pipeline/mod.rs
index 25365b7..5351b76 100644
--- a/src/retrieval/pipeline/mod.rs
+++ b/src/retrieval/pipeline/mod.rs
@@ -15,7 +15,7 @@
 //!
 //! ```text
 //! ┌─────────┐    ┌─────────┐    ┌─────────┐    ┌─────────┐
-//! │ Analyze │───►│  Plan   │───►│ Search  │───►│  Judge  │
+//! │ Analyze │───►│  Plan   │───►│ Search  │───►│  Evaluate  │
 //! │ (分析)  │    │ (规划)  │    │ (搜索)  │    │ (判断)  │
 //! └─────────┘    └─────────┘    └─────────┘    └─────────┘
 //! ```
@@ -39,7 +39,7 @@
 //!     .stage(AnalyzeStage::new())
 //!     .stage(PlanStage::new())
 //!     .stage(SearchStage::new())
-//!     .stage(JudgeStage::new());
+//!     .stage(EvaluateStage::new());
 //!
 //! let response = orchestrator.execute(tree, query, options).await?;
 //! ```
diff --git a/src/retrieval/pipeline/orchestrator.rs b/src/retrieval/pipeline/orchestrator.rs
index 0dce81f..e4d5433 100644
--- a/src/retrieval/pipeline/orchestrator.rs
+++ b/src/retrieval/pipeline/orchestrator.rs
@@ -67,7 +67,7 @@ pub struct ExecutionGroup {
 ///     .stage(AnalyzeStage::new())
 ///     .stage(PlanStage::new())
 ///     .stage(SearchStage::new())
-///     .stage(JudgeStage::new())
+///     .stage(EvaluateStage::new())
 ///     .with_pilot(pilot)
 ///     .with_max_backtracks(3);
 ///
diff --git a/src/retrieval/pipeline/outcome.rs b/src/retrieval/pipeline/outcome.rs
index c069976..d005b61 100644
--- a/src/retrieval/pipeline/outcome.rs
+++ b/src/retrieval/pipeline/outcome.rs
@@ -17,7 +17,7 @@ pub enum StageOutcome {
 
     /// Need more data, go back to Search stage for another iteration.
     ///
-    /// This enables incremental retrieval where the Judge stage can
+    /// This enables incremental retrieval where the Evaluate stage can
     /// request additional search rounds if current results are insufficient.
     NeedMoreData {
         /// Additional beam width to add for next search iteration.
diff --git a/src/retrieval/pipeline/stage.rs b/src/retrieval/pipeline/stage.rs
index 285c717..6773638 100644
--- a/src/retrieval/pipeline/stage.rs
+++ b/src/retrieval/pipeline/stage.rs
@@ -97,7 +97,7 @@ pub trait RetrievalStage: Send + Sync {
 
     /// Whether this stage can trigger backtracking.
     ///
-    /// Stages like Judge that evaluate sufficiency may need to
+    /// Stages like Evaluate that evaluate sufficiency may need to
     /// trigger additional search iterations.
     fn can_backtrack(&self) -> bool {
         false
diff --git a/src/retrieval/pipeline_retriever.rs b/src/retrieval/pipeline_retriever.rs
index b725464..2947704 100644
--- a/src/retrieval/pipeline_retriever.rs
+++ b/src/retrieval/pipeline_retriever.rs
@@ -12,7 +12,7 @@ use std::sync::Arc;
 use super::content::ContentAggregatorConfig;
 use super::pipeline::RetrievalOrchestrator;
 use super::retriever::{CostEstimate, Retriever, RetrieverError, RetrieverResult};
-use super::stages::{AnalyzeStage, JudgeStage, PlanStage, SearchStage};
+use super::stages::{AnalyzeStage, EvaluateStage, PlanStage, SearchStage};
 use super::strategy::LlmStrategy;
 use super::types::{RetrieveOptions, RetrieveResponse};
 use crate::document::DocumentTree;
@@ -26,7 +26,7 @@ use crate::retrieval::pilot::{LlmPilot, PilotConfig};
 /// - Analyze stage: Query complexity and keyword extraction
 /// - Plan stage: Strategy and algorithm selection
 /// - Search stage: Tree traversal
-/// - Judge stage: Sufficiency checking
+/// - Evaluate stage: Sufficiency checking
 ///
 /// # Example
 ///
@@ -81,7 +81,7 @@ impl PipelineRetriever {
 
     /// Set content aggregator configuration.
     ///
-    /// When enabled, the Judge stage uses precision-focused content
+    /// When enabled, the Evaluate stage uses precision-focused content
     /// aggregation with relevance scoring and token budget control.
     pub fn with_content_config(mut self, config: ContentAggregatorConfig) -> Self {
         self.content_config = Some(config);
@@ -113,16 +113,16 @@ impl PipelineRetriever {
         }
         orchestrator = orchestrator.stage(search_stage);
 
-        // Add judge stage with optional content aggregator
-        let mut judge_stage = JudgeStage::new();
+        // Add evaluate stage with optional content aggregator
+        let mut evaluate_stage = EvaluateStage::new();
         if let Some(ref client) = self.llm_client {
-            judge_stage = judge_stage.with_llm_judge(client.clone());
+            evaluate_stage = evaluate_stage.with_llm_judge(client.clone());
         }
         // Configure content aggregator if provided
         if let Some(ref config) = self.content_config {
-            judge_stage = judge_stage.with_content_aggregator(config.clone());
+            evaluate_stage = evaluate_stage.with_content_aggregator(config.clone());
         }
-        orchestrator = orchestrator.stage(judge_stage);
+        orchestrator = orchestrator.stage(evaluate_stage);
 
         orchestrator
     }
diff --git a/src/retrieval/reference.rs b/src/retrieval/reference.rs
index cb42940..dcdcadd 100644
--- a/src/retrieval/reference.rs
+++ b/src/retrieval/reference.rs
@@ -31,7 +31,7 @@
 //!
 //! Reference following is triggered when:
 //! 1. Search finds content containing references
-//! 2. Judge determines current content is insufficient
+//! 2. Evaluate determines current content is insufficient
 //! 3. Pilot suggests following a specific reference
 //!
 //! # Example
diff --git a/src/retrieval/stages/judge.rs b/src/retrieval/stages/evaluate.rs
similarity index 93%
rename from src/retrieval/stages/judge.rs
rename to src/retrieval/stages/evaluate.rs
index 8378371..d6d8a21 100644
--- a/src/retrieval/stages/judge.rs
+++ b/src/retrieval/stages/evaluate.rs
@@ -1,7 +1,7 @@
 // Copyright (c) 2026 vectorless developers
 // SPDX-License-Identifier: Apache-2.0
 
-//! Judge Stage - Sufficiency checking.
+//! Evaluate Stage - Sufficiency checking.
 //!
 //! This stage evaluates whether the collected content is sufficient
 //! to answer the query, and can trigger additional search iterations.
@@ -17,7 +17,7 @@ use crate::retrieval::sufficiency::{LlmJudge, SufficiencyChecker, ThresholdCheck
 use crate::retrieval::types::{RetrievalResult, RetrieveResponse, SufficiencyLevel};
 use crate::util::estimate_tokens;
 
-/// Judge Stage - evaluates retrieval sufficiency.
+/// Evaluate Stage - evaluates retrieval sufficiency.
 ///
 /// This stage:
 /// 1. Aggregates content from candidates
@@ -32,12 +32,12 @@ use crate::util::estimate_tokens;
 /// # Example
 ///
 /// ```rust,ignore
-/// let stage = JudgeStage::new()
+/// let stage = EvaluateStage::new()
 ///     .with_llm_judge(llm_client)
 ///     .with_max_iterations(3)
 ///     .with_content_aggregator(ContentAggregatorConfig::default());
 /// ```
-pub struct JudgeStage {
+pub struct EvaluateStage {
     threshold_checker: ThresholdChecker,
     llm_judge: Option<LlmJudge>,
     max_iterations: usize,
@@ -46,14 +46,14 @@ pub struct JudgeStage {
     content_aggregator: Option<ContentAggregator>,
 }
 
-impl Default for JudgeStage {
+impl Default for EvaluateStage {
     fn default() -> Self {
         Self::new()
     }
 }
 
-impl JudgeStage {
-    /// Create a new judge stage.
+impl EvaluateStage {
+    /// Create a new evaluate stage.
     pub fn new() -> Self {
         Self {
             threshold_checker: ThresholdChecker::new(),
@@ -207,10 +207,10 @@ impl JudgeStage {
             return SufficiencyLevel::Sufficient;
         }
 
-        // Use LLM judge if available and enabled
+        // Use LLM evaluate if available and enabled
         if self.use_llm_judge {
-            if let Some(ref judge) = self.llm_judge {
-                return judge.check(&ctx.query, &ctx.accumulated_content, ctx.token_count);
+            if let Some(ref evaluate) = self.llm_judge {
+                return evaluate.check(&ctx.query, &ctx.accumulated_content, ctx.token_count);
             }
         }
 
@@ -301,9 +301,9 @@ impl JudgeStage {
 }
 
 #[async_trait]
-impl RetrievalStage for JudgeStage {
+impl RetrievalStage for EvaluateStage {
     fn name(&self) -> &'static str {
-        "judge"
+        "evaluate"
     }
 
     fn depends_on(&self) -> Vec<&'static str> {
@@ -315,7 +315,7 @@ impl RetrievalStage for JudgeStage {
     }
 
     fn failure_policy(&self) -> FailurePolicy {
-        FailurePolicy::skip() // Can skip if judge fails
+        FailurePolicy::skip() // Can skip if evaluate fails
     }
 
     fn can_backtrack(&self) -> bool {
@@ -343,7 +343,7 @@ impl RetrievalStage for JudgeStage {
         info!("Sufficiency level: {:?}", ctx.sufficiency);
 
         // Update metrics
-        ctx.metrics.judge_time_ms += start.elapsed().as_millis() as u64;
+        ctx.metrics.evaluate_time_ms += start.elapsed().as_millis() as u64;
         ctx.metrics.tokens_used = tokens;
 
         // 3. Decide next action based on sufficiency
@@ -384,7 +384,7 @@ impl RetrievalStage for JudgeStage {
             }
         };
 
-        // Update LLM call count if we used LLM judge
+        // Update LLM call count if we used LLM evaluate
         if self.use_llm_judge && self.llm_judge.is_some() {
             ctx.metrics.llm_calls += 1;
         }
@@ -398,21 +398,21 @@ mod tests {
     use super::*;
 
     #[test]
-    fn test_judge_stage_creation() {
-        let stage = JudgeStage::new();
+    fn test_evaluate_stage_creation() {
+        let stage = EvaluateStage::new();
         assert!(stage.llm_judge.is_none());
         assert!(!stage.use_llm_judge);
     }
 
     #[test]
-    fn test_judge_stage_dependencies() {
-        let stage = JudgeStage::new();
+    fn test_evaluate_stage_dependencies() {
+        let stage = EvaluateStage::new();
         assert_eq!(stage.depends_on(), vec!["search"]);
     }
 
     #[test]
-    fn test_judge_can_backtrack() {
-        let stage = JudgeStage::new();
+    fn test_evaluate_can_backtrack() {
+        let stage = EvaluateStage::new();
         assert!(stage.can_backtrack());
     }
 }
diff --git a/src/retrieval/stages/mod.rs b/src/retrieval/stages/mod.rs
index 7d66cf7..e6cd13b 100644
--- a/src/retrieval/stages/mod.rs
+++ b/src/retrieval/stages/mod.rs
@@ -8,12 +8,12 @@
 //! - [`AnalyzeStage`] - Query analysis (complexity, keywords, target sections)
 //! - [`PlanStage`] - Strategy and algorithm selection
 //! - [`SearchStage`] - Execute tree search
-//! - [`JudgeStage`] - Sufficiency checking
+//! - [`EvaluateStage`] - Sufficiency checking
 //!
 //! # Stage Flow
 //!
 //! ```text
-//! Analyze → Plan → Search → Judge
+//! Analyze → Plan → Search → Evaluate
 //!                    ↑         │
 //!                    └─────────┘ (NeedMoreData)
 //! ```
@@ -23,11 +23,11 @@
 //! Implement [`RetrievalStage`](crate::retrieval::pipeline::RetrievalStage) to create custom stages.
 
 mod analyze;
-mod judge;
+mod evaluate;
 mod plan;
 mod search;
 
 pub use analyze::AnalyzeStage;
-pub use judge::JudgeStage;
+pub use evaluate::EvaluateStage;
 pub use plan::PlanStage;
 pub use search::SearchStage;

From 6784ad46f654e0296df99fd209feeaeca8e7f2e9 Mon Sep 17 00:00:00 2001
From: zTgx <747674262@qq.com>
Date: Sun, 5 Apr 2026 21:14:26 +0800
Subject: [PATCH 6/6] chore(release): bump version from 0.1.13 to 0.1.14

- Update package version in Cargo.toml from 0.1.13 to 0.1.14
---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index f757265..e6b4761 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectorless"
-version = "0.1.13"
+version = "0.1.14"
 edition = "2024"
 authors = ["zTgx <beautifularea@gmail.com>"]
 description = "Hierarchical, reasoning-native document intelligence engine"