Skip to content
Merged

Dev #17

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "vectorless"
version = "0.1.16"
version = "0.1.17"
edition = "2024"
authors = ["zTgx <beautifularea@gmail.com>"]
description = "Hierarchical, reasoning-native document intelligence engine"
Expand Down
192 changes: 192 additions & 0 deletions src/config/types/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,163 @@ pub struct StrategyConfig {
/// Low similarity threshold for "explore" decision.
#[serde(default = "default_low_similarity_threshold")]
pub low_similarity_threshold: f32,

/// Hybrid strategy configuration (BM25 + LLM refinement).
#[serde(default)]
pub hybrid: HybridStrategyConfig,

/// Cross-document strategy configuration.
#[serde(default)]
pub cross_document: CrossDocumentStrategyConfig,

/// Page-range strategy configuration.
#[serde(default)]
pub page_range: PageRangeStrategyConfig,
}

/// Hybrid strategy configuration (BM25 pre-filter + LLM refinement).
///
/// Embedded in `StrategyConfig` as the `hybrid` field. Every field carries a
/// serde default, so a section that omits some (or all) keys still
/// deserializes, with the missing values filled in from the `default_*`
/// helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HybridStrategyConfig {
/// Enable hybrid strategy. Defaults to `true`.
#[serde(default = "default_true")]
pub enabled: bool,

/// BM25 pre-filter: share of top-ranked candidates to keep. Defaults to `0.3`.
/// NOTE(review): the default suggests a 0.0-1.0 fraction rather than a
/// percentage; confirm against the code that consumes this value.
#[serde(default = "default_pre_filter_ratio")]
pub pre_filter_ratio: f32,

/// Minimum candidates to pass to LLM. Defaults to `2`.
#[serde(default = "default_min_candidates")]
pub min_candidates: usize,

/// Maximum candidates for LLM refinement. Defaults to `5`.
#[serde(default = "default_max_candidates")]
pub max_candidates: usize,

/// BM25 score for auto-accept (skip LLM). Defaults to `0.85`.
#[serde(default = "default_auto_accept_threshold")]
pub auto_accept_threshold: f32,

/// BM25 score for auto-reject (skip LLM). Defaults to `0.15`.
#[serde(default = "default_auto_reject_threshold")]
pub auto_reject_threshold: f32,

/// Weight for BM25 score in final scoring. Defaults to `0.4`.
/// Nothing in this struct checks that the two weights add up to 1.0.
#[serde(default = "default_bm25_weight")]
pub bm25_weight: f32,

/// Weight for LLM score in final scoring. Defaults to `0.6`.
#[serde(default = "default_llm_weight")]
pub llm_weight: f32,
}

/// Serde default for boolean switches that should be on unless disabled.
fn default_true() -> bool {
    true
}

/// Serde default for `HybridStrategyConfig::pre_filter_ratio`.
fn default_pre_filter_ratio() -> f32 {
    0.3
}

/// Serde default for `HybridStrategyConfig::min_candidates`.
fn default_min_candidates() -> usize {
    2
}

/// Serde default for `HybridStrategyConfig::max_candidates`.
fn default_max_candidates() -> usize {
    5
}

/// Serde default for `HybridStrategyConfig::auto_accept_threshold`.
fn default_auto_accept_threshold() -> f32 {
    0.85
}

/// Serde default for `HybridStrategyConfig::auto_reject_threshold`.
fn default_auto_reject_threshold() -> f32 {
    0.15
}

/// Serde default for `HybridStrategyConfig::bm25_weight`.
fn default_bm25_weight() -> f32 {
    0.4
}

/// Serde default for `HybridStrategyConfig::llm_weight`.
fn default_llm_weight() -> f32 {
    0.6
}

impl Default for HybridStrategyConfig {
fn default() -> Self {
Self {
enabled: true,
pre_filter_ratio: default_pre_filter_ratio(),
min_candidates: default_min_candidates(),
max_candidates: default_max_candidates(),
auto_accept_threshold: default_auto_accept_threshold(),
auto_reject_threshold: default_auto_reject_threshold(),
bm25_weight: default_bm25_weight(),
llm_weight: default_llm_weight(),
}
}
}

/// Cross-document strategy configuration.
///
/// Embedded in `StrategyConfig` as the `cross_document` field. Every field
/// carries a serde default, so keys may be omitted individually.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossDocumentStrategyConfig {
/// Enable cross-document strategy. Defaults to `true`.
#[serde(default = "default_true")]
pub enabled: bool,

/// Maximum documents to search. Defaults to `10`.
#[serde(default = "default_max_documents")]
pub max_documents: usize,

/// Maximum results per document. Defaults to `3`.
#[serde(default = "default_max_results_per_doc")]
pub max_results_per_doc: usize,

/// Maximum total results. Defaults to `10`.
#[serde(default = "default_max_total_results")]
pub max_total_results: usize,

/// Minimum score threshold. Defaults to `0.3`.
#[serde(default = "default_min_score")]
pub min_score: f32,

/// Merge strategy: TopK, BestPerDocument, WeightedByRelevance.
/// Defaults to `"TopK"`. Stored as a plain string, so any value
/// deserializes successfully.
/// NOTE(review): presumably mapped onto the retrieval `MergeStrategy` type
/// elsewhere; confirm where unknown names are rejected.
#[serde(default = "default_merge_strategy")]
pub merge_strategy: String,

/// Search documents in parallel. Defaults to `true`.
#[serde(default = "default_true")]
pub parallel_search: bool,
}

/// Serde default for `CrossDocumentStrategyConfig::max_documents`.
fn default_max_documents() -> usize {
    10
}

/// Serde default for `CrossDocumentStrategyConfig::max_results_per_doc`.
fn default_max_results_per_doc() -> usize {
    3
}

/// Serde default for `CrossDocumentStrategyConfig::max_total_results`.
fn default_max_total_results() -> usize {
    10
}

/// Serde default for `CrossDocumentStrategyConfig::min_score`.
fn default_min_score() -> f32 {
    0.3
}

/// Serde default for `CrossDocumentStrategyConfig::merge_strategy`.
fn default_merge_strategy() -> String {
    String::from("TopK")
}

impl Default for CrossDocumentStrategyConfig {
fn default() -> Self {
Self {
enabled: true,
max_documents: default_max_documents(),
max_results_per_doc: default_max_results_per_doc(),
max_total_results: default_max_total_results(),
min_score: default_min_score(),
merge_strategy: default_merge_strategy(),
parallel_search: true,
}
}
}

/// Page-range strategy configuration.
///
/// Embedded in `StrategyConfig` as the `page_range` field. Every field
/// carries a serde default, so keys may be omitted individually.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PageRangeStrategyConfig {
/// Enable page-range strategy. Defaults to `true`.
#[serde(default = "default_true")]
pub enabled: bool,

/// Include nodes that span across the boundary. Defaults to `true`.
#[serde(default = "default_true")]
pub include_boundary_nodes: bool,

/// Expand range by N pages for context. Defaults to `0` (the `usize`
/// default, since no custom default function is named).
#[serde(default)]
pub expand_context_pages: usize,

/// Minimum overlap ratio for node inclusion. Defaults to `0.1`.
#[serde(default = "default_min_overlap_ratio")]
pub min_overlap_ratio: f32,
}

/// Serde default for `PageRangeStrategyConfig::min_overlap_ratio`.
fn default_min_overlap_ratio() -> f32 {
    0.1
}

impl Default for PageRangeStrategyConfig {
fn default() -> Self {
Self {
enabled: true,
include_boundary_nodes: true,
expand_context_pages: 0,
min_overlap_ratio: default_min_overlap_ratio(),
}
}
}

fn default_exploration_weight() -> f32 {
Expand All @@ -362,6 +519,9 @@ impl Default for StrategyConfig {
similarity_threshold: default_similarity_threshold(),
high_similarity_threshold: default_high_similarity_threshold(),
low_similarity_threshold: default_low_similarity_threshold(),
hybrid: HybridStrategyConfig::default(),
cross_document: CrossDocumentStrategyConfig::default(),
page_range: PageRangeStrategyConfig::default(),
}
}
}
Expand Down Expand Up @@ -453,5 +613,37 @@ mod tests {
let config = StrategyConfig::default();
assert!((config.exploration_weight - 1.414).abs() < 0.001);
assert_eq!(config.similarity_threshold, 0.5);
assert!(config.hybrid.enabled);
assert!(config.cross_document.enabled);
assert!(config.page_range.enabled);
}

/// Pins every default of `HybridStrategyConfig`, so a change to any of the
/// `default_*` helpers is caught here rather than in production config.
#[test]
fn test_hybrid_strategy_config_defaults() {
    let config = HybridStrategyConfig::default();
    assert!(config.enabled);
    assert!((config.pre_filter_ratio - 0.3).abs() < f32::EPSILON);
    assert_eq!(config.min_candidates, 2);
    assert_eq!(config.max_candidates, 5);
    assert!((config.auto_accept_threshold - 0.85).abs() < f32::EPSILON);
    // These three were previously unchecked.
    assert!((config.auto_reject_threshold - 0.15).abs() < f32::EPSILON);
    assert!((config.bm25_weight - 0.4).abs() < f32::EPSILON);
    assert!((config.llm_weight - 0.6).abs() < f32::EPSILON);
}

/// Pins every default of `CrossDocumentStrategyConfig`, so a change to any of
/// the `default_*` helpers is caught here.
#[test]
fn test_cross_document_strategy_config_defaults() {
    let config = CrossDocumentStrategyConfig::default();
    assert!(config.enabled);
    assert_eq!(config.max_documents, 10);
    assert_eq!(config.max_results_per_doc, 3);
    // These two were previously unchecked.
    assert_eq!(config.max_total_results, 10);
    assert!((config.min_score - 0.3).abs() < f32::EPSILON);
    assert_eq!(config.merge_strategy, "TopK");
    assert!(config.parallel_search);
}

/// Checks all four defaults of `PageRangeStrategyConfig`.
#[test]
fn test_page_range_strategy_config_defaults() {
    // Destructure without `..` so that adding a field forces this test to be updated.
    let PageRangeStrategyConfig {
        enabled,
        include_boundary_nodes,
        expand_context_pages,
        min_overlap_ratio,
    } = PageRangeStrategyConfig::default();

    assert!(enabled);
    assert!(include_boundary_nodes);
    assert_eq!(expand_context_pages, 0);
    assert!((min_overlap_ratio - 0.1).abs() < f32::EPSILON);
}
}
5 changes: 4 additions & 1 deletion src/retrieval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ pub use stages::{AnalyzeStage, EvaluateStage, PlanStage, SearchStage};

// Strategy exports
pub use strategy::{
KeywordStrategy, LlmStrategy, RetrievalStrategy, SemanticStrategy, StrategyCapabilities,
CrossDocumentConfig, CrossDocumentStrategy, DocumentEntry, DocumentId, DocumentResult,
HybridConfig, HybridStrategy, KeywordStrategy, LlmStrategy, MergeStrategy,
PageRange, PageRangeConfig, PageRangeStrategy, RetrievalStrategy, SemanticStrategy,
StrategyCapabilities, StrategyCost,
};

// Search exports
Expand Down
49 changes: 48 additions & 1 deletion src/retrieval/stages/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ use crate::retrieval::pipeline::{
use crate::retrieval::search::{
BeamSearch, GreedySearch, SearchConfig as SearchAlgConfig, SearchTree,
};
use crate::retrieval::strategy::{KeywordStrategy, LlmStrategy, RetrievalStrategy};
use crate::retrieval::strategy::{
HybridConfig, HybridStrategy, KeywordStrategy, LlmStrategy, RetrievalStrategy,
};
use crate::retrieval::types::StrategyPreference;

/// Search Stage - executes tree search with optional Pilot guidance.
Expand Down Expand Up @@ -52,6 +54,7 @@ pub struct SearchStage {
keyword_strategy: KeywordStrategy,
llm_strategy: Option<Arc<LlmStrategy>>,
semantic_strategy: Option<Arc<dyn RetrievalStrategy>>,
hybrid_strategy: Option<Arc<dyn RetrievalStrategy>>,
/// Pilot for navigation guidance (optional).
pilot: Option<Arc<dyn Pilot>>,
}
Expand All @@ -69,6 +72,7 @@ impl SearchStage {
keyword_strategy: KeywordStrategy::new(),
llm_strategy: None,
semantic_strategy: None,
hybrid_strategy: None,
pilot: None,
}
}
Expand All @@ -95,6 +99,26 @@ impl SearchStage {
self
}

/// Add hybrid strategy (BM25 + LLM refinement).
///
/// Stores the given strategy as-is, replacing any hybrid strategy set
/// earlier. This method neither requires nor creates an LLM strategy.
///
/// The stored strategy is used when the preference is `ForceHybrid`, and as
/// the fallback for `ForceCrossDocument` / `ForcePageRange`.
pub fn with_hybrid_strategy(mut self, strategy: Arc<dyn RetrievalStrategy>) -> Self {
self.hybrid_strategy = Some(strategy);
self
}

/// Build the hybrid strategy from the stage's LLM strategy, applying `config`.
///
/// The LLM strategy must already be set on this stage. When it is not, the
/// call leaves the stage unchanged and `config` is dropped, so the order of
/// builder calls matters.
pub fn with_hybrid_config(mut self, config: HybridConfig) -> Self {
    // Without an LLM there is nothing to wrap; hand the stage back untouched.
    let Some(llm) = self.llm_strategy.as_deref() else {
        return self;
    };

    // The hybrid strategy owns its inner strategy, so it gets its own copy
    // of the LLM strategy rather than sharing the stage's `Arc`.
    let inner: Box<dyn RetrievalStrategy> = Box::new(llm.clone());
    let hybrid = HybridStrategy::new(inner).with_config(config);
    self.hybrid_strategy = Some(Arc::new(hybrid));
    self
}

/// Check if Pilot is available and active.
pub fn has_pilot(&self) -> bool {
self.pilot.as_ref().map(|p| p.is_active()).unwrap_or(false)
Expand Down Expand Up @@ -127,6 +151,29 @@ impl SearchStage {
Arc::new(self.keyword_strategy.clone())
}
}
// Resolution order: an explicitly configured hybrid strategy, then one built
// on the fly around the LLM strategy, then plain keyword search.
StrategyPreference::ForceHybrid => match (&self.hybrid_strategy, &self.llm_strategy) {
    (Some(hybrid), _) => {
        info!("Using Hybrid strategy");
        Arc::clone(hybrid)
    }
    (None, Some(llm)) => {
        info!("Using Hybrid strategy (auto-created from LLM)");
        // Built with the default hybrid configuration, from a copy of the LLM strategy.
        let inner: Box<dyn RetrievalStrategy> = Box::new((**llm).clone());
        Arc::new(HybridStrategy::new(inner))
    }
    (None, None) => {
        warn!("Hybrid strategy requested but no LLM available, falling back to Keyword");
        Arc::new(self.keyword_strategy.clone())
    }
},
StrategyPreference::ForceCrossDocument | StrategyPreference::ForcePageRange => {
    // These require special setup, so this stage only offers a fallback:
    // an explicitly configured hybrid strategy if there is one, otherwise
    // keyword search. Unlike `ForceHybrid`, no hybrid strategy is
    // auto-created from the LLM strategy here.
    if let Some(ref strategy) = self.hybrid_strategy {
        // Fixed: the message previously ended with an unmatched `)`.
        info!("Using Hybrid strategy as fallback for {:?}", preference);
        strategy.clone()
    } else {
        warn!("{:?} requires special configuration, falling back to Keyword", preference);
        Arc::new(self.keyword_strategy.clone())
    }
}
StrategyPreference::Auto => {
// Default to keyword, let plan stage decide
Arc::new(self.keyword_strategy.clone())
Expand Down
Loading