# Vectorless Configuration Example
# Copy this file to vectorless.toml and fill in your API keys.
#
# All configuration is loaded from this file only.
# No environment variables are used — this ensures explicit, traceable configuration.

# ============================================================================
# LLM Configuration (Unified)
# ============================================================================
#
# The LLM pool allows configuring different models for different purposes:
#   - summary:   generates document summaries during indexing
#   - retrieval: makes retrieval decisions and evaluates content
#   - pilot:     provides intelligent navigation guidance
#
# Each client can have its own model, endpoint, and settings.
[llm]
# Default API key; every client below inherits it unless it sets its own.
api_key = "sk-your-api-key-here"

# Summary client — generates document summaries during indexing.
# Bulk workload: a fast, inexpensive model is appropriate here.
[llm.summary]
model = "gpt-4o-mini"
endpoint = "https://api.openai.com/v1"
max_tokens = 200
temperature = 0.0
# api_key = "sk-specific-key-for-summary"  # Optional: override the default key

# Retrieval client — makes retrieval decisions and evaluates content.
# A more capable model here can improve decision quality.
[llm.retrieval]
model = "gpt-4o"
endpoint = "https://api.openai.com/v1"
max_tokens = 100
temperature = 0.0
# api_key = "sk-specific-key-for-retrieval"  # Optional: override the default key

# Pilot client — provides intelligent navigation guidance.
# Navigation decisions are frequent and short, so favour a fast model.
[llm.pilot]
model = "gpt-4o-mini"
endpoint = "https://api.openai.com/v1"
max_tokens = 300
temperature = 0.0
# api_key = "sk-specific-key-for-pilot"  # Optional: override the default key

# Retry policy shared by all LLM calls (delays in milliseconds).
[llm.retry]
max_attempts = 3
initial_delay_ms = 500
max_delay_ms = 30_000
multiplier = 2.0
retry_on_rate_limit = true

# Throttling / rate limiting shared by all LLM calls.
[llm.throttle]
max_concurrent_requests = 10
requests_per_minute = 500
enabled = true
semaphore_enabled = true

# Fallback policy shared by all LLM calls.
[llm.fallback]
enabled = true
models = ["gpt-4o-mini", "glm-4-flash"]
# Alternative endpoints to try when falling back:
# endpoints = [
#     "https://api.openai.com/v1",
#     "https://api.z.ai/api/paas/v4",
# ]
on_rate_limit = "retry_then_fallback"
on_timeout = "retry_then_fallback"
on_all_failed = "return_error"
# ============================================================================
# Metrics Configuration (Unified)
# ============================================================================
[metrics]
enabled = true
storage_path = "./workspace/metrics"
retention_days = 30

[metrics.llm]
track_tokens = true
track_latency = true
track_cost = true
cost_per_1k_input_tokens = 0.00015  # gpt-4o-mini pricing
cost_per_1k_output_tokens = 0.0006

[metrics.pilot]
track_decisions = true
track_accuracy = true
track_feedback = true

[metrics.retrieval]
track_paths = true
track_scores = true
track_iterations = true
track_cache = true
# ============================================================================
# Pilot Configuration
# ============================================================================
[pilot]
mode = "Balanced"  # Aggressive | Balanced | Conservative | AlgorithmOnly
guide_at_start = true
guide_at_backtrack = true

# Token and call budgets for pilot guidance, enforced per query.
[pilot.budget]
max_tokens_per_query = 2000
max_tokens_per_call = 500
max_calls_per_query = 5
max_calls_per_level = 2
hard_limit = true

# Thresholds controlling when the pilot intervenes during navigation.
[pilot.intervention]
fork_threshold = 3
score_gap_threshold = 0.15
low_score_threshold = 0.3
max_interventions_per_level = 2

# Feedback collection and learning settings.
[pilot.feedback]
enabled = true
storage_path = "./workspace/feedback"
learning_rate = 0.1
min_samples_for_learning = 10
# ============================================================================
# Retrieval Configuration
# ============================================================================
[retrieval]
model = "gpt-4o"
endpoint = "https://api.openai.com/v1"
top_k = 3
max_tokens = 1000
temperature = 0.0

[retrieval.search]
top_k = 5
beam_width = 3
max_iterations = 10
min_score = 0.1

[retrieval.sufficiency]
min_tokens = 500
target_tokens = 2000
max_tokens = 4000
min_content_length = 200
confidence_threshold = 0.7

[retrieval.cache]
max_entries = 1000
ttl_secs = 3600  # entry lifetime, seconds

[retrieval.strategy]
exploration_weight = 1.414  # ≈ sqrt(2)
similarity_threshold = 0.5
high_similarity_threshold = 0.8
low_similarity_threshold = 0.3
# Hybrid strategy (BM25 pre-filter + LLM refinement).
# Recommended for most use cases — reduces LLM calls while maintaining accuracy.
[retrieval.strategy.hybrid]
enabled = true
pre_filter_ratio = 0.3        # Keep the top 30% of BM25 candidates
min_candidates = 2            # Minimum candidates passed to the LLM
max_candidates = 5            # Maximum candidates for LLM refinement
auto_accept_threshold = 0.85  # BM25 score at/above which the LLM is skipped (accept)
auto_reject_threshold = 0.15  # BM25 score at/below which the LLM is skipped (reject)
bm25_weight = 0.4             # BM25 contribution to the final score
llm_weight = 0.6              # LLM contribution to the final score

# Cross-document retrieval — searching several documents at once.
[retrieval.strategy.cross_document]
enabled = true
max_documents = 10        # Maximum documents to search
max_results_per_doc = 3   # Maximum results kept per document
max_total_results = 10    # Maximum results kept overall
min_score = 0.3           # Minimum score threshold
merge_strategy = "TopK"   # TopK | BestPerDocument | WeightedByRelevance
parallel_search = true    # Search documents in parallel

# Page-range strategy — filter by page range before retrieval.
[retrieval.strategy.page_range]
enabled = true
include_boundary_nodes = true  # Include nodes that span a range boundary
expand_context_pages = 0       # Widen the range by N pages for extra context
min_overlap_ratio = 0.1        # Minimum overlap ratio for a node to be included
[retrieval.content]
enabled = true
token_budget = 4000
min_relevance_score = 0.2
scoring_strategy = "hybrid"  # keyword | bm25 | hybrid
output_format = "markdown"
include_scores = false
hierarchical_min_per_level = 0.1
deduplicate = true
dedup_threshold = 0.9

# ============================================================================
# Multi-turn Retrieval Configuration
# ============================================================================
[retrieval.multiturn]
enabled = true
max_sub_queries = 3
decomposition_model = "gpt-4o-mini"
aggregation_strategy = "merge"  # merge | rank | synthesize

# ============================================================================
# Reference Following Configuration
# ============================================================================
[retrieval.reference]
enabled = true
max_depth = 3
max_references = 10
follow_pages = true
follow_tables_figures = true
min_confidence = 0.5
# ============================================================================
# Storage Configuration
# ============================================================================
[storage]
workspace_dir = "./workspace"
cache_size = 100
atomic_writes = true
file_lock = true
checksum_enabled = true

[storage.compression]
enabled = false
algorithm = "gzip"
level = 6  # gzip compression level
# ============================================================================
# Indexer Configuration
# ============================================================================
[indexer]
# NOTE(review): key names suggest token counts, but units are not stated
# anywhere in this file — confirm against the indexer implementation.
subsection_threshold = 300
max_segment_tokens = 3000
max_summary_tokens = 200
min_summary_tokens = 20