Conversation
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
Benchmarks: FineWeb NVMe
Summary
Detailed Results Table
|
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
Benchmarks: FineWeb S3
Summary
Detailed Results Table
|
Benchmarks: Statistical and Population Genetics
Summary
Detailed Results Table
|
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
Benchmarks: Clickbench on NVMe
Summary
Detailed Results Table
|
Polar Signals Profiling Results
Latest Run
Previous Runs (1)
Powered by Polar Signals Cloud |
Benchmarks: Random Access
Summary
|
| if: matrix.remote_storage == null || github.event.pull_request.head.repo.fork == true | ||
| shell: bash | ||
| env: | ||
| VORTEX_USE_SCAN_API: "1" |
There was a problem hiding this comment.
This should be 0 — or should we remove the old one?
| pub struct LayoutReaderDataSource { | ||
| reader: LayoutReaderRef, | ||
| session: VortexSession, | ||
| split_size: u64, |
Benchmarks: Compression
Summary
Detailed Results Table
|
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Deploying vortex-bench with
|
| Latest commit: |
7aaf8b6
|
| Status: | ✅ Deploy successful! |
| Preview URL: | https://b9e83af8.vortex-93b.pages.dev |
| Branch Preview URL: | https://ngates-scan-api.vortex-93b.pages.dev |
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
| loop { | ||
| // Try to pull from the current child's split stream. | ||
| if let Some(ref mut child_stream) = current_stream { | ||
| match child_stream.next().await { | ||
| Some(Ok(split)) => { | ||
| if let Some(ref mut s) = state | ||
| && let Some(ref mut limit) = s.remaining_limit | ||
| { | ||
| let est = split.row_count_estimate(); | ||
| *limit = limit.saturating_sub(est.upper.unwrap_or(est.lower)); | ||
| } | ||
| return Some((Ok(split), (state, current_stream))); | ||
| } | ||
| Some(Err(e)) => { | ||
| return Some((Err(e), (None, None))); | ||
| } | ||
| None => { | ||
| // Current child exhausted, move to next. | ||
| drop(current_stream.take()); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| let s = state.as_mut()?; | ||
|
|
||
| if s.remaining_limit.is_some_and(|l| l == 0) { | ||
| return None; | ||
| } | ||
|
|
||
| // Get the next data source. | ||
| let source = match s.next_source().await { | ||
| Ok(Some(source)) => source, | ||
| Ok(None) => return None, | ||
| Err(e) => return Some((Err(e), (None, None))), | ||
| }; | ||
|
|
||
| if source.dtype() != &s.dtype { | ||
| return Some(( | ||
| Err(vortex_err!( | ||
| "MultiDataSource dtype mismatch: expected {}, got {}", | ||
| s.dtype, | ||
| source.dtype() | ||
| )), | ||
| (None, None), | ||
| )); | ||
| } | ||
|
|
||
| let mut child_request = s.request.clone(); | ||
| child_request.limit = s.remaining_limit; | ||
| let child_scan = match source.scan(child_request) { | ||
| Ok(scan) => scan, | ||
| Err(e) => return Some((Err(e), (None, None))), | ||
| }; | ||
|
|
||
| current_stream = Some(child_scan.splits()); | ||
| } |
There was a problem hiding this comment.
This can be extracted into a helper method that takes `&mut current_stream`; then you can do:
impl DataSourceScan for MultiDataSourceScan {
fn splits(self: Box<Self>) -> SplitStream {
stream::unfold(
(*self, None::<SplitStream>),
|(mut scan, mut current_stream)| async move {
let result = scan.next_split(&mut current_stream).await?;
Some((result, (scan, current_stream)))
},
)
.boxed()
}
}
It doesn't seem like you need it to be `Some(*self)`.
| _ => Precision::Absent, | ||
| }; | ||
|
|
||
| let byte_size_est = self.data_source.row_count_estimate(); |
There was a problem hiding this comment.
This is probably wrong? `byte_size_est` is being computed from `row_count_estimate()`.
Experiment with using the Scan API from the DuckDB and DataFusion integrations.