archebase · zhexuany · Feb 26, 2026 · Feb 25, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -23,7 +23,7 @@ roboflow-distributed = { path = "crates/roboflow-distributed", version = "0.2.0"
 
 # External dependencies
 # Pinned to specific commit for reproducible builds
-robocodec = { git = "https://github.com/archebase/robocodec", rev = "019baae541f1cb1d89439e9940d5fbef98f38898" }
+robocodec = { git = "https://github.com/archebase/robocodec", rev = "b27acf0e5881c38f8cc7f09e25c15787a440ae13" }
 chrono = { version = "0.4", features = ["serde"] }
 async-trait = "0.1"
 tokio = { version = "1.40", features = ["rt-multi-thread", "sync"] }

diff --git a/crates/roboflow-dataset/src/sources/bag.rs b/crates/roboflow-dataset/src/sources/bag.rs
@@ -48,6 +48,7 @@ impl BagSource {
         }
     }
 
+    #[cfg(test)]
     fn is_cloud_url(&self) -> bool {
         self.path.starts_with("s3://") || self.path.starts_with("oss://")
     }
@@ -195,12 +196,6 @@ impl Source for BagSource {
             self.path = path.clone();
         }
 
-        if self.is_cloud_url() {
-            return Err(SourceError::InvalidConfig(
-                "Cloud URLs not yet supported for BagSource. Use local files.".to_string(),
-            ));
-        }
-
         let (metadata, rx, handle) =
             initialize_threaded_source(&self.path, "bag-decoder", |path, meta_tx, msg_tx| {
                 spawn_local_decoder(path, meta_tx, msg_tx, "bag")
@@ -308,6 +303,7 @@ impl BagSourceBatched {
         }
     }
 
+    #[cfg(test)]
     fn is_cloud_url(&self) -> bool {
         self.path.starts_with("s3://") || self.path.starts_with("oss://")
     }
@@ -470,12 +466,6 @@ impl Source for BagSourceBatched {
             self.path = path.clone();
         }
 
-        if self.is_cloud_url() {
-            return Err(SourceError::InvalidConfig(
-                "Batched mode not supported for cloud URLs yet".to_string(),
-            ));
-        }
-
         let batch_size = self.batch_size;
         let (metadata, rx, handle) = initialize_threaded_source_batched(
             &self.path,
@@ -601,6 +591,7 @@ impl BagSourceBlocking {
         }
     }
 
+    #[cfg(test)]
     fn is_cloud_url(&self) -> bool {
         self.path.starts_with("s3://") || self.path.starts_with("oss://")
     }
@@ -709,12 +700,6 @@ impl Source for BagSourceBlocking {
             self.path = path.clone();
         }
 
-        if self.is_cloud_url() {
-            return Err(SourceError::InvalidConfig(
-                "Blocking mode not supported for cloud URLs".to_string(),
-            ));
-        }
-
         let batch_size = self.batch_size;
         let (tx, rx) = crossbeam_channel::bounded(16);
         let (meta_tx, meta_rx) = tokio::sync::oneshot::channel();
@@ -1025,3 +1010,32 @@ mod tests {
         assert!(!source.is_cloud_url());
     }
 }
+
+#[cfg(test)]
+mod s3_url_tests {
+    //! Tests verifying S3/OSS URLs are accepted (not rejected).
+    //! These tests verify that the artificial "Cloud URLs not yet supported"
+    //! restriction has been removed.
+
+    use super::*;
+
+    #[test]
+    fn test_bag_source_accepts_s3_url() {
+        let source = BagSource::new("s3://bucket/file.bag");
+        assert!(source.is_ok(), "BagSource should accept S3 URLs");
+        let source = source.unwrap();
+        assert!(source.is_cloud_url());
+    }
+
+    #[test]
+    fn test_bag_source_batched_accepts_s3_url() {
+        let source = BagSourceBatched::new("s3://bucket/file.bag", 100);
+        assert!(source.is_ok(), "BagSourceBatched should accept S3 URLs");
+    }
+
+    #[test]
+    fn test_bag_source_blocking_accepts_s3_url() {
+        let source = BagSourceBlocking::new("s3://bucket/file.bag", 100);
+        assert!(source.is_ok(), "BagSourceBlocking should accept S3 URLs");
+    }
+}
diff --git a/crates/roboflow-dataset/src/sources/mcap.rs b/crates/roboflow-dataset/src/sources/mcap.rs
@@ -48,6 +48,7 @@ impl McapSource {
         }
     }
 
+    #[cfg(test)]
     fn is_cloud_url(&self) -> bool {
         self.path.starts_with("s3://") || self.path.starts_with("oss://")
     }
@@ -195,12 +196,6 @@ impl Source for McapSource {
             self.path = path.clone();
         }
 
-        if self.is_cloud_url() {
-            return Err(SourceError::InvalidConfig(
-                "Cloud URLs not yet supported for McapSource. Use local files.".to_string(),
-            ));
-        }
-
         let (metadata, rx, handle) =
             initialize_threaded_source(&self.path, "mcap-decoder", |path, meta_tx, msg_tx| {
                 spawn_local_decoder(path, meta_tx, msg_tx, "mcap")

diff --git a/crates/roboflow-distributed/src/batch/controller.rs b/crates/roboflow-distributed/src/batch/controller.rs
@@ -9,7 +9,7 @@
 
 use super::key::{BatchIndexKeys, BatchKeys, WorkUnitKeys};
 use super::spec::BatchSpec;
-use super::status::{BatchPhase, BatchStatus, DiscoveryStatus};
+use super::status::{BatchPhase, BatchStatus, DiscoveryStatus, FailedWorkUnit};
 use super::work_unit::{WorkUnit, WorkUnitStatus};
 use crate::tikv::{TikvClient, TikvError};
 
@@ -373,12 +373,33 @@ impl BatchController {
         let mut completed = 0u32;
         let mut failed = 0u32;
         let mut processing = 0u32;
+        let mut failed_work_units = Vec::new();
 
         for (key, value) in work_units {
             match bincode::deserialize::<WorkUnit>(&value) {
                 Ok(unit) => match unit.status {
                     WorkUnitStatus::Complete => completed += 1,
-                    WorkUnitStatus::Failed | WorkUnitStatus::Dead => failed += 1,
+                    WorkUnitStatus::Failed | WorkUnitStatus::Dead => {
+                        failed += 1;
+                        // Collect error details from failed work units
+                        let error = unit
+                            .error
+                            .as_ref()
+                            .cloned()
+                            .unwrap_or_else(|| "Unknown error".to_string());
+                        let source_file = unit
+                            .files
+                            .first()
+                            .map(|f| f.url.clone())
+                            .unwrap_or_else(|| "unknown".to_string());
+                        failed_work_units.push(FailedWorkUnit {
+                            id: unit.id.clone(),
+                            source_file,
+                            error,
+                            retries: unit.attempts,
+                            failed_at: unit.updated_at,
+                        });
+                    }
-                    WorkUnitStatus::Failed | WorkUnitStatus::Dead => {
-                        failed += 1;
-                        // Collect error details from failed work units
-                        if let Some(error) = &unit.error {
-                            let source_file = unit
-                                .files
-                                .first()
-                                .map(|f| f.url.clone())
-                                .unwrap_or_else(|| "unknown".to_string());
-                            failed_work_units.push(FailedWorkUnit {
-                                id: unit.id.clone(),
-                                source_file,
-                                error: error.clone(),
-                                retries: unit.attempts,
-                                failed_at: unit.updated_at,
-                            });
-                        }
-                    }
+                    WorkUnitStatus::Failed | WorkUnitStatus::Dead => {
+                        failed += 1;
+                        // Collect error details from failed work units
+                        let error = unit
+                            .error
+                            .clone()
+                            .unwrap_or_else(|| "No error details available".to_string());
+                        let source_file = unit
+                            .files
+                            .first()
+                            .map(|f| f.url.clone())
+                            .unwrap_or_else(|| "unknown".to_string());
+                        failed_work_units.push(FailedWorkUnit {
+                            id: unit.id.clone(),
+                            source_file,
+                            error,
+                            retries: unit.attempts,
+                            failed_at: unit.updated_at,
+                        });
+                    }
-                    WorkUnitStatus::Failed | WorkUnitStatus::Dead => {
-                        failed += 1;
-                        // Collect error details from failed work units
-                        if let Some(error) = &unit.error {
-                            let source_file = unit
-                                .files
-                                .first()
-                                .map(|f| f.url.clone())
-                                .unwrap_or_else(|| "unknown".to_string());
-                            failed_work_units.push(FailedWorkUnit {
-                                id: unit.id.clone(),
-                                source_file,
-                                error: error.clone(),
-                                retries: unit.attempts,
-                                failed_at: unit.updated_at,
-                            });
-                        }
-                    }
+                    WorkUnitStatus::Failed | WorkUnitStatus::Dead => {
+                        failed += 1;
+                        // Collect error details from failed work units
+                        let error = unit
+                            .error
+                            .clone()
+                            .unwrap_or_else(|| "No error details available".to_string());
+                        let source_file = unit
+                            .files
+                            .first()
+                            .map(|f| f.url.clone())
+                            .unwrap_or_else(|| "unknown".to_string());
+                        failed_work_units.push(FailedWorkUnit {
+                            id: unit.id.clone(),
+                            source_file,
+                            error,
+                            retries: unit.attempts,
+                            failed_at: unit.updated_at,
+                        });
+                    }
                     WorkUnitStatus::Processing => processing += 1,
                     _ => {}
                 },
@@ -416,6 +437,7 @@ impl BatchController {
         status.files_completed = completed;
         status.files_failed = failed;
         status.files_active = processing;
+        status.failed_work_units = failed_work_units;
 
         if matches!(status.phase, BatchPhase::Failed)
             && failed == 0

diff --git a/crates/roboflow-distributed/tests/test_controller_new.rs b/crates/roboflow-distributed/tests/test_controller_new.rs
@@ -0,0 +1,142 @@
+// SPDX-FileCopyrightText: 2026 ArcheBase
+//
+// SPDX-License-Identifier: MulanPSL-2.0
+
+//! Integration test for failed_work_units population in batch status.
+
+use roboflow_distributed::batch::{
+    BatchController, BatchIndexKeys, BatchKeys, BatchPhase, BatchSpec, BatchStatus, WorkFile,
+    WorkUnit, WorkUnitKeys, WorkUnitStatus,
+};
+use roboflow_distributed::tikv::client::TikvClient;
+use std::sync::Arc;
+
+fn unique_batch_id(prefix: &str) -> String {
+    format!("jobs:{}-{}", prefix, uuid::Uuid::new_v4())
+}
+
+async fn get_tikv_client() -> Option<Arc<TikvClient>> {
+    match TikvClient::from_env().await {
+        Ok(client) => Some(Arc::new(client)),
+        Err(e) => {
+            println!("Skipping test: TiKV not available: {}", e);
+            None
+        }
+    }
+}
+
+#[tokio::test]
+async fn test_reconcile_populates_failed_work_units_with_error_details() {
+    //! Verify that reconcile populates failed_work_units with error details.
+    //!
+    //! This tests the fix for the issue where work unit failures showed no error
+    //! details in batch status output.
+    let tikv = match get_tikv_client().await {
+        Some(client) => client,
+        None => return,
+    };
+    let controller = BatchController::with_client(tikv.clone());
+
+    let batch_id = unique_batch_id("test-failed-work-units");
+    let batch_name = batch_id.strip_prefix("jobs:").unwrap();
+
+    // Create batch
+    let spec = BatchSpec::new(
+        batch_name,
+        vec!["s3://test/file.bag".to_string()],
+        "s3://output/".to_string(),
+    );
+    controller.submit_batch(&spec).await.unwrap();
+
+    // Create work units: one complete, one failed with error
+    let complete_unit_id = "unit-complete";
+    let failed_unit_id = "unit-failed";
+    let error_message = "Test error: codec failure";
+
+    // Create complete work unit
+    let mut complete_unit = WorkUnit::with_id(
+        complete_unit_id.to_string(),
+        batch_id.to_string(),
+        vec![WorkFile::new("s3://test/file1.bag".to_string(), 1024)],
+        "s3://output/".to_string(),
+        "config-hash".to_string(),
+    );
+    complete_unit.status = WorkUnitStatus::Complete;
+
+    // Create failed work unit with error
+    let mut failed_unit = WorkUnit::with_id(
+        failed_unit_id.to_string(),
+        batch_id.to_string(),
+        vec![WorkFile::new("s3://test/file2.bag".to_string(), 2048)],
+        "s3://output/".to_string(),
+        "config-hash".to_string(),
+    );
+    failed_unit.status = WorkUnitStatus::Dead;
+    failed_unit.error = Some(error_message.to_string());
+    failed_unit.attempts = 3;
+
+    // Store work units in TiKV
+    let complete_key = WorkUnitKeys::unit(&batch_id, complete_unit_id);
+    let failed_key = WorkUnitKeys::unit(&batch_id, failed_unit_id);
+    tikv.put(
+        complete_key.clone(),
+        bincode::serialize(&complete_unit).unwrap(),
+    )
+    .await
+    .unwrap();
+    tikv.put(
+        failed_key.clone(),
+        bincode::serialize(&failed_unit).unwrap(),
+    )
+    .await
+    .unwrap();
+
+    // Transition batch to Running phase
+    let mut status = BatchStatus::new();
+    status.transition_to(BatchPhase::Running);
+    status.set_work_units_total(2);
+    let status_key = BatchKeys::status(&batch_id);
+    tikv.put(status_key.clone(), bincode::serialize(&status).unwrap())
+        .await
+        .unwrap();
+
+    // Trigger reconciliation using public API
+    let result = controller.reconcile_batch_id(&batch_id).await;
+    assert!(result.is_ok(), "Reconciliation should succeed");
+
+    // Get updated status
+    let updated_status = controller
+        .get_batch_status(&batch_id)
+        .await
+        .unwrap()
+        .unwrap();
+
+    // Verify failed_work_units is populated
+    assert_eq!(
+        updated_status.failed_work_units.len(),
+        1,
+        "Should have one failed work unit"
+    );
+
+    let failed = &updated_status.failed_work_units[0];
+    assert_eq!(failed.id, failed_unit_id);
+    assert_eq!(failed.source_file, "s3://test/file2.bag");
+    assert_eq!(failed.error, error_message);
+    assert_eq!(failed.retries, 3);
+
+    // Verify counts
+    assert_eq!(updated_status.work_units_completed, 1);
+    assert_eq!(updated_status.work_units_failed, 1);
+
+    // Cleanup
+    let _ = tikv.delete(BatchKeys::spec(&batch_id)).await;
+    let _ = tikv.delete(BatchKeys::status(&batch_id)).await;
+    let _ = tikv.delete(complete_key).await;
+    let _ = tikv.delete(failed_key).await;
+    let _ = tikv
+        .delete(BatchIndexKeys::phase(BatchPhase::Pending, &batch_id))
+        .await;
+    let _ = tikv
+        .delete(BatchIndexKeys::phase(BatchPhase::Running, &batch_id))
+        .await;
+}