diff --git a/cmd/keystone-edge/main.go b/cmd/keystone-edge/main.go index d3acfe6..b5f1664 100644 --- a/cmd/keystone-edge/main.go +++ b/cmd/keystone-edge/main.go @@ -35,24 +35,30 @@ var ( ) func main() { - // Load .env file - if err := godotenv.Load(); err != nil { - logger.Printf("[SERVER] Failed to load .env file: %v", err) - } - - // Command line flags showVersion := flag.Bool("version", false, "Show version information") configPath := flag.String("config", "/etc/keystone-edge/config.toml", "Configuration file path") flag.Parse() - // Show version if *showVersion { fmt.Printf("Keystone Edge %s (built: %s)\n", version, buildTime) os.Exit(0) } - // Initialize logger - logger.Init(logger.DefaultOptions()) + logFile, err := os.OpenFile("keystone-edge.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to open keystone-edge.log: %v\n", err) + os.Exit(1) + } + defer func() { + _ = logFile.Close() + }() + + logger.InitWithWriter(logFile, logger.DefaultOptions()) + + if err := godotenv.Load(); err != nil { + logger.Printf("[SERVER] Failed to load .env file: %v", err) + } + logger.Printf("[SERVER] Starting Keystone Edge %s", version) logger.Printf("[SERVER] Config file: %s", *configPath) diff --git a/docs/designs/production-units.md b/docs/designs/production-units.md index f97752d..9ec52a6 100644 --- a/docs/designs/production-units.md +++ b/docs/designs/production-units.md @@ -65,8 +65,10 @@ Organization ── owns ──► Order ── has many ──► Batch ── This document does not restate full table schemas; it only defines key field semantics (see the migration file for details). - **Order** - - `target_count`: the maximum number of Tasks expected to be generated for the order (currently used to cap `POST /tasks`). - - `completed_count`: a derived statistic (based on the number of completed Tasks). + - `target_count`: the desired number of **completed** Tasks for the order (see §6.1). 
+ - `task_count`: a derived statistic: `COUNT(tasks)` under the order (non-deleted; includes all statuses). + - `completed_count`: a derived statistic: number of Tasks with `status='completed'` (non-deleted). + - `cancelled_count` / `failed_count`: derived statistics from Tasks (non-deleted). - **Batch** - `batch_id`: a human-readable ID (for display/traceability). - `episode_count`: the number of persisted Episodes (see 3.2). @@ -104,13 +106,23 @@ This document does not restate full table schemas; it only defines key field sem | Method | Path | Notes | |------|------|------| | GET | `/batches` | List (filters: `order_id/workstation_id/status/limit/offset`) | +| POST | `/batches` | **Create Batch + Tasks atomically** (`task_groups`) | | GET | `/batches/:id` | Detail | -| PATCH | `/batches/:id` | Status only; allows `pending -> active/cancelled`, `active -> completed/cancelled` | +| GET | `/batches/:id/tasks` | List tasks under a batch | +| POST | `/batches/:id/tasks` | Declaratively adjust task quantities (`task_groups`) | +| PATCH | `/batches/:id` | **Cancel only** (`pending/active -> cancelled`) | +| POST | `/batches/:id/recall` | Recall (`active/completed -> recalled`) | | DELETE | `/batches/:id` | Soft delete (only allowed when `status=cancelled`) | **Design constraints:** -- There is currently no `POST /batches`. Batches are created/reused implicitly during Task generation (see 5.3). +- **Cancellation semantics (PATCH)**: `PATCH /batches/:id` only supports transitioning to `cancelled`. It cascades cancellation to tasks in `pending/ready/in_progress` under the batch, and best-effort notifies Axon recorder: + - `ready` tasks → recorder `clear` + - `in_progress` tasks → recorder `cancel {task_id}` +- **Automatic advancement**: + - `pending -> active`: automatic (triggered when a task under the batch reaches `completed` or `failed`; see §6.3). + - `active -> completed`: automatic (when **all** non-deleted tasks are terminal: `completed/failed/cancelled`; see §6.3). 
+- **Recall semantics (POST)**: recall is a separate endpoint (`POST /batches/:id/recall`), not a `PATCH` status update. --- @@ -118,37 +130,66 @@ This document does not restate full table schemas; it only defines key field sem #### 5.3.1 Create (`POST /tasks`) -`POST /tasks` is the entry point for creating Tasks per **(order + workstation)**: +`POST /tasks` exists for backwards compatibility. It creates Tasks per **(order + workstation)**: - **Request fields**: `order_id`, `sop_id`, `subscene_id`, `workstation_id`, optional `quantity` (default 1, range 1..1000) -- **Quantity constraint**: total Tasks under the same Order must not exceed `orders.target_count`. - **Batch association (implicit)**: - Prefer reusing a batch under the same `(order_id, workstation_id)` with status `pending/active`; - Otherwise create a new `pending` batch. +**Quantity constraint (current implementation):** + +- `POST /tasks`: caps by **existing task rows** (non-deleted) under the order: `existing_tasks + quantity <= target_count`. +- `POST /batches`: caps by **completed tasks only** under the order: `completed_count + requested_total_quantity <= target_count`. + +This difference is a known inconsistency (see §8). + #### 5.3.2 Query and config | Method | Path | Notes | |------|------|------| | GET | `/tasks` | List (filters: `workstation_id/status/limit/offset`) | | GET | `/tasks/:id` | Detail (includes `episode` if linked) | -| PUT | `/tasks/:id` | Status update (restricted transitions) | +| PUT | `/tasks/:id` | Status update (restricted transitions; see §6.2) | | GET | `/tasks/:id/config` | Generate recorder config (requires workstation robot + collector bindings) | --- ## 6. State machines (design constraints + current implementation) -### 6.1 Task states +### 6.1 Order states + +- **State set**: `created` | `in_progress` | `paused` | `completed` | `cancelled` +- **Auto-advance rules (completed-only)**: + - `created -> in_progress`: when the order has **at least one** `completed` task. 
+ - `in_progress -> completed`: when `completed_count == target_count`. +- **Order completion side-effects (current implementation)**: + - When auto-advancing to `completed`, Keystone finalizes any still-open batches for this order: + - Cancels runnable tasks (`pending/ready/in_progress`) under batches in `pending/active`. + - Marks those batches `completed`. + - Best-effort notifies Axon recorder (clear/cancel) for affected `ready/in_progress` tasks. + +### 6.2 Task states - **State set**: `pending` | `ready` | `in_progress` | `completed` | `failed` | `cancelled` - **Prepare (pending→ready)**: triggered by UI/scheduler (currently via `PUT /tasks/:id`). -- **Complete (→completed)**: set after Episode persistence via Transfer Verified ACK (current implementation). +- **Run (ready→in_progress)**: triggered by UI/device workflow (currently via `PUT /tasks/:id`). +- **Transfer ACK**: + - On verified upload ACK, Keystone marks task `in_progress -> completed` (only if currently `in_progress`). + - On `upload_failed`, Keystone marks task `in_progress -> failed`. +- **Revert to pending (ready/in_progress→pending)**: used for recovery when Transfer disconnects (to avoid stuck runnable tasks). -### 6.2 Batch states +### 6.3 Batch states `pending` | `active` | `completed` | `cancelled` | `recalled` -Currently `PATCH /batches/:id` only supports limited transitions (see 5.2) to control the batch lifecycle. + +**Current transition rules:** + +- **Manual cancellation**: `pending/active -> cancelled` via `PATCH /batches/:id` (and cascade-cancel tasks). +- **Manual recall**: `active/completed -> recalled` via `POST /batches/:id/recall` (and labels Episodes). +- **Automatic advancement**: + - `pending -> active`: when a task under the batch reaches a terminal state (`completed` or `failed`). + - `active -> completed`: when **all** non-deleted tasks under the batch are terminal (`completed/failed/cancelled`). 
--- @@ -173,7 +214,7 @@ When the device reports `upload_complete`, Keystone runs the Verified ACK flow: - **Idempotent**: if an Episode already exists for this `task_id`, do not insert again - Insert into `episodes` (persist denormalized fields such as `batch_id/order_id/scene_id/...`) - `batches.episode_count += 1` (only when a new Episode is inserted) - - Update `tasks.status` to **`completed`** (and set `completed_at`) + - Update `tasks.status` to **`completed`** (and set `completed_at`) **only when current status is `in_progress`** 3. **Send `upload_ack`** to the device --- @@ -182,7 +223,11 @@ When the device reports `upload_complete`, Keystone runs the Verified ACK flow: - **In-recording state**: `callbacks/start` does not persist state today; `ready -> in_progress` validation/persistence is not implemented yet. - **Failure path**: an end-to-end `failed` terminal state and error attribution are not fully implemented (callbacks/transfer need to be extended). -- **Controlled order transitions**: Order status updates currently only validate enum values; it should converge to controlled transitions aligned with Primer and linked to Task statistics. +- **Quota consistency**: + - `POST /tasks` caps by **task row count** under an order. + - `POST /batches` caps by **completed task count** under an order. + These should converge to a single definition (either “planned tasks” or “completed target”) to avoid surprising client behavior. +- **Controlled order transitions**: `PUT /orders/:id` validates enum values, but auto-advance also exists (see §6.1). These should converge to a single, explicit policy aligned with Primer. 
--- diff --git a/internal/api/handlers/auth.go b/internal/api/handlers/auth.go index bf60a5b..6330fd1 100644 --- a/internal/api/handlers/auth.go +++ b/internal/api/handlers/auth.go @@ -10,6 +10,7 @@ import ( "strconv" "strings" + "archebase.com/keystone-edge/internal/logger" "github.com/gin-gonic/gin" "github.com/jmoiron/sqlx" "golang.org/x/crypto/bcrypt" @@ -59,6 +60,28 @@ func (h *AuthHandler) RegisterRoutes(r *gin.RouterGroup) { r.POST("/auth/login", h.LoginCollector) r.POST("/auth/logout", h.Logout) r.GET("/auth/me", h.Me) + r.POST("/auth/me/station/break", h.MeStationBreak) + r.POST("/auth/me/station/end-break", h.MeStationEndBreak) +} + +// requireCollectorClaims parses Bearer JWT and returns collector claims, or writes 401 and returns false. +func (h *AuthHandler) requireCollectorClaims(c *gin.Context) (*auth.Claims, bool) { + authHeader := c.GetHeader("Authorization") + if strings.TrimSpace(authHeader) == "" { + c.JSON(http.StatusUnauthorized, gin.H{"error": "missing authorization header"}) + return nil, false + } + parts := strings.SplitN(authHeader, " ", 2) + if len(parts) != 2 || !strings.EqualFold(parts[0], "Bearer") || strings.TrimSpace(parts[1]) == "" { + c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid authorization header format"}) + return nil, false + } + claims, err := auth.ParseToken(parts[1], h.cfg) + if err != nil { + c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid or expired token"}) + return nil, false + } + return claims, true } // LoginCollector authenticates data collector and returns JWT access token. @@ -103,6 +126,43 @@ func (h *AuthHandler) LoginCollector(c *gin.Context) { _, _ = h.db.Exec("UPDATE data_collectors SET last_login_at = NOW() WHERE id = ?", row.ID) + // On successful login, set workstation status to active/inactive based on + // whether there is an active batch for this workstation. + // + // Best-effort only: if the lookup/update fails, do not block login. 
+ var wsID int64 + if err := h.db.Get(&wsID, ` + SELECT id + FROM workstations + WHERE data_collector_id = ? AND deleted_at IS NULL + LIMIT 1 + `, row.ID); err == nil { + var hasActiveBatch bool + if err := h.db.Get(&hasActiveBatch, ` + SELECT EXISTS( + SELECT 1 + FROM batches + WHERE workstation_id = ? AND status = 'active' AND deleted_at IS NULL + ) + `, wsID); err == nil { + newStatus := "inactive" + if hasActiveBatch { + newStatus = "active" + } + if _, err := h.db.Exec(` + UPDATE workstations + SET status = ?, updated_at = NOW() + WHERE id = ? AND deleted_at IS NULL + `, newStatus, wsID); err != nil { + logger.Printf("[AUTH] Failed to update workstation status on login (ws=%d): %v", wsID, err) + } + } else { + logger.Printf("[AUTH] Failed to query active batch for workstation on login (ws=%d): %v", wsID, err) + } + } else if err != sql.ErrNoRows { + logger.Printf("[AUTH] Failed to query workstation for collector on login (collector=%d): %v", row.ID, err) + } + claims := auth.NewCollectorClaims(row.ID, row.OperatorID) token, err := auth.GenerateToken(claims, h.cfg) if err != nil { @@ -121,29 +181,46 @@ func (h *AuthHandler) LoginCollector(c *gin.Context) { c.JSON(http.StatusOK, resp) } -// Logout ends the current session. In this MVP it is a no-op on the server side. +// Logout acknowledges logout. The client discards the token; if a valid Bearer +// token is present, the handler best-effort sets the workstation status to offline. func (h *AuthHandler) Logout(c *gin.Context) { - // MVP logout: client drops token. Keep endpoint for symmetry. + // MVP logout: client drops token. + // + // Additionally, best-effort update workstation status to offline for the + // authenticated data collector. 
+ authHeader := c.GetHeader("Authorization") + if strings.TrimSpace(authHeader) != "" { + parts := strings.SplitN(authHeader, " ", 2) + if len(parts) == 2 && strings.EqualFold(parts[0], "Bearer") && strings.TrimSpace(parts[1]) != "" { + if claims, err := auth.ParseToken(parts[1], h.cfg); err == nil { + var wsID int64 + if err := h.db.Get(&wsID, ` + SELECT id + FROM workstations + WHERE data_collector_id = ? AND deleted_at IS NULL + LIMIT 1 + `, claims.CollectorID); err == nil { + if _, err := h.db.Exec(` + UPDATE workstations + SET status = 'offline', updated_at = NOW() + WHERE id = ? AND deleted_at IS NULL + `, wsID); err != nil { + logger.Printf("[AUTH] Failed to update workstation status on logout (ws=%d): %v", wsID, err) + } + } else if err != sql.ErrNoRows { + logger.Printf("[AUTH] Failed to query workstation for collector on logout (collector=%d): %v", claims.CollectorID, err) + } + } + } + } + c.JSON(http.StatusOK, gin.H{"ok": true}) } // Me returns the current authenticated collector identity. func (h *AuthHandler) Me(c *gin.Context) { - authHeader := c.GetHeader("Authorization") - if strings.TrimSpace(authHeader) == "" { - c.JSON(http.StatusUnauthorized, gin.H{"error": "missing authorization header"}) - return - } - - parts := strings.SplitN(authHeader, " ", 2) - if len(parts) != 2 || !strings.EqualFold(parts[0], "Bearer") || strings.TrimSpace(parts[1]) == "" { - c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid authorization header format"}) - return - } - - claims, err := auth.ParseToken(parts[1], h.cfg) - if err != nil { - c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid or expired token"}) + claims, ok := h.requireCollectorClaims(c) + if !ok { return } @@ -166,10 +243,164 @@ func (h *AuthHandler) Me(c *gin.Context) { return } + var wsRow struct { + ID int64 `db:"id"` + RobotID int64 `db:"robot_id"` + } + err := h.db.Get(&wsRow, ` + SELECT id, robot_id + FROM workstations + WHERE data_collector_id = ? 
AND deleted_at IS NULL + LIMIT 1 + `, claims.CollectorID) + var workstationID *string + var robotID *string + if err != nil { + if err != sql.ErrNoRows { + c.JSON(http.StatusInternalServerError, gin.H{"error": "database error"}) + return + } + } else { + ws := strconv.FormatInt(wsRow.ID, 10) + rb := strconv.FormatInt(wsRow.RobotID, 10) + workstationID = &ws + robotID = &rb + } + + c.JSON(http.StatusOK, gin.H{ + "collector_id": claims.CollectorID, + "operator_id": row.OperatorID, + "name": row.Name, + "role": claims.Role, + "workstation_id": workstationID, + "robot_id": robotID, + }) +} + +// MeStationBreak sets the collector's workstation status to break (unless the workstation is offline). +func (h *AuthHandler) MeStationBreak(c *gin.Context) { + claims, ok := h.requireCollectorClaims(c) + if !ok { + return + } + + var wsID int64 + err := h.db.Get(&wsID, ` + SELECT id + FROM workstations + WHERE data_collector_id = ? AND deleted_at IS NULL + LIMIT 1 + `, claims.CollectorID) + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error": "workstation not assigned"}) + return + } + if err != nil { + logger.Printf("[AUTH] MeStationBreak: failed to resolve workstation: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "database error"}) + return + } + + var cur string + if err := h.db.Get(&cur, `SELECT status FROM workstations WHERE id = ? AND deleted_at IS NULL`, wsID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error": "workstation not found"}) + return + } + logger.Printf("[AUTH] MeStationBreak: failed to read status: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "database error"}) + return + } + if cur == "offline" { + c.JSON(http.StatusConflict, gin.H{"error": "workstation is offline"}) + return + } + if cur != "break" { + if _, err := h.db.Exec(` + UPDATE workstations + SET status = 'break', updated_at = NOW() + WHERE id = ? 
AND deleted_at IS NULL + `, wsID); err != nil { + logger.Printf("[AUTH] MeStationBreak: failed to update workstation %d: %v", wsID, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update workstation"}) + return + } + } + + c.JSON(http.StatusOK, gin.H{ + "workstation_id": strconv.FormatInt(wsID, 10), + "status": "break", + }) +} + +// MeStationEndBreak sets the collector's workstation to active or inactive depending on whether +// an active batch exists (same rule as login). Does not override offline (returns 409). +func (h *AuthHandler) MeStationEndBreak(c *gin.Context) { + claims, ok := h.requireCollectorClaims(c) + if !ok { + return + } + + var wsID int64 + err := h.db.Get(&wsID, ` + SELECT id + FROM workstations + WHERE data_collector_id = ? AND deleted_at IS NULL + LIMIT 1 + `, claims.CollectorID) + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error": "workstation not assigned"}) + return + } + if err != nil { + logger.Printf("[AUTH] MeStationEndBreak: failed to resolve workstation: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "database error"}) + return + } + + var cur string + if err := h.db.Get(&cur, `SELECT status FROM workstations WHERE id = ? AND deleted_at IS NULL`, wsID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error": "workstation not found"}) + return + } + logger.Printf("[AUTH] MeStationEndBreak: failed to read status: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "database error"}) + return + } + if cur == "offline" { + c.JSON(http.StatusConflict, gin.H{"error": "workstation is offline"}) + return + } + + var hasActiveBatch bool + if err := h.db.Get(&hasActiveBatch, ` + SELECT EXISTS( + SELECT 1 + FROM batches + WHERE workstation_id = ? 
AND status = 'active' AND deleted_at IS NULL + ) + `, wsID); err != nil { + logger.Printf("[AUTH] MeStationEndBreak: failed to query batches: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "database error"}) + return + } + newStatus := "inactive" + if hasActiveBatch { + newStatus = "active" + } + if _, err := h.db.Exec(` + UPDATE workstations + SET status = ?, updated_at = NOW() + WHERE id = ? AND deleted_at IS NULL + `, newStatus, wsID); err != nil { + logger.Printf("[AUTH] MeStationEndBreak: failed to update workstation %d: %v", wsID, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update workstation"}) + return + } + c.JSON(http.StatusOK, gin.H{ - "collector_id": claims.CollectorID, - "operator_id": row.OperatorID, - "name": row.Name, - "role": claims.Role, + "workstation_id": strconv.FormatInt(wsID, 10), + "status": newStatus, }) } diff --git a/internal/api/handlers/axon_rpc.go b/internal/api/handlers/axon_rpc.go index 8653c9a..7722f91 100644 --- a/internal/api/handlers/axon_rpc.go +++ b/internal/api/handlers/axon_rpc.go @@ -10,6 +10,7 @@ import ( "encoding/json" "errors" "net/http" + "strings" "time" "github.com/coder/websocket" @@ -164,6 +165,7 @@ func (h *RecorderHandler) HandleWebSocket(w http.ResponseWriter, r *http.Request rc := h.hub.NewRecorderConn(conn, deviceID, remoteIP) h.hub.Connect(deviceID, rc) defer h.hub.Disconnect(deviceID) + defer revertRunnableTasksOnDeviceDisconnect(h.db, deviceID, nil, 0, false) // #nosec G706 -- Set aside for now logger.Printf("[RECORDER] Recorder %s connected from %s", deviceID, remoteIP) @@ -206,7 +208,40 @@ func (h *RecorderHandler) Config(c *gin.Context) { if !ok { return } - h.callRPC(c, "config", params) + + var taskID string + if params != nil { + if tc, ok := params["task_config"].(map[string]interface{}); ok { + taskID, _ = tc["task_id"].(string) + taskID = strings.TrimSpace(taskID) + } + } + + if !h.callRPC(c, "config", params) { + return + } + + // If RPC succeeded 
(HTTP 200), advance task status: pending -> ready. + // This is best-effort; failures should not change the RPC response. + if taskID != "" && h.db != nil { + now := time.Now().UTC() + res, err := h.db.Exec( + `UPDATE tasks + SET + status = 'ready', + ready_at = CASE WHEN ready_at IS NULL THEN ? ELSE ready_at END, + updated_at = ? + WHERE task_id = ? AND status = 'pending' AND deleted_at IS NULL`, + now, now, taskID, + ) + if err != nil { + logger.Printf("[RECORDER] Device %s: failed to advance task pending->ready after config: task=%s err=%v", c.Param("device_id"), taskID, err) + return + } + if n, _ := res.RowsAffected(); n == 0 { + logger.Printf("[RECORDER] Device %s: task pending->ready skipped after config (not found or not pending): task=%s", c.Param("device_id"), taskID) + } + } } // Begin sends begin recording RPC to the recorder. @@ -228,7 +263,38 @@ func (h *RecorderHandler) Begin(c *gin.Context) { if !ok { return } - h.callRPC(c, "begin", params) + + var taskID string + if params != nil { + if v, ok := params["task_id"].(string); ok { + taskID = strings.TrimSpace(v) + } + } + + if !h.callRPC(c, "begin", params) { + return + } + + // If RPC succeeded (HTTP 200), advance task status: ready -> in_progress. + if taskID != "" && h.db != nil { + now := time.Now().UTC() + res, err := h.db.Exec( + `UPDATE tasks + SET + status = 'in_progress', + started_at = CASE WHEN started_at IS NULL THEN ? ELSE started_at END, + updated_at = ? + WHERE task_id = ? AND status = 'ready' AND deleted_at IS NULL`, + now, now, taskID, + ) + if err != nil { + logger.Printf("[RECORDER] Device %s: failed to advance task ready->in_progress after begin: task=%s err=%v", c.Param("device_id"), taskID, err) + return + } + if n, _ := res.RowsAffected(); n == 0 { + logger.Printf("[RECORDER] Device %s: task ready->in_progress skipped after begin (not found or not ready): task=%s", c.Param("device_id"), taskID) + } + } } // Finish sends finish recording RPC to the recorder. 
@@ -290,7 +356,7 @@ func (h *RecorderHandler) Resume(c *gin.Context) { // Cancel sends cancel RPC to the recorder. // // @Summary Cancel recording -// @Description Sends cancel RPC to the Axon recorder +// @Description Sends cancel RPC to the Axon recorder. If task_id is provided and the RPC succeeds, sets task status from ready or in_progress back to pending (other statuses are left unchanged). Does not advance batch status. // @Tags recorder // @Accept json // @Produce json @@ -301,24 +367,98 @@ func (h *RecorderHandler) Resume(c *gin.Context) { // @Failure 504 {object} map[string]interface{} // @Router /recorder/{device_id}/cancel [post] func (h *RecorderHandler) Cancel(c *gin.Context) { - h.callRPC(c, "cancel", nil) + params, ok := h.bindOptionalParams(c) + if !ok { + return + } + + var taskID string + if params != nil { + if v, ok := params["task_id"].(string); ok { + taskID = strings.TrimSpace(v) + } + } + + if !h.callRPC(c, "cancel", params) { + return + } + + // If RPC succeeded (HTTP 200), only when the task is ready or in_progress: revert to pending. + // Best-effort: failures should not change the RPC response. + if taskID != "" && h.db != nil { + deviceID := c.Param("device_id") + now := time.Now().UTC() + res, err := h.db.Exec( + `UPDATE tasks + SET + status = 'pending', + updated_at = ? + WHERE task_id = ? AND status IN ('ready', 'in_progress') AND deleted_at IS NULL`, + now, taskID, + ) + if err != nil { + logger.Printf("[RECORDER] Device %s: failed to revert task after cancel RPC: task=%s err=%v", deviceID, taskID, err) + return + } + n, _ := res.RowsAffected() + if n == 0 { + logger.Printf("[RECORDER] Device %s: task revert skipped after cancel RPC (not found or not ready/in_progress): task=%s", deviceID, taskID) + } + } } // Clear sends clear RPC to the recorder. // // @Summary Clear recorder -// @Description Sends clear RPC to the Axon recorder +// @Description Sends clear RPC to the Axon recorder. 
If task_id is provided and the RPC succeeds, it will revert task status from ready to pending. // @Tags recorder // @Accept json // @Produce json // @Param device_id path string true "Recorder device ID" -// @Param body body object false "Empty body" +// @Param body body object false "Optional body (supports task_id)" // @Success 200 {object} map[string]interface{} // @Failure 404 {object} map[string]interface{} +// @Failure 400 {object} map[string]interface{} // @Failure 504 {object} map[string]interface{} // @Router /recorder/{device_id}/clear [post] func (h *RecorderHandler) Clear(c *gin.Context) { - h.callRPC(c, "clear", nil) + params, ok := h.bindOptionalParams(c) + if !ok { + return + } + + var taskID string + if params != nil { + if v, ok := params["task_id"].(string); ok { + taskID = strings.TrimSpace(v) + } + } + + // Do not forward params to recorder; keep RPC payload stable. + if !h.callRPC(c, "clear", nil) { + return + } + + // If RPC succeeded (HTTP 200), revert task status: ready -> pending. + // Best-effort: failures should not change the RPC response. + if taskID != "" && h.db != nil { + now := time.Now().UTC() + res, err := h.db.Exec( + `UPDATE tasks + SET + status = 'pending', + updated_at = ? + WHERE task_id = ? AND status = 'ready' AND deleted_at IS NULL`, + now, taskID, + ) + if err != nil { + logger.Printf("[RECORDER] Device %s: failed to revert task ready->pending after clear: task=%s err=%v", c.Param("device_id"), taskID, err) + return + } + if n, _ := res.RowsAffected(); n == 0 { + logger.Printf("[RECORDER] Device %s: task ready->pending skipped after clear (not found or not ready): task=%s", c.Param("device_id"), taskID) + } + } } // Quit sends quit RPC to the recorder. 
@@ -346,19 +486,56 @@ func (h *RecorderHandler) Quit(c *gin.Context) { // @Accept json // @Produce json // @Param device_id path string true "Recorder device ID" -// @Success 200 {object} map[string]interface{} -// @Failure 404 {object} map[string]interface{} +// @Success 200 {object} map[string]interface{} "connected + data (optional error when device reports failure)" // @Failure 504 {object} map[string]interface{} +// @Failure 500 {object} map[string]interface{} // @Router /recorder/{device_id}/stats [get] func (h *RecorderHandler) GetStats(c *gin.Context) { - h.callRPC(c, "get_stats", nil) + deviceID := c.Param("device_id") + if deviceID == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "device_id is required"}) + return + } + if h.hub.Get(deviceID) == nil { + c.JSON(http.StatusOK, disconnectedRecorderStatsResponse()) + return + } + + response, err := h.hub.SendRPC(c.Request.Context(), deviceID, "get_stats", nil, time.Duration(h.cfg.ResponseTimeout)*time.Second) + if err != nil { + switch { + case errors.Is(err, services.ErrRecorderNotConnected): + c.JSON(http.StatusOK, disconnectedRecorderStatsResponse()) + case errors.Is(err, services.ErrRecorderRPCTimeout): + c.JSON(http.StatusGatewayTimeout, gin.H{"error": err.Error()}) + default: + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + } + return + } + + out := gin.H{ + "connected": true, + "data": response.Data, + } + if !response.Success && strings.TrimSpace(response.Message) != "" { + out["error"] = response.Message + } + c.JSON(http.StatusOK, out) +} + +func disconnectedRecorderStatsResponse() gin.H { + return gin.H{ + "connected": false, + "data": map[string]interface{}{}, + } } -func (h *RecorderHandler) callRPC(c *gin.Context, action string, params map[string]interface{}) { +func (h *RecorderHandler) callRPC(c *gin.Context, action string, params map[string]interface{}) bool { deviceID := c.Param("device_id") if deviceID == "" { c.JSON(http.StatusBadRequest, gin.H{"error": 
"device_id is required"}) - return + return false } response, err := h.hub.SendRPC(c.Request.Context(), deviceID, action, params, time.Duration(h.cfg.ResponseTimeout)*time.Second) @@ -371,10 +548,11 @@ func (h *RecorderHandler) callRPC(c *gin.Context, action string, params map[stri default: c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) } - return + return false } c.JSON(http.StatusOK, response) + return true } func (h *RecorderHandler) bindOptionalParams(c *gin.Context) (map[string]interface{}, bool) { @@ -411,17 +589,38 @@ func (h *RecorderHandler) ListDevices(c *gin.Context) { // @Accept json // @Produce json // @Param device_id path string true "Recorder device ID" -// @Success 200 {object} map[string]interface{} -// @Failure 404 {object} map[string]interface{} +// @Success 200 {object} map[string]interface{} "connected=false when recorder WS is not active" // @Router /recorder/{device_id}/state [get] func (h *RecorderHandler) GetState(c *gin.Context) { deviceID := c.Param("device_id") rc := h.hub.Get(deviceID) if rc == nil { - c.JSON(http.StatusNotFound, gin.H{"error": services.ErrRecorderNotConnected.Error()}) + c.JSON(http.StatusOK, gin.H{ + "connected": false, + "current_state": "unknown", + "previous_state": "", + "task_id": "", + "updated_at": time.Now().UTC(), + }) return } - c.JSON(http.StatusOK, rc.GetState()) + st := rc.GetState() + prev := recorderPreviousStateFromRaw(st.Raw) + c.JSON(http.StatusOK, gin.H{ + "connected": true, + "current_state": st.CurrentState, + "previous_state": prev, + "task_id": st.TaskID, + "updated_at": st.UpdatedAt, + }) +} + +func recorderPreviousStateFromRaw(raw map[string]interface{}) string { + if raw == nil { + return "" + } + s, _ := raw["previous"].(string) + return strings.TrimSpace(s) } func (h *RecorderHandler) handleMessage(deviceID string, rc *services.RecorderConn, msg map[string]interface{}) { @@ -461,68 +660,10 @@ func (h *RecorderHandler) handleStateUpdate(rc *services.RecorderConn, msg 
map[s Raw: data, } rc.UpdateState(state) - h.syncTaskStatusFromRecorderState(rc.DeviceID, stringValue(data, "previous"), state.CurrentState, state.TaskID) // #nosec G706 -- Set aside for now logger.Printf("[RECORDER] Recorder %s state=%s task=%s", rc.DeviceID, state.CurrentState, state.TaskID) } -func (h *RecorderHandler) syncTaskStatusFromRecorderState(deviceID, previousState, currentState, taskID string) { - if h.db == nil { - return - } - if taskID == "" { - // #nosec G706 -- Set aside for now - logger.Printf("[RECORDER] Recorder %s state update missing task_id, skip task sync", deviceID) - return - } - - taskStatus, ok := recorderStateToTaskStatus(currentState) - if !ok { - // #nosec G706 -- Set aside for now - logger.Printf("[RECORDER] Recorder %s state update ignored: previous=%s current=%s task=%s", deviceID, previousState, currentState, taskID) - return - } - - now := time.Now() - result, err := h.db.Exec( - "UPDATE tasks SET status = ?, updated_at = ? WHERE task_id = ? AND deleted_at IS NULL", - taskStatus, now, taskID, - ) - if err != nil { - // #nosec G706 -- Set aside for now - logger.Printf("[RECORDER] Recorder %s failed to sync task status: task=%s status=%s error=%v", deviceID, taskID, taskStatus, err) - return - } - - rowsAffected, err := result.RowsAffected() - if err != nil { - // #nosec G706 -- Set aside for now - logger.Printf("[RECORDER] Recorder %s failed to check sync result: task=%s error=%v", deviceID, taskID, err) - return - } - if rowsAffected == 0 { - // #nosec G706 -- Set aside for now - logger.Printf("[RECORDER] Recorder %s task sync skipped, task not found: task=%s status=%s", deviceID, taskID, taskStatus) - return - } - - // #nosec G706 -- Set aside for now - logger.Printf("[RECORDER] Recorder %s synced task status from state_update: task=%s previous=%s current=%s mapped_status=%s", deviceID, taskID, previousState, currentState, taskStatus) -} - -func recorderStateToTaskStatus(state string) (string, bool) { - switch state { - case 
"ready": - return "ready", true - case "recording", "paused": - return "in_progress", true - case "finished", "idle": - return "completed", true - default: - return "", false - } -} - func (h *RecorderHandler) pingLoop(ctx context.Context, conn *websocket.Conn) { ticker := time.NewTicker(time.Duration(h.cfg.PingInterval) * time.Second) defer ticker.Stop() diff --git a/internal/api/handlers/batch.go b/internal/api/handlers/batch.go index 3eb4c92..c91a8c8 100644 --- a/internal/api/handlers/batch.go +++ b/internal/api/handlers/batch.go @@ -5,6 +5,7 @@ package handlers import ( + "context" "database/sql" "encoding/json" "fmt" @@ -14,26 +15,81 @@ import ( "time" "archebase.com/keystone-edge/internal/logger" + "archebase.com/keystone-edge/internal/services" "github.com/gin-gonic/gin" "github.com/jmoiron/sqlx" ) +func validateTaskGroupUniqueness(taskGroups []TaskGroupItem) (dupA int, dupB int, ok bool) { + seen := make(map[string]int, len(taskGroups)) + for i, tg := range taskGroups { + if tg.SOPID <= 0 || tg.SubsceneID <= 0 { + continue + } + key := fmt.Sprintf("%d_%d", tg.SOPID, tg.SubsceneID) + if prev, exists := seen[key]; exists { + return prev, i, true + } + seen[key] = i + } + return 0, 0, false +} + // BatchHandler handles batch-related HTTP requests. type BatchHandler struct { - db *sqlx.DB + db *sqlx.DB + recorderHub *services.RecorderHub + // recorderRPCTimeout controls how long we wait for recorder RPC responses + // when batch cancellation/recall triggers device-side clear (ready) or cancel (in_progress). + recorderRPCTimeout time.Duration +} + +type taskDeviceRow struct { + TaskID string `db:"task_id"` + DeviceID string `db:"device_id"` + Status string `db:"status"` +} + +// taskDeviceBatchRow is used when collecting recorder notify targets grouped by batch. 
+type taskDeviceBatchRow struct { + BatchID int64 `db:"batch_id"` + TaskID string `db:"task_id"` + DeviceID string `db:"device_id"` + Status string `db:"status"` +} + +// orderCompletionRecorderNotify groups Axon recorder RPC targets per batch after order completion. +type orderCompletionRecorderNotify struct { + BatchID int64 + Rows []taskDeviceRow } // NewBatchHandler creates a new BatchHandler. -func NewBatchHandler(db *sqlx.DB) *BatchHandler { - return &BatchHandler{db: db} +func NewBatchHandler(db *sqlx.DB, recorderHub *services.RecorderHub, recorderRPCTimeout time.Duration) *BatchHandler { + return &BatchHandler{db: db, recorderHub: recorderHub, recorderRPCTimeout: recorderRPCTimeout} } // RegisterRoutes registers batch routes under the provided router group. func (h *BatchHandler) RegisterRoutes(apiV1 *gin.RouterGroup) { apiV1.GET("/batches", h.ListBatches) + apiV1.POST("/batches", h.CreateBatch) apiV1.GET("/batches/:id", h.GetBatch) apiV1.DELETE("/batches/:id", h.DeleteBatch) apiV1.PATCH("/batches/:id", h.PatchBatch) + apiV1.POST("/batches/:id/tasks", h.AdjustBatchTasks) + apiV1.POST("/batches/:id/recall", h.RecallBatch) + apiV1.GET("/batches/:id/tasks", h.ListBatchTasks) +} + +// recalledEpisodeLabel is appended to episodes.labels (JSON string array) when a batch is recalled (see RecallBatch). +const recalledEpisodeLabel = "recalled_batch" + +// batchAdvanceTriggerStatuses lists task statuses that trigger tryAdvanceBatchStatus after a task +// update via PUT /tasks. Tasks become cancelled only via PATCH batch cancel, which does not invoke +// that hook; transfer completion sets completed and also calls tryAdvanceBatchStatus. +var batchAdvanceTriggerStatuses = map[string]struct{}{ + "completed": {}, + "failed": {}, } var validBatchStatuses = map[string]struct{}{ @@ -46,20 +102,23 @@ var validBatchStatuses = map[string]struct{}{ // BatchListItem represents a batch item in list responses. 
type BatchListItem struct { - ID string `json:"id" db:"id"` - BatchID string `json:"batch_id" db:"batch_id"` - OrderID string `json:"order_id" db:"order_id"` - WorkstationID string `json:"workstation_id" db:"workstation_id"` - Name string `json:"name" db:"name"` - Notes string `json:"notes,omitempty" db:"notes"` - Status string `json:"status" db:"status"` - EpisodeCount int `json:"episode_count" db:"episode_count"` - TaskCount int `json:"task_count" db:"task_count"` - StartedAt string `json:"started_at,omitempty"` - EndedAt string `json:"ended_at,omitempty"` - Metadata any `json:"metadata,omitempty"` - CreatedAt string `json:"created_at,omitempty"` - UpdatedAt string `json:"updated_at,omitempty"` + ID string `json:"id" db:"id"` + BatchID string `json:"batch_id" db:"batch_id"` + OrderID string `json:"order_id" db:"order_id"` + WorkstationID string `json:"workstation_id" db:"workstation_id"` + Name string `json:"name" db:"name"` + Notes string `json:"notes,omitempty" db:"notes"` + Status string `json:"status" db:"status"` + CompletedCount int `json:"completed_count" db:"completed_count"` + TaskCount int `json:"task_count" db:"task_count"` + CancelledCount int `json:"cancelled_count" db:"cancelled_count"` + FailedCount int `json:"failed_count" db:"failed_count"` + EpisodeCount int `json:"episode_count" db:"episode_count"` + StartedAt string `json:"started_at,omitempty"` + EndedAt string `json:"ended_at,omitempty"` + Metadata any `json:"metadata,omitempty"` + CreatedAt string `json:"created_at,omitempty"` + UpdatedAt string `json:"updated_at,omitempty"` } // ListBatchesResponse represents the response body for listing batches. 
@@ -71,20 +130,23 @@ type ListBatchesResponse struct { } type batchRow struct { - ID int64 `db:"id"` - BatchID string `db:"batch_id"` - OrderID int64 `db:"order_id"` - WorkstationID int64 `db:"workstation_id"` - Name string `db:"name"` - Notes sql.NullString `db:"notes"` - Status string `db:"status"` - EpisodeCount int `db:"episode_count"` - TaskCount int `db:"task_count"` - StartedAt sql.NullTime `db:"started_at"` - EndedAt sql.NullTime `db:"ended_at"` - Metadata sql.NullString `db:"metadata"` - CreatedAt sql.NullString `db:"created_at"` - UpdatedAt sql.NullString `db:"updated_at"` + ID int64 `db:"id"` + BatchID string `db:"batch_id"` + OrderID int64 `db:"order_id"` + WorkstationID int64 `db:"workstation_id"` + Name sql.NullString `db:"name"` + Notes sql.NullString `db:"notes"` + Status string `db:"status"` + CompletedCount int `db:"completed_count"` + TaskCount int `db:"task_count"` + CancelledCount int `db:"cancelled_count"` + FailedCount int `db:"failed_count"` + EpisodeCount int `db:"episode_count"` + StartedAt sql.NullTime `db:"started_at"` + EndedAt sql.NullTime `db:"ended_at"` + Metadata sql.NullString `db:"metadata"` + CreatedAt sql.NullString `db:"created_at"` + UpdatedAt sql.NullString `db:"updated_at"` } func parseNullableJSON(v sql.NullString) any { @@ -127,21 +189,28 @@ func batchListItemFromRow(r batchRow) BatchListItem { updatedAt = r.UpdatedAt.String } + nameOut := "" + if r.Name.Valid { + nameOut = r.Name.String + } return BatchListItem{ - ID: fmt.Sprintf("%d", r.ID), - BatchID: r.BatchID, - OrderID: fmt.Sprintf("%d", r.OrderID), - WorkstationID: fmt.Sprintf("%d", r.WorkstationID), - Name: r.Name, - Notes: notes, - Status: r.Status, - EpisodeCount: r.EpisodeCount, - TaskCount: r.TaskCount, - StartedAt: startedAt, - EndedAt: endedAt, - Metadata: parseNullableJSON(r.Metadata), - CreatedAt: createdAt, - UpdatedAt: updatedAt, + ID: fmt.Sprintf("%d", r.ID), + BatchID: r.BatchID, + OrderID: fmt.Sprintf("%d", r.OrderID), + WorkstationID: 
fmt.Sprintf("%d", r.WorkstationID), + Name: nameOut, + Notes: notes, + Status: r.Status, + CompletedCount: r.CompletedCount, + TaskCount: r.TaskCount, + CancelledCount: r.CancelledCount, + FailedCount: r.FailedCount, + EpisodeCount: r.EpisodeCount, + StartedAt: startedAt, + EndedAt: endedAt, + Metadata: parseNullableJSON(r.Metadata), + CreatedAt: createdAt, + UpdatedAt: updatedAt, } } @@ -229,8 +298,11 @@ func (h *BatchHandler) ListBatches(c *gin.Context) { b.name, b.notes, b.status, - b.episode_count, COALESCE(tc.task_count, 0) AS task_count, + COALESCE(tc.completed_count, 0) AS completed_count, + COALESCE(tc.cancelled_count, 0) AS cancelled_count, + COALESCE(tc.failed_count, 0) AS failed_count, + COALESCE(b.episode_count, 0) AS episode_count, b.started_at, b.ended_at, CAST(b.metadata AS CHAR) AS metadata, @@ -238,7 +310,12 @@ func (h *BatchHandler) ListBatches(c *gin.Context) { b.updated_at FROM batches b LEFT JOIN ( - SELECT batch_id, COUNT(*) AS task_count + SELECT + batch_id, + COUNT(*) AS task_count, + COALESCE(SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END), 0) AS completed_count, + COALESCE(SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END), 0) AS cancelled_count, + COALESCE(SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END), 0) AS failed_count FROM tasks WHERE deleted_at IS NULL GROUP BY batch_id @@ -299,8 +376,11 @@ func (h *BatchHandler) GetBatch(c *gin.Context) { b.name, b.notes, b.status, - b.episode_count, COALESCE(tc.task_count, 0) AS task_count, + COALESCE(tc.completed_count, 0) AS completed_count, + COALESCE(tc.cancelled_count, 0) AS cancelled_count, + COALESCE(tc.failed_count, 0) AS failed_count, + COALESCE(b.episode_count, 0) AS episode_count, b.started_at, b.ended_at, CAST(b.metadata AS CHAR) AS metadata, @@ -308,7 +388,12 @@ func (h *BatchHandler) GetBatch(c *gin.Context) { b.updated_at FROM batches b LEFT JOIN ( - SELECT batch_id, COUNT(*) AS task_count + SELECT + batch_id, + COUNT(*) AS task_count, + COALESCE(SUM(CASE WHEN status = 
'completed' THEN 1 ELSE 0 END), 0) AS completed_count, + COALESCE(SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END), 0) AS cancelled_count, + COALESCE(SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END), 0) AS failed_count FROM tasks WHERE deleted_at IS NULL GROUP BY batch_id @@ -331,15 +416,18 @@ func (h *BatchHandler) GetBatch(c *gin.Context) { } // DeleteBatch handles batch deletion requests (soft delete). -// Only batches with status "cancelled" can be deleted. +// Only batches with status "cancelled" or "pending" can be targeted. +// If the batch has any completed task, the batch row is not deleted: only non-completed tasks are soft-deleted. +// If after that all remaining tasks are completed, the batch status is set to completed (from pending/cancelled). +// Otherwise the batch and all its tasks are soft-deleted. // // @Summary Delete batch -// @Description Soft deletes a batch by ID. Only allowed when status is cancelled. +// @Description Soft-deletes cancelled or pending batch when it has no completed tasks; if it has completed tasks, only non-completed tasks are removed and the batch may be advanced to completed when appropriate. // @Tags batches // @Accept json // @Produce json // @Param id path int true "Batch ID" -// @Success 204 +// @Success 200 {object} map[string]interface{} "batch_deleted (bool), tasks_removed (int)" // @Failure 400 {object} map[string]string // @Failure 404 {object} map[string]string // @Failure 500 {object} map[string]string @@ -363,19 +451,88 @@ func (h *BatchHandler) DeleteBatch(c *gin.Context) { return } - if status != "cancelled" { - c.JSON(http.StatusBadRequest, gin.H{"error": "batch can only be deleted when status is cancelled"}) + if status != "cancelled" && status != "pending" { + c.JSON(http.StatusConflict, gin.H{"error": "batch can only be deleted when status is cancelled or pending"}) + return + } + + var completedCount int + if err := h.db.Get(&completedCount, "SELECT COUNT(*) FROM tasks WHERE batch_id = ? 
AND deleted_at IS NULL AND status = 'completed'", id); err != nil { + logger.Printf("[BATCH] Failed to count completed tasks for batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) return } now := time.Now().UTC() - if _, err := h.db.Exec("UPDATE batches SET deleted_at = ?, updated_at = ? WHERE id = ? AND deleted_at IS NULL", now, now, id); err != nil { - logger.Printf("[BATCH] Failed to delete batch: %v", err) + tx, err := h.db.Beginx() + if err != nil { + logger.Printf("[BATCH] Failed to begin transaction: %v", err) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) return } + defer func() { _ = tx.Rollback() }() + + var tasksRemoved int64 + outBatchStatus := status - c.Status(http.StatusNoContent) + if completedCount > 0 { + res, err := tx.Exec("UPDATE tasks SET deleted_at = ?, updated_at = ? WHERE batch_id = ? AND deleted_at IS NULL AND status <> 'completed'", now, now, id) + if err != nil { + logger.Printf("[BATCH] Failed to soft delete non-completed batch tasks: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) + return + } + tasksRemoved, _ = res.RowsAffected() + + var remNonCompleted, remCompleted int + if err := tx.Get(&remNonCompleted, "SELECT COUNT(*) FROM tasks WHERE batch_id = ? AND deleted_at IS NULL AND status <> 'completed'", id); err != nil { + logger.Printf("[BATCH] Failed to count remaining non-completed tasks: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) + return + } + if err := tx.Get(&remCompleted, "SELECT COUNT(*) FROM tasks WHERE batch_id = ? 
AND deleted_at IS NULL AND status = 'completed'", id); err != nil { + logger.Printf("[BATCH] Failed to count remaining completed tasks: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) + return + } + if remCompleted > 0 && remNonCompleted == 0 { + if _, err := tx.Exec(`UPDATE batches SET status = 'completed', ended_at = COALESCE(ended_at, ?), updated_at = ? WHERE id = ? AND deleted_at IS NULL AND status IN ('pending', 'cancelled')`, now, now, id); err != nil { + logger.Printf("[BATCH] Failed to advance batch to completed after cleanup: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) + return + } + outBatchStatus = "completed" + } + } else { + if _, err := tx.Exec("UPDATE batches SET deleted_at = ?, updated_at = ? WHERE id = ? AND deleted_at IS NULL", now, now, id); err != nil { + logger.Printf("[BATCH] Failed to delete batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) + return + } + res, err := tx.Exec("UPDATE tasks SET deleted_at = ?, updated_at = ? WHERE batch_id = ? AND deleted_at IS NULL", now, now, id) + if err != nil { + logger.Printf("[BATCH] Failed to soft delete batch tasks: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) + return + } + tasksRemoved, _ = res.RowsAffected() + } + + if err := tx.Commit(); err != nil { + logger.Printf("[BATCH] Failed to commit delete batch transaction: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete batch"}) + return + } + + resp := gin.H{ + "batch_deleted": completedCount == 0, + "tasks_removed": tasksRemoved, + "completed_tasks": completedCount, + } + if completedCount > 0 { + resp["batch_status"] = outBatchStatus + } + c.JSON(http.StatusOK, resp) } // PatchBatchRequest is the request body for patching a batch. 
@@ -391,12 +548,17 @@ type PatchBatchResponse struct { UpdatedAt string `json:"updated_at,omitempty"` } -// PatchBatch handles batch status updates. Only supports status transitions: -// - pending -> active | cancelled -// - active -> completed | cancelled +// PatchBatch handles batch status updates. +// Only supports cancellation transitions: +// - pending -> cancelled +// - active -> cancelled +// +// Note: pending->active is automatic (triggered when a task reaches completed or failed). +// Note: active->completed is automatic (when all non-deleted tasks are completed, failed, or cancelled). +// Note: For recall, use POST /batches/:id/recall. // // @Summary Patch batch -// @Description Updates batch status. Only specific state transitions are allowed. +// @Description Updates batch status. Only cancellation transitions are allowed via PATCH. // @Tags batches // @Accept json // @Produce json @@ -426,6 +588,13 @@ func (h *BatchHandler) PatchBatch(c *gin.Context) { return } + // Only cancellation is allowed via PATCH. + // pending->active is automatic; active->completed is automatic; recall uses POST .../recall. + if req.Status != "cancelled" { + c.JSON(http.StatusBadRequest, gin.H{"error": "PATCH only supports transitioning to 'cancelled'; use POST /batches/:id/recall for recall"}) + return + } + type statusRow struct { Status string `db:"status"` StartedAt sql.NullTime `db:"started_at"` @@ -442,48 +611,1271 @@ func (h *BatchHandler) PatchBatch(c *gin.Context) { return } - allowed := false - switch cur.Status { - case "pending": - allowed = req.Status == "active" || req.Status == "cancelled" - case "active": - allowed = req.Status == "completed" || req.Status == "cancelled" - default: - allowed = false - } - if !allowed { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid status transition"}) + // Only pending and active batches can be cancelled. 
+ if cur.Status != "pending" && cur.Status != "active" { + c.JSON(http.StatusConflict, gin.H{ + "error": fmt.Sprintf("cannot cancel batch in status '%s'; only pending or active batches can be cancelled", cur.Status), + "current_status": cur.Status, + }) return } now := time.Now().UTC() - startedAt := cur.StartedAt - updates := []string{"status = ?", "updated_at = ?"} - args := []interface{}{req.Status, now} + tx, err := h.db.Beginx() + if err != nil { + logger.Printf("[BATCH] Failed to begin transaction for patch batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to patch batch"}) + return + } + defer func() { _ = tx.Rollback() }() - // pending -> active sets started_at when not set yet - if cur.Status == "pending" && req.Status == "active" && !startedAt.Valid { - startedAt = sql.NullTime{Time: now, Valid: true} - updates = append(updates, "started_at = ?") - args = append(args, now) + // If cancelling a pending/active batch, we must also notify Axon Recorder for tasks that are already + // configured (ready) or recording (in_progress): clear vs cancel respectively. + // Collect task->device mapping before we mutate task statuses. + toNotify := make([]taskDeviceRow, 0) + if cur.Status == "pending" || cur.Status == "active" { + if err := tx.Select(&toNotify, ` + SELECT + t.task_id AS task_id, + r.device_id AS device_id, + t.status AS status + FROM tasks t + JOIN workstations ws ON ws.id = t.workstation_id AND ws.deleted_at IS NULL + JOIN robots r ON r.id = ws.robot_id AND r.deleted_at IS NULL + WHERE t.batch_id = ? AND t.deleted_at IS NULL + AND t.status IN ('ready', 'in_progress') + `, id); err != nil { + logger.Printf("[BATCH] Failed to query tasks for recorder notify (batch=%d): %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to patch batch"}) + return + } } - args = append(args, id) - query := fmt.Sprintf("UPDATE batches SET %s WHERE id = ? 
AND deleted_at IS NULL", strings.Join(updates, ", ")) - if _, err := h.db.Exec(query, args...); err != nil { + // Update batch status (idempotent; only transitions to cancelled). + if _, err := tx.Exec( + "UPDATE batches SET status = 'cancelled', updated_at = ? WHERE id = ? AND deleted_at IS NULL", + now, id, + ); err != nil { logger.Printf("[BATCH] Failed to patch batch: %v", err) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to patch batch"}) return } + // If cancelling a pending/active batch, also cancel its pending/ready/in_progress tasks. + // This prevents orphan runnable tasks under a cancelled batch. + if cur.Status == "pending" || cur.Status == "active" { + if _, err := tx.Exec( + `UPDATE tasks + SET status = 'cancelled', updated_at = ? + WHERE batch_id = ? AND deleted_at IS NULL + AND status IN ('pending', 'ready', 'in_progress')`, + now, id, + ); err != nil { + logger.Printf("[BATCH] Failed to cascade cancel tasks for batch %d: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to patch batch"}) + return + } + } + + var patchWsID int64 + if err := tx.Get(&patchWsID, "SELECT workstation_id FROM batches WHERE id = ? AND deleted_at IS NULL", id); err != nil { + logger.Printf("[BATCH] Failed to read workstation_id for batch %d: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to patch batch"}) + return + } + if err := syncWorkstationStatusFromBatchesTx(tx, patchWsID); err != nil { + logger.Printf("[BATCH] Failed to sync workstation status after patch batch %d: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to patch batch"}) + return + } + + if err := tx.Commit(); err != nil { + logger.Printf("[BATCH] Failed to commit patch batch transaction: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to patch batch"}) + return + } + + // Best-effort: after commit, notify recorder devices (clear for ready, cancel for in_progress). 
+ // Notification failures should not affect the batch cancellation result. + if (cur.Status == "pending" || cur.Status == "active") && h.recorderHub != nil && len(toNotify) > 0 { + go h.notifyRecorderCancelTasks(context.Background(), id, toNotify) + } + startedAtOut := "" - if startedAt.Valid { - startedAtOut = startedAt.Time.UTC().Format(time.RFC3339) + if cur.StartedAt.Valid { + startedAtOut = cur.StartedAt.Time.UTC().Format(time.RFC3339) } c.JSON(http.StatusOK, PatchBatchResponse{ ID: fmt.Sprintf("%d", id), - Status: req.Status, + Status: "cancelled", StartedAt: startedAtOut, UpdatedAt: now.Format(time.RFC3339), }) } + +func (h *BatchHandler) notifyRecorderCancelTasks(ctx context.Context, batchID int64, rows []taskDeviceRow) { + notifyRecorderCancelTasksWithHub(ctx, h.recorderHub, h.recorderRPCTimeout, batchID, rows) +} + +// notifyRecorderCancelTasksWithHub sends clear (ready) / cancel (in_progress) RPCs to Axon recorder. +// hub may be nil (no-op). Used by PATCH batch cancel and order-completion batch finalization. +func notifyRecorderCancelTasksWithHub(ctx context.Context, hub *services.RecorderHub, rpcTimeout time.Duration, batchID int64, rows []taskDeviceRow) { + if hub == nil || len(rows) == 0 { + return + } + timeout := rpcTimeout + if timeout <= 0 { + timeout = 5 * time.Second + } + for _, r := range rows { + deviceID := strings.TrimSpace(r.DeviceID) + taskID := strings.TrimSpace(r.TaskID) + if deviceID == "" || taskID == "" { + continue + } + st := strings.TrimSpace(r.Status) + var err error + switch st { + case "ready": + // READY on device: clear cached config without treating it as an active recording cancel. 
+ _, err = hub.SendRPC(ctx, deviceID, "clear", nil, timeout) + case "in_progress": + _, err = hub.SendRPC(ctx, deviceID, "cancel", map[string]interface{}{"task_id": taskID}, timeout) + default: + continue + } + if err != nil { + logger.Printf("[BATCH] Batch %d: failed to notify recorder (status=%s): device=%s task=%s err=%v", batchID, st, deviceID, taskID, err) + } + } +} + +// TaskGroupItem represents a single task group in a batch creation/adjustment request. +type TaskGroupItem struct { + SOPID int64 `json:"sop_id"` + SubsceneID int64 `json:"subscene_id"` + Quantity int `json:"quantity"` +} + +// CreateBatchRequest is the request body for creating a batch with tasks. +type CreateBatchRequest struct { + OrderID int64 `json:"order_id"` + WorkstationID int64 `json:"workstation_id"` + Name string `json:"name,omitempty"` + Notes string `json:"notes,omitempty"` + TaskGroups []TaskGroupItem `json:"task_groups"` + Metadata json.RawMessage `json:"metadata,omitempty"` +} + +// CreatedTaskItem represents a single created task in the response. +type CreatedTaskItem struct { + ID string `json:"id"` + TaskID string `json:"task_id"` + SOPID string `json:"sop_id"` + SubsceneID string `json:"subscene_id"` + Status string `json:"status"` + CreatedAt string `json:"created_at"` +} + +// CreateBatchResponse is the response body for creating a batch. +type CreateBatchResponse struct { + Batch BatchListItem `json:"batch"` + Tasks []CreatedTaskItem `json:"tasks"` +} + +// CreateBatch creates a new batch and its tasks in a single transaction. +// +// @Summary Create batch with tasks +// @Description Creates a batch and all its tasks atomically. task_groups defines how many tasks per SOP/subscene combination. 
+// @Tags batches +// @Accept json +// @Produce json +// @Param body body CreateBatchRequest true "Batch creation payload" +// @Success 201 {object} CreateBatchResponse +// @Failure 400 {object} map[string]string +// @Failure 409 {object} map[string]string +// @Failure 500 {object} map[string]string +// @Router /batches [post] +func (h *BatchHandler) CreateBatch(c *gin.Context) { + var req CreateBatchRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body: " + err.Error()}) + return + } + + if req.OrderID <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "order_id is required"}) + return + } + if req.WorkstationID <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "workstation_id is required"}) + return + } + if len(req.TaskGroups) == 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "task_groups must not be empty"}) + return + } + + // Validate task_groups + totalQuantity := 0 + for i, tg := range req.TaskGroups { + if tg.SOPID <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("task_groups[%d].sop_id is required", i)}) + return + } + if tg.SubsceneID <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("task_groups[%d].subscene_id is required", i)}) + return + } + if tg.Quantity < 1 { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("task_groups[%d].quantity must be >= 1", i)}) + return + } + totalQuantity += tg.Quantity + } + if a, b, dup := validateTaskGroupUniqueness(req.TaskGroups); dup { + c.JSON(http.StatusBadRequest, gin.H{ + "error": fmt.Sprintf("duplicate task_groups entries: task_groups[%d] and task_groups[%d] have the same sop_id and subscene_id", a, b), + }) + return + } + if totalQuantity > 1000 { + c.JSON(http.StatusBadRequest, gin.H{"error": "total quantity across all task_groups must be <= 1000"}) + return + } + + now := time.Now().UTC() + + tx, err := h.db.Beginx() + if err != nil { + logger.Printf("[BATCH] Failed to start 
transaction: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + defer func() { _ = tx.Rollback() }() + + // Lock order and validate quota + type orderQuotaRow struct { + TargetCount int `db:"target_count"` + } + var orderQuota orderQuotaRow + if err := tx.Get(&orderQuota, "SELECT target_count FROM orders WHERE id = ? AND deleted_at IS NULL LIMIT 1 FOR UPDATE", req.OrderID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("order not found: %d", req.OrderID)}) + return + } + logger.Printf("[BATCH] Failed to lock order: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + + // Count existing tasks for this order + var existingCompletedCount int + if err := tx.Get(&existingCompletedCount, "SELECT COUNT(*) FROM tasks WHERE order_id = ? AND deleted_at IS NULL AND status = 'completed'", req.OrderID); err != nil { + logger.Printf("[BATCH] Failed to count completed tasks: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + + remaining := orderQuota.TargetCount - existingCompletedCount + if totalQuantity > remaining { + c.JSON(http.StatusBadRequest, gin.H{ + "error": fmt.Sprintf("quota exceeded: target_count=%d, completed_count=%d, remaining=%d, requested=%d", orderQuota.TargetCount, existingCompletedCount, remaining, totalQuantity), + "target_count": orderQuota.TargetCount, + "completed_count": existingCompletedCount, + "remaining": remaining, + "requested": totalQuantity, + }) + return + } + + // Validate workstation + type wsRow struct { + ID int64 `db:"id"` + FactoryID int64 `db:"factory_id"` + } + var ws wsRow + if err := tx.Get(&ws, "SELECT id, factory_id FROM workstations WHERE id = ? 
AND deleted_at IS NULL LIMIT 1", req.WorkstationID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("workstation not found: %d", req.WorkstationID)}) + return + } + logger.Printf("[BATCH] Failed to validate workstation: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + + // Resolve organization_id from factory + var organizationID int64 + if err := tx.Get(&organizationID, "SELECT organization_id FROM factories WHERE id = ? AND deleted_at IS NULL LIMIT 1", ws.FactoryID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusBadRequest, gin.H{"error": "workstation factory not found"}) + return + } + logger.Printf("[BATCH] Failed to resolve organization_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + + batchName := strings.TrimSpace(req.Name) + // Generate batch_id (unique even under bulk creates) + batchIDStr, err := newPublicBatchID(now, 0) + if err != nil { + logger.Printf("[BATCH] Failed to generate batch_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + nameArg := sql.NullString{} + if batchName != "" { + nameArg = sql.NullString{String: batchName, Valid: true} + } + var taskBatchName any + if batchName != "" { + taskBatchName = batchName + } + + // Handle metadata + var metadataStr sql.NullString + if len(req.Metadata) > 0 { + raw := strings.TrimSpace(string(req.Metadata)) + if raw != "" && raw != "null" { + metadataStr = sql.NullString{String: raw, Valid: true} + } + } + + // Handle notes + var notesStr sql.NullString + if notes := strings.TrimSpace(req.Notes); notes != "" { + notesStr = sql.NullString{String: notes, Valid: true} + } + + // Insert batch + res, err := tx.Exec( + `INSERT INTO batches (batch_id, order_id, workstation_id, name, notes, status, metadata, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, 
'pending', ?, ?, ?)`, + batchIDStr, req.OrderID, req.WorkstationID, nameArg, notesStr, metadataStr, now, now, + ) + if err != nil { + logger.Printf("[BATCH] Failed to insert batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + newBatchID, err := res.LastInsertId() + if err != nil { + logger.Printf("[BATCH] Failed to get batch insert id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + + // Insert tasks for each task group + createdTasks := make([]CreatedTaskItem, 0, totalQuantity) + seqOffset := 0 + for _, tg := range req.TaskGroups { + // Validate SOP + if err := tx.Get(new(int), "SELECT 1 FROM sops WHERE id = ? AND deleted_at IS NULL LIMIT 1", tg.SOPID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("sop not found: %d", tg.SOPID)}) + return + } + logger.Printf("[BATCH] Failed to validate sop_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + + // Validate subscene and get scene info + type subsceneRow struct { + ID int64 `db:"id"` + SceneID int64 `db:"scene_id"` + Scene string `db:"scene_name"` + Name string `db:"name"` + Layout string `db:"initial_scene_layout"` + } + var subscene subsceneRow + if err := tx.Get(&subscene, ` + SELECT ss.id, ss.scene_id, s.name AS scene_name, ss.name, + COALESCE(ss.initial_scene_layout, '') AS initial_scene_layout + FROM subscenes ss + JOIN scenes s ON s.id = ss.scene_id AND s.deleted_at IS NULL + WHERE ss.id = ? 
AND ss.deleted_at IS NULL + LIMIT 1`, tg.SubsceneID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("subscene not found: %d", tg.SubsceneID)}) + return + } + logger.Printf("[BATCH] Failed to validate subscene_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + + for i := 0; i < tg.Quantity; i++ { + taskID, err := newPublicTaskID(now, seqOffset) + if err != nil { + logger.Printf("[BATCH] Failed to generate task_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + seqOffset++ + + resTask, err := tx.Exec( + `INSERT INTO tasks ( + task_id, batch_id, order_id, sop_id, workstation_id, + scene_id, subscene_id, batch_name, scene_name, subscene_name, + factory_id, organization_id, initial_scene_layout, + status, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?, ?)`, + taskID, newBatchID, req.OrderID, tg.SOPID, req.WorkstationID, + subscene.SceneID, tg.SubsceneID, taskBatchName, subscene.Scene, subscene.Name, + ws.FactoryID, organizationID, subscene.Layout, + now, now, + ) + if err != nil { + logger.Printf("[BATCH] Failed to insert task: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + newTaskID, err := resTask.LastInsertId() + if err != nil { + logger.Printf("[BATCH] Failed to get task insert id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create batch"}) + return + } + createdTasks = append(createdTasks, CreatedTaskItem{ + ID: fmt.Sprintf("%d", newTaskID), + TaskID: taskID, + SOPID: fmt.Sprintf("%d", tg.SOPID), + SubsceneID: fmt.Sprintf("%d", tg.SubsceneID), + Status: "pending", + CreatedAt: now.Format(time.RFC3339), + }) + } + } + + if err := tx.Commit(); err != nil { + logger.Printf("[BATCH] Failed to commit transaction: %v", err) + c.JSON(http.StatusInternalServerError, 
gin.H{"error": "failed to create batch"}) + return + } + + c.JSON(http.StatusCreated, CreateBatchResponse{ + Batch: BatchListItem{ + ID: fmt.Sprintf("%d", newBatchID), + BatchID: batchIDStr, + OrderID: fmt.Sprintf("%d", req.OrderID), + WorkstationID: fmt.Sprintf("%d", req.WorkstationID), + Name: batchName, + Status: "pending", + CompletedCount: 0, + TaskCount: totalQuantity, + FailedCount: 0, + EpisodeCount: 0, + CreatedAt: now.Format(time.RFC3339), + UpdatedAt: now.Format(time.RFC3339), + }, + Tasks: createdTasks, + }) +} + +// AdjustBatchTasksRequest is the request body for adjusting batch tasks declaratively. +type AdjustBatchTasksRequest struct { + TaskGroups []TaskGroupItem `json:"task_groups"` + // Name optional: if present, updates batch display name; empty string clears (NULL). + Name *string `json:"name,omitempty"` +} + +// AdjustBatchTasksResponse is the response body for adjusting batch tasks. +type AdjustBatchTasksResponse struct { + CreatedTasks []CreatedTaskItem `json:"created_tasks"` + DeletedTaskIDs []string `json:"deleted_task_ids"` +} + +// AdjustBatchTasks handles declarative task quantity adjustment for a batch. +// Each task_group entry specifies the TARGET quantity (not a delta) for that (sop_id, subscene_id) combination. +// +// @Summary Adjust batch tasks +// @Description Declaratively sets the target task count per SOP/subscene combination. Only pending/active batches allowed. 
+// @Tags batches +// @Accept json +// @Produce json +// @Param id path int true "Batch ID" +// @Param body body AdjustBatchTasksRequest true "Task groups with target quantities" +// @Success 200 {object} AdjustBatchTasksResponse +// @Failure 400 {object} map[string]string +// @Failure 409 {object} map[string]string +// @Failure 500 {object} map[string]string +// @Router /batches/{id}/tasks [post] +func (h *BatchHandler) AdjustBatchTasks(c *gin.Context) { + idStr := c.Param("id") + batchNumID, err := strconv.ParseInt(idStr, 10, 64) + if err != nil || batchNumID <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid batch id"}) + return + } + + var req AdjustBatchTasksRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body: " + err.Error()}) + return + } + if len(req.TaskGroups) == 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "task_groups must not be empty"}) + return + } + if a, b, dup := validateTaskGroupUniqueness(req.TaskGroups); dup { + c.JSON(http.StatusBadRequest, gin.H{ + "error": fmt.Sprintf("duplicate task_groups entries: task_groups[%d] and task_groups[%d] have the same sop_id and subscene_id", a, b), + }) + return + } + + now := time.Now().UTC() + + tx, err := h.db.Beginx() + if err != nil { + logger.Printf("[BATCH] Failed to start transaction: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + defer func() { _ = tx.Rollback() }() + + // Lock and validate batch + type batchStatusRow struct { + ID int64 `db:"id"` + OrderID int64 `db:"order_id"` + WorkstationID int64 `db:"workstation_id"` + Status string `db:"status"` + } + var batch batchStatusRow + if err := tx.Get(&batch, + "SELECT id, order_id, workstation_id, status FROM batches WHERE id = ? 
AND deleted_at IS NULL FOR UPDATE", + batchNumID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error": "batch not found"}) + return + } + logger.Printf("[BATCH] Failed to lock batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + + if batch.Status != "pending" && batch.Status != "active" { + c.JSON(http.StatusConflict, gin.H{ + "error": fmt.Sprintf("batch status is '%s'; only pending or active batches can be adjusted", batch.Status), + "current_status": batch.Status, + }) + return + } + + if req.Name != nil { + if strings.TrimSpace(*req.Name) == "" { + if _, err := tx.Exec( + `UPDATE batches SET name = NULL, updated_at = ? WHERE id = ? AND deleted_at IS NULL`, + now, batchNumID, + ); err != nil { + logger.Printf("[BATCH] Failed to clear batch name: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + } else { + n := strings.TrimSpace(*req.Name) + if _, err := tx.Exec( + `UPDATE batches SET name = ?, updated_at = ? WHERE id = ? AND deleted_at IS NULL`, + n, now, batchNumID, + ); err != nil { + logger.Printf("[BATCH] Failed to update batch name: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + } + } + + // Lock order for quota check + var targetCount int + if err := tx.Get(&targetCount, "SELECT target_count FROM orders WHERE id = ? AND deleted_at IS NULL LIMIT 1 FOR UPDATE", batch.OrderID); err != nil { + logger.Printf("[BATCH] Failed to lock order: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + + // Count current order-level completed count for quota check (completed-only). + var orderCompletedCount int + if err := tx.Get(&orderCompletedCount, "SELECT COUNT(*) FROM tasks WHERE order_id = ? 
AND deleted_at IS NULL AND status = 'completed'", batch.OrderID); err != nil { + logger.Printf("[BATCH] Failed to count order completed tasks: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + + // Validate workstation for new tasks + type wsRow struct { + ID int64 `db:"id"` + FactoryID int64 `db:"factory_id"` + } + var ws wsRow + if err := tx.Get(&ws, "SELECT id, factory_id FROM workstations WHERE id = ? AND deleted_at IS NULL LIMIT 1", batch.WorkstationID); err != nil { + logger.Printf("[BATCH] Failed to get workstation: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + var organizationID int64 + if err := tx.Get(&organizationID, "SELECT organization_id FROM factories WHERE id = ? AND deleted_at IS NULL LIMIT 1", ws.FactoryID); err != nil { + logger.Printf("[BATCH] Failed to resolve organization_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + + // Get batch name for denormalization + var batchName string + if err := tx.Get(&batchName, "SELECT COALESCE(name, '') FROM batches WHERE id = ? AND deleted_at IS NULL", batchNumID); err != nil { + logger.Printf("[BATCH] Failed to get batch name: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + + // subsceneInfo holds denormalized subscene data for task insertion. 
+ type subsceneInfo struct { + SceneID int64 `db:"scene_id"` + Scene string `db:"scene"` + Name string `db:"name"` + Layout string `db:"layout"` + } + + // Per-group analysis + type groupPlan struct { + tg TaskGroupItem + current int + locked int + pendingOnly int + toInsert int + toDelete int + deleteIDs []int64 + subscene subsceneInfo + } + + plans := make([]groupPlan, 0, len(req.TaskGroups)) + batchDelta := 0 + + for _, tg := range req.TaskGroups { + if tg.SOPID <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "sop_id is required in each task_group"}) + return + } + if tg.SubsceneID <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "subscene_id is required in each task_group"}) + return + } + if tg.Quantity < 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "quantity must be >= 0"}) + return + } + + // Count current, locked, pending-only for this (sop_id, subscene_id) in this batch + var counts struct { + Current int `db:"current_count"` + LockedCount int `db:"locked_count"` + } + if err := tx.Get(&counts, ` + SELECT + COUNT(*) AS current_count, + COALESCE(SUM(CASE WHEN status != 'pending' OR episode_id IS NOT NULL THEN 1 ELSE 0 END), 0) AS locked_count + FROM tasks + WHERE batch_id = ? AND sop_id = ? AND subscene_id = ? 
AND deleted_at IS NULL`, + batchNumID, tg.SOPID, tg.SubsceneID); err != nil { + logger.Printf("[BATCH] Failed to count tasks for group: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + + pendingOnly := counts.Current - counts.LockedCount + + // Validate: cannot reduce below locked count + if tg.Quantity < counts.LockedCount { + c.JSON(http.StatusBadRequest, gin.H{ + "error": fmt.Sprintf("cannot reduce sop_id=%d subscene_id=%d below locked count %d (requested %d)", + tg.SOPID, tg.SubsceneID, counts.LockedCount, tg.Quantity), + }) + return + } + + plan := groupPlan{ + tg: tg, + current: counts.Current, + locked: counts.LockedCount, + pendingOnly: pendingOnly, + } + + if tg.Quantity > counts.Current { + plan.toInsert = tg.Quantity - counts.Current + } else if tg.Quantity < counts.Current { + toDelete := counts.Current - tg.Quantity + if toDelete > pendingOnly { + c.JSON(http.StatusBadRequest, gin.H{ + "error": fmt.Sprintf("cannot delete %d tasks for sop_id=%d subscene_id=%d: only %d pending tasks available", + toDelete, tg.SOPID, tg.SubsceneID, pendingOnly), + }) + return + } + plan.toDelete = toDelete + + // Select IDs to delete (LIFO: newest first) + var deleteIDs []int64 + if err := tx.Select(&deleteIDs, ` + SELECT id FROM tasks + WHERE batch_id = ? AND sop_id = ? AND subscene_id = ? AND deleted_at IS NULL + AND status = 'pending' AND episode_id IS NULL + ORDER BY created_at DESC, id DESC + LIMIT ?`, + batchNumID, tg.SOPID, tg.SubsceneID, toDelete); err != nil { + logger.Printf("[BATCH] Failed to select tasks to delete: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + plan.deleteIDs = deleteIDs + } + + // Validate subscene for inserts + if plan.toInsert > 0 { + if err := tx.Get(new(int), "SELECT 1 FROM sops WHERE id = ? 
AND deleted_at IS NULL LIMIT 1", tg.SOPID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("sop not found: %d", tg.SOPID)}) + return + } + logger.Printf("[BATCH] Failed to validate sop: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + if err := tx.Get(&plan.subscene, ` + SELECT ss.scene_id, s.name AS scene, ss.name, COALESCE(ss.initial_scene_layout, '') AS layout + FROM subscenes ss + JOIN scenes s ON s.id = ss.scene_id AND s.deleted_at IS NULL + WHERE ss.id = ? AND ss.deleted_at IS NULL LIMIT 1`, tg.SubsceneID); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("subscene not found: %d", tg.SubsceneID)}) + return + } + logger.Printf("[BATCH] Failed to validate subscene: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + } + + batchDelta += plan.toInsert - plan.toDelete + plans = append(plans, plan) + } + + // Quota check (completed-only): batch_delta (new tasks) must not exceed remaining = target_count - completed_count. + remaining := targetCount - orderCompletedCount + if batchDelta > remaining { + c.JSON(http.StatusBadRequest, gin.H{ + "error": fmt.Sprintf("quota exceeded: target_count=%d, completed_count=%d, remaining=%d, batch_delta=%d", targetCount, orderCompletedCount, remaining, batchDelta), + "target_count": targetCount, + "completed_count": orderCompletedCount, + "remaining": remaining, + "batch_delta": batchDelta, + }) + return + } + + // Execute: first all deletes, then all inserts + deletedTaskIDs := make([]string, 0) + for _, plan := range plans { + for _, delID := range plan.deleteIDs { + if _, err := tx.Exec( + "UPDATE tasks SET deleted_at = ?, updated_at = ? WHERE id = ? 
AND deleted_at IS NULL", + now, now, delID, + ); err != nil { + logger.Printf("[BATCH] Failed to soft-delete task %d: %v", delID, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + deletedTaskIDs = append(deletedTaskIDs, fmt.Sprintf("%d", delID)) + } + } + + createdTasks := make([]CreatedTaskItem, 0) + seqOffset := 0 + for _, plan := range plans { + for i := 0; i < plan.toInsert; i++ { + taskID, err := newPublicTaskID(now, seqOffset) + if err != nil { + logger.Printf("[BATCH] Failed to generate task_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + seqOffset++ + + resTask, err := tx.Exec( + `INSERT INTO tasks ( + task_id, batch_id, order_id, sop_id, workstation_id, + scene_id, subscene_id, batch_name, scene_name, subscene_name, + factory_id, organization_id, initial_scene_layout, + status, created_at, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?, ?)`, + taskID, batchNumID, batch.OrderID, plan.tg.SOPID, batch.WorkstationID, + plan.subscene.SceneID, plan.tg.SubsceneID, batchName, plan.subscene.Scene, plan.subscene.Name, + ws.FactoryID, organizationID, plan.subscene.Layout, + now, now, + ) + if err != nil { + logger.Printf("[BATCH] Failed to insert task: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + newTaskID, err := resTask.LastInsertId() + if err != nil { + logger.Printf("[BATCH] Failed to get task insert id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + createdTasks = append(createdTasks, CreatedTaskItem{ + ID: fmt.Sprintf("%d", newTaskID), + TaskID: taskID, + SOPID: fmt.Sprintf("%d", plan.tg.SOPID), + SubsceneID: fmt.Sprintf("%d", plan.tg.SubsceneID), + Status: "pending", + CreatedAt: now.Format(time.RFC3339), + }) + } + } + + if err := tx.Commit(); err != nil { + 
logger.Printf("[BATCH] Failed to commit transaction: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to adjust batch tasks"}) + return + } + + c.JSON(http.StatusOK, AdjustBatchTasksResponse{ + CreatedTasks: createdTasks, + DeletedTaskIDs: deletedTaskIDs, + }) +} + +// RecallBatch transitions a batch from active or completed to recalled. +// Cancels pending/ready/in_progress tasks in the batch, and appends recalledEpisodeLabel to episodes.labels +// for episodes linked to completed tasks (downstream filtering). +// +// @Summary Recall batch +// @Description Recalls a batch: sets status to recalled, cancels non-terminal tasks, appends recalled_batch to related episodes' labels. Only active or completed batches. +// @Tags batches +// @Produce json +// @Param id path int true "Batch ID" +// @Success 200 {object} PatchBatchResponse +// @Failure 400 {object} map[string]string +// @Failure 404 {object} map[string]string +// @Failure 409 {object} map[string]string +// @Failure 500 {object} map[string]string +// @Router /batches/{id}/recall [post] +func (h *BatchHandler) RecallBatch(c *gin.Context) { + idStr := c.Param("id") + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil || id <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid batch id"}) + return + } + + now := time.Now().UTC() + tx, err := h.db.Beginx() + if err != nil { + logger.Printf("[BATCH] Failed to begin transaction for recall batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + defer func() { _ = tx.Rollback() }() + + type statusRow struct { + Status string `db:"status"` + StartedAt sql.NullTime `db:"started_at"` + } + var cur statusRow + if err := tx.Get(&cur, "SELECT status, started_at FROM batches WHERE id = ? 
AND deleted_at IS NULL FOR UPDATE", id); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error": "batch not found"}) + return + } + logger.Printf("[BATCH] Failed to query batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + + if cur.Status != "active" && cur.Status != "completed" { + c.JSON(http.StatusConflict, gin.H{ + "error": fmt.Sprintf("cannot recall batch in status '%s'; only active or completed batches can be recalled", cur.Status), + "current_status": cur.Status, + }) + return + } + + toNotify := make([]taskDeviceRow, 0) + if err := tx.Select(&toNotify, ` + SELECT + t.task_id AS task_id, + r.device_id AS device_id, + t.status AS status + FROM tasks t + JOIN workstations ws ON ws.id = t.workstation_id AND ws.deleted_at IS NULL + JOIN robots r ON r.id = ws.robot_id AND r.deleted_at IS NULL + WHERE t.batch_id = ? AND t.deleted_at IS NULL + AND t.status IN ('ready', 'in_progress') + `, id); err != nil { + logger.Printf("[BATCH] Failed to query tasks for recorder notify (batch=%d): %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + + if _, err := tx.Exec( + "UPDATE batches SET status = 'recalled', updated_at = ? WHERE id = ? AND deleted_at IS NULL", + now, id, + ); err != nil { + logger.Printf("[BATCH] Failed to recall batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + + if _, err := tx.Exec( + `UPDATE tasks + SET status = 'cancelled', updated_at = ? + WHERE batch_id = ? AND deleted_at IS NULL + AND status IN ('pending', 'ready', 'in_progress')`, + now, id, + ); err != nil { + logger.Printf("[BATCH] Failed to cascade cancel tasks for recall batch %d: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + + // Episodes are linked to tasks via episodes.task_id (see transfer upload path). 
tasks.episode_id + // may be unset, so we must join on e.task_id = t.id, not t.episode_id = e.id. + if _, err := tx.Exec( + `UPDATE episodes e + INNER JOIN tasks t ON e.task_id = t.id AND t.deleted_at IS NULL AND e.deleted_at IS NULL + SET e.labels = IF( + JSON_CONTAINS(COALESCE(e.labels, JSON_ARRAY()), JSON_QUOTE(?), '$'), + e.labels, + JSON_ARRAY_APPEND(COALESCE(e.labels, JSON_ARRAY()), '$', ?) + ), + e.updated_at = ? + WHERE t.batch_id = ? AND t.status = 'completed'`, + recalledEpisodeLabel, recalledEpisodeLabel, now, id, + ); err != nil { + logger.Printf("[BATCH] Failed to update episode labels for recall batch %d: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + + var recallWsID int64 + if err := tx.Get(&recallWsID, "SELECT workstation_id FROM batches WHERE id = ? AND deleted_at IS NULL", id); err != nil { + logger.Printf("[BATCH] Failed to read workstation_id for recall batch %d: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + if err := syncWorkstationStatusFromBatchesTx(tx, recallWsID); err != nil { + logger.Printf("[BATCH] Failed to sync workstation status after recall batch %d: %v", id, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + + if err := tx.Commit(); err != nil { + logger.Printf("[BATCH] Failed to commit recall batch transaction: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to recall batch"}) + return + } + + if h.recorderHub != nil && len(toNotify) > 0 { + go h.notifyRecorderCancelTasks(context.Background(), id, toNotify) + } + + startedAtOut := "" + if cur.StartedAt.Valid { + startedAtOut = cur.StartedAt.Time.UTC().Format(time.RFC3339) + } + c.JSON(http.StatusOK, PatchBatchResponse{ + ID: fmt.Sprintf("%d", id), + Status: "recalled", + StartedAt: startedAtOut, + UpdatedAt: now.Format(time.RFC3339), + }) +} + +// ListBatchTasks lists all 
tasks belonging to a batch. +// +// @Summary List batch tasks +// @Description Returns all tasks belonging to the specified batch +// @Tags batches +// @Produce json +// @Param id path int true "Batch ID" +// @Success 200 {object} ListTasksResponse +// @Failure 400 {object} map[string]string +// @Failure 404 {object} map[string]string +// @Failure 500 {object} map[string]string +// @Router /batches/{id}/tasks [get] +func (h *BatchHandler) ListBatchTasks(c *gin.Context) { + idStr := c.Param("id") + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil || id <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid batch id"}) + return + } + + // Verify batch exists + var exists int + if err := h.db.Get(&exists, "SELECT 1 FROM batches WHERE id = ? AND deleted_at IS NULL LIMIT 1", id); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error": "batch not found"}) + return + } + logger.Printf("[BATCH] Failed to verify batch: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list batch tasks"}) + return + } + + items := make([]TaskListItem, 0) + if err := h.db.Select(&items, ` + SELECT + CAST(id AS CHAR) AS id, + task_id AS task_id, + CAST(batch_id AS CHAR) AS batch_id, + CAST(order_id AS CHAR) AS order_id, + CAST(sop_id AS CHAR) AS sop_id, + CASE WHEN workstation_id IS NULL THEN NULL ELSE CAST(workstation_id AS CHAR) END AS workstation_id, + CAST(scene_id AS CHAR) AS scene_id, + COALESCE(scene_name, '') AS scene_name, + CAST(subscene_id AS CHAR) AS subscene_id, + COALESCE(subscene_name, '') AS subscene_name, + status, + CASE WHEN assigned_at IS NULL THEN NULL ELSE DATE_FORMAT(CONVERT_TZ(assigned_at, @@session.time_zone, '+00:00'), '%%Y-%%m-%%dT%%H:%%i:%%sZ') END AS assigned_at + FROM tasks + WHERE batch_id = ? 
AND deleted_at IS NULL + ORDER BY created_at ASC, id ASC`, id); err != nil { + logger.Printf("[BATCH] Failed to query batch tasks: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list batch tasks"}) + return + } + + c.JSON(http.StatusOK, ListTasksResponse{ + Tasks: items, + Total: len(items), + Limit: len(items), + Offset: 0, + }) +} + +// syncWorkstationStatusFromBatchesTx sets workstations.status to active if any non-deleted batch +// for this workstation is active; otherwise inactive. If the workstation is offline (e.g. data +// collector logged out) or on break (operator pause), status is left unchanged so those states win +// over batch-driven updates. +func syncWorkstationStatusFromBatchesTx(tx *sqlx.Tx, workstationID int64) error { + if workstationID <= 0 { + return nil + } + var curStatus string + if err := tx.Get(&curStatus, ` + SELECT status FROM workstations + WHERE id = ? AND deleted_at IS NULL + FOR UPDATE + `, workstationID); err != nil { + if err == sql.ErrNoRows { + return nil + } + return err + } + if curStatus == "offline" || curStatus == "break" { + return nil + } + var hasActive bool + if err := tx.Get(&hasActive, ` + SELECT EXISTS( + SELECT 1 FROM batches + WHERE workstation_id = ? AND status = 'active' AND deleted_at IS NULL + ) + `, workstationID); err != nil { + return err + } + newStatus := "inactive" + if hasActive { + newStatus = "active" + } + if curStatus == newStatus { + return nil + } + now := time.Now().UTC() + _, err := tx.Exec(` + UPDATE workstations + SET status = ?, updated_at = ? + WHERE id = ? AND deleted_at IS NULL + `, newStatus, now, workstationID) + return err +} + +// finalizeOpenBatchesAfterOrderCompletedTx runs inside the same transaction as the order -> completed transition. +// It cancels non-terminal tasks (pending, ready, in_progress) on batches that are still pending or active for +// this order, then sets those batches to completed so batch state matches a finished order. 
+// Workstation status is re-synced for each affected workstation. +// Before mutating tasks, it returns rows for notifyRecorderCancelTasksWithHub (ready -> clear, in_progress -> cancel). +func finalizeOpenBatchesAfterOrderCompletedTx(tx *sqlx.Tx, orderID int64, now time.Time) ([]orderCompletionRecorderNotify, error) { + var wsIDs []int64 + if err := tx.Select(&wsIDs, ` + SELECT DISTINCT workstation_id + FROM batches + WHERE order_id = ? AND deleted_at IS NULL AND status IN ('pending', 'active') + `, orderID); err != nil { + return nil, err + } + if len(wsIDs) == 0 { + return nil, nil + } + + // Collect ready/in_progress tasks for Axon RPC (same join as PATCH batch cancel). + var notifyRaw []taskDeviceBatchRow + if err := tx.Select(&notifyRaw, ` + SELECT + t.batch_id AS batch_id, + t.task_id AS task_id, + r.device_id AS device_id, + t.status AS status + FROM tasks t + INNER JOIN batches b ON b.id = t.batch_id AND b.deleted_at IS NULL + JOIN workstations ws ON ws.id = t.workstation_id AND ws.deleted_at IS NULL + JOIN robots r ON r.id = ws.robot_id AND r.deleted_at IS NULL + WHERE t.order_id = ? + AND t.deleted_at IS NULL + AND b.status IN ('pending', 'active') + AND t.status IN ('ready', 'in_progress') + `, orderID); err != nil { + return nil, err + } + byBatch := make(map[int64][]taskDeviceRow) + for _, row := range notifyRaw { + byBatch[row.BatchID] = append(byBatch[row.BatchID], taskDeviceRow{ + TaskID: row.TaskID, + DeviceID: row.DeviceID, + Status: row.Status, + }) + } + recorderNotifies := make([]orderCompletionRecorderNotify, 0, len(byBatch)) + for bid, rows := range byBatch { + recorderNotifies = append(recorderNotifies, orderCompletionRecorderNotify{BatchID: bid, Rows: rows}) + } + + if _, err := tx.Exec(` + UPDATE tasks t + INNER JOIN batches b ON b.id = t.batch_id AND b.deleted_at IS NULL + SET t.status = 'cancelled', t.updated_at = ? + WHERE t.order_id = ?
+ AND t.deleted_at IS NULL + AND b.status IN ('pending', 'active') + AND t.status IN ('pending', 'ready', 'in_progress') + `, now, orderID); err != nil { + return nil, err + } + + if _, err := tx.Exec(` + UPDATE batches + SET status = 'completed', + started_at = COALESCE(started_at, ?), + ended_at = COALESCE(ended_at, ?), + updated_at = ? + WHERE order_id = ? AND deleted_at IS NULL AND status IN ('pending', 'active') + `, now, now, now, orderID); err != nil { + return nil, err + } + + for _, wsID := range wsIDs { + if err := syncWorkstationStatusFromBatchesTx(tx, wsID); err != nil { + return nil, err + } + } + return recorderNotifies, nil +} + +// tryAdvanceBatchStatus checks and advances batch status based on task completion. +// It should be called within or after a task status change transaction. +// - If batch is pending and a task just reached a terminal state: advance to active. +// - If batch is active and ALL tasks are in terminal state: advance to completed. +// Task cancellation to reach an all-cancelled set is done via PATCH batch (cancel), which sets +// the batch to cancelled already; this helper does not advance batch to cancelled. +// This function uses its own transaction and is safe to call after the task update commits. +func tryAdvanceBatchStatus(db *sqlx.DB, batchID int64) { + tx, err := db.Beginx() + if err != nil { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to begin tx for batch %d: %v", batchID, err) + return + } + defer func() { _ = tx.Rollback() }() + + type batchInfo struct { + Status string `db:"status"` + } + var info batchInfo + if err := tx.Get(&info, "SELECT status FROM batches WHERE id = ? 
AND deleted_at IS NULL FOR UPDATE", batchID); err != nil { + if err != sql.ErrNoRows { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to lock batch %d: %v", batchID, err) + } + return + } + + now := time.Now().UTC() + + switch info.Status { + case "pending": + // Advance to active: a task just reached a terminal state, so this batch has started. + // Then immediately re-evaluate completion: it's possible all tasks are already terminal. + if _, err := tx.Exec( + "UPDATE batches SET status = 'active', started_at = ?, updated_at = ? WHERE id = ? AND status = 'pending' AND deleted_at IS NULL", + now, now, batchID, + ); err != nil { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to advance batch %d to active: %v", batchID, err) + return + } + logger.Printf("[BATCH] Batch %d advanced: pending -> active", batchID) + info.Status = "active" + fallthrough + + case "active": + // Check if ALL non-deleted tasks are in terminal state + var nonTerminalCount int + if err := tx.Get(&nonTerminalCount, ` + SELECT COUNT(*) FROM tasks + WHERE batch_id = ? AND deleted_at IS NULL + AND status NOT IN ('completed', 'failed', 'cancelled')`, + batchID); err != nil { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to count non-terminal tasks for batch %d: %v", batchID, err) + return + } + + // Also ensure there's at least one task + var totalCount int + if err := tx.Get(&totalCount, "SELECT COUNT(*) FROM tasks WHERE batch_id = ? AND deleted_at IS NULL", batchID); err != nil { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to count tasks for batch %d: %v", batchID, err) + return + } + + if totalCount > 0 && nonTerminalCount == 0 { + if _, err := tx.Exec( + "UPDATE batches SET status = 'completed', ended_at = ?, updated_at = ? WHERE id = ? 
AND status = 'active' AND deleted_at IS NULL", + now, now, batchID, + ); err != nil { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to advance batch %d to completed: %v", batchID, err) + return + } + logger.Printf("[BATCH] Batch %d advanced: active -> completed (all %d tasks in terminal state)", batchID, totalCount) + } + + default: + // Terminal or non-advanceable state; do nothing + return + } + + var wsID int64 + if err := tx.Get(&wsID, "SELECT workstation_id FROM batches WHERE id = ? AND deleted_at IS NULL", batchID); err != nil { + if err != sql.ErrNoRows { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to read workstation_id for batch %d: %v", batchID, err) + } + return + } + if err := syncWorkstationStatusFromBatchesTx(tx, wsID); err != nil { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to sync workstation status for batch %d: %v", batchID, err) + return + } + + if err := tx.Commit(); err != nil { + logger.Printf("[BATCH] tryAdvanceBatchStatus: failed to commit for batch %d: %v", batchID, err) + } +} diff --git a/internal/api/handlers/common.go b/internal/api/handlers/common.go index ae7ee6e..5130009 100644 --- a/internal/api/handlers/common.go +++ b/internal/api/handlers/common.go @@ -9,21 +9,26 @@ import ( "encoding/json" "strings" "time" + "unicode" + "unicode/utf8" ) // maxSlugLength matches VARCHAR(100) for slug columns in the schema. const maxSlugLength = 100 // invalidSlugUserMessage is returned when slug fails isValidSlug (length or charset). -const invalidSlugUserMessage = "slug must be at most 100 characters and contain only alphanumeric characters and hyphens" +const invalidSlugUserMessage = "slug must be at most 100 characters and contain only letters, digits, and hyphens" -// isValidSlug checks non-empty slug, length <= maxSlugLength, and alphanumeric plus hyphen only. +// isValidSlug checks non-empty slug, length <= maxSlugLength (in runes), and allows Unicode letters/digits plus hyphen. 
func isValidSlug(s string) bool { - if len(s) == 0 || len(s) > maxSlugLength { + if s == "" { + return false + } + if utf8.RuneCountInString(s) > maxSlugLength { return false } for _, c := range s { - if (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && (c < '0' || c > '9') && c != '-' { + if !unicode.IsLetter(c) && !unicode.IsDigit(c) && c != '-' { return false } } diff --git a/internal/api/handlers/episode.go b/internal/api/handlers/episode.go index ca8e626..cadff8a 100644 --- a/internal/api/handlers/episode.go +++ b/internal/api/handlers/episode.go @@ -7,8 +7,10 @@ package handlers import ( "database/sql" + "encoding/json" "net/http" "strconv" + "strings" "time" "github.com/gin-gonic/gin" @@ -45,6 +47,7 @@ type episodeRow struct { CloudProcessed bool `db:"cloud_processed"` CloudSyncedAt sql.NullTime `db:"cloud_synced_at"` CreatedAt time.Time `db:"created_at"` + LabelsJSON sql.NullString `db:"labels"` } // Episode represents an episode in the API response @@ -63,6 +66,7 @@ type Episode struct { CloudProcessed bool `json:"cloud_processed"` CloudSyncedAt *string `json:"cloud_synced_at"` CreatedAt string `json:"created_at"` + Labels []string `json:"labels"` } // EpisodeListResponse represents the response for listing episodes @@ -106,6 +110,21 @@ func nullableTime(value sql.NullTime) *string { return &v } +// episodeLabelsFromDB parses episodes.labels JSON (string array). Invalid or empty yields empty slice. 
+func episodeLabelsFromDB(ns sql.NullString) []string { + if !ns.Valid || strings.TrimSpace(ns.String) == "" { + return []string{} + } + var out []string + if err := json.Unmarshal([]byte(ns.String), &out); err != nil { + return []string{} + } + if out == nil { + return []string{} + } + return out +} + // ListEpisodes returns a list of episodes with filtering and pagination // // @Summary List episodes @@ -152,7 +171,8 @@ func (h *EpisodeHandler) ListEpisodes(c *gin.Context) { COALESCE(e.qa_score, 0) as qa_score, e.auto_approved, e.cloud_processed, - e.created_at + e.created_at, + e.labels FROM episodes e WHERE e.deleted_at IS NULL ` @@ -250,6 +270,7 @@ func (h *EpisodeHandler) ListEpisodes(c *gin.Context) { CloudProcessed: r.CloudProcessed, CloudSyncedAt: nullableTime(r.CloudSyncedAt), CreatedAt: r.CreatedAt.UTC().Format(time.RFC3339), + Labels: episodeLabelsFromDB(r.LabelsJSON), } } @@ -292,7 +313,8 @@ func (h *EpisodeHandler) GetEpisode(c *gin.Context) { i.inspected_at, e.cloud_processed, e.cloud_synced_at, - e.created_at + e.created_at, + e.labels FROM episodes e LEFT JOIN inspections i ON i.episode_id = e.id LEFT JOIN inspectors ins ON ins.id = i.inspector_id @@ -327,5 +349,6 @@ func (h *EpisodeHandler) GetEpisode(c *gin.Context) { CloudProcessed: row.CloudProcessed, CloudSyncedAt: nullableTime(row.CloudSyncedAt), CreatedAt: row.CreatedAt.UTC().Format(time.RFC3339), + Labels: episodeLabelsFromDB(row.LabelsJSON), }) } diff --git a/internal/api/handlers/order.go b/internal/api/handlers/order.go index ee173d9..2c71349 100644 --- a/internal/api/handlers/order.go +++ b/internal/api/handlers/order.go @@ -5,6 +5,7 @@ package handlers import ( + "context" "database/sql" "encoding/json" "fmt" @@ -14,18 +15,22 @@ import ( "time" "archebase.com/keystone-edge/internal/logger" + "archebase.com/keystone-edge/internal/services" "github.com/gin-gonic/gin" "github.com/jmoiron/sqlx" ) // OrderHandler handles order-related HTTP requests. 
type OrderHandler struct { - db *sqlx.DB + db *sqlx.DB + recorderHub *services.RecorderHub + recorderRPCTimeout time.Duration } // NewOrderHandler creates a new OrderHandler. -func NewOrderHandler(db *sqlx.DB) *OrderHandler { - return &OrderHandler{db: db} +// recorderHub may be nil (skips Axon cancel RPCs after finalizing open batches when an order is completed via target_count). +func NewOrderHandler(db *sqlx.DB, recorderHub *services.RecorderHub, recorderRPCTimeout time.Duration) *OrderHandler { + return &OrderHandler{db: db, recorderHub: recorderHub, recorderRPCTimeout: recorderRPCTimeout} } // RegisterRoutes registers order routes under the provided router group. @@ -48,7 +53,10 @@ type OrderResponse struct { SceneID string `json:"scene_id"` Name string `json:"name"` TargetCount int `json:"target_count"` + TaskCount int `json:"task_count"` CompletedCount int `json:"completed_count"` + CancelledCount int `json:"cancelled_count"` + FailedCount int `json:"failed_count"` Status string `json:"status"` Priority string `json:"priority"` Deadline string `json:"deadline,omitempty"` @@ -85,7 +93,10 @@ type orderRow struct { SceneID int64 `db:"scene_id"` Name string `db:"name"` TargetCount int `db:"target_count"` + TaskCount int `db:"task_count"` CompletedCount int `db:"completed_count"` + CancelledCount int `db:"cancelled_count"` + FailedCount int `db:"failed_count"` Status string `db:"status"` Priority string `db:"priority"` Deadline sql.NullTime `db:"deadline"` @@ -123,7 +134,10 @@ func (h *OrderHandler) ListOrders(c *gin.Context) { CAST(o.metadata AS CHAR) AS metadata, o.created_at, o.updated_at, - (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.status = 'completed' AND t.deleted_at IS NULL) AS completed_count + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.deleted_at IS NULL) AS task_count, + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.status = 'completed' AND t.deleted_at IS NULL) AS completed_count, + (SELECT COUNT(*) FROM 
tasks t WHERE t.order_id = o.id AND t.status = 'cancelled' AND t.deleted_at IS NULL) AS cancelled_count, + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.status = 'failed' AND t.deleted_at IS NULL) AS failed_count FROM orders o WHERE o.deleted_at IS NULL ORDER BY o.id DESC @@ -162,7 +176,10 @@ func (h *OrderHandler) ListOrders(c *gin.Context) { SceneID: fmt.Sprintf("%d", r.SceneID), Name: r.Name, TargetCount: r.TargetCount, + TaskCount: r.TaskCount, CompletedCount: r.CompletedCount, + CancelledCount: r.CancelledCount, + FailedCount: r.FailedCount, Status: r.Status, Priority: r.Priority, Deadline: deadline, @@ -196,7 +213,10 @@ func (h *OrderHandler) GetOrder(c *gin.Context) { CAST(o.metadata AS CHAR) AS metadata, o.created_at, o.updated_at, - (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.status = 'completed' AND t.deleted_at IS NULL) AS completed_count + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.deleted_at IS NULL) AS task_count, + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.status = 'completed' AND t.deleted_at IS NULL) AS completed_count, + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.status = 'cancelled' AND t.deleted_at IS NULL) AS cancelled_count, + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.status = 'failed' AND t.deleted_at IS NULL) AS failed_count FROM orders o WHERE o.id = ? 
AND o.deleted_at IS NULL ` @@ -237,7 +257,10 @@ func (h *OrderHandler) GetOrder(c *gin.Context) { SceneID: fmt.Sprintf("%d", r.SceneID), Name: r.Name, TargetCount: r.TargetCount, + TaskCount: r.TaskCount, CompletedCount: r.CompletedCount, + CancelledCount: r.CancelledCount, + FailedCount: r.FailedCount, Status: r.Status, Priority: r.Priority, Deadline: deadline, @@ -386,7 +409,10 @@ func (h *OrderHandler) CreateOrder(c *gin.Context) { SceneID: fmt.Sprintf("%d", sceneID), Name: req.Name, TargetCount: req.TargetCount, + TaskCount: 0, CompletedCount: 0, + CancelledCount: 0, + FailedCount: 0, Status: "created", Priority: req.Priority, Deadline: deadlineOut, @@ -451,13 +477,39 @@ func (h *OrderHandler) UpdateOrder(c *gin.Context) { args = append(args, sceneID) } + var autoStatusFromTarget *string if req.TargetCount != nil { if *req.TargetCount <= 0 { c.JSON(http.StatusBadRequest, gin.H{"error": "target_count must be > 0"}) return } + type orderTargetCtx struct { + Status string `db:"status"` + CompletedCount int `db:"completed_count"` + } + var octx orderTargetCtx + if err := h.db.Get(&octx, ` + SELECT o.status, + (SELECT COUNT(*) FROM tasks t WHERE t.order_id = o.id AND t.deleted_at IS NULL AND t.status = 'completed') AS completed_count + FROM orders o WHERE o.id = ? 
AND o.deleted_at IS NULL`, id); err != nil { + logger.Printf("[ORDER] Failed to load order for target_count update: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update order"}) + return + } + if *req.TargetCount < octx.CompletedCount { + c.JSON(http.StatusBadRequest, gin.H{"error": "target_count cannot be less than completed_count"}) + return + } updates = append(updates, "target_count = ?") args = append(args, *req.TargetCount) + switch { + case *req.TargetCount == octx.CompletedCount && octx.Status != "cancelled" && octx.Status != "completed": + s := "completed" + autoStatusFromTarget = &s + case octx.Status == "completed" && *req.TargetCount > octx.CompletedCount: + s := "in_progress" + autoStatusFromTarget = &s + } } if req.Name != nil { @@ -519,7 +571,10 @@ func (h *OrderHandler) UpdateOrder(c *gin.Context) { args = append(args, priority) } - if req.Status != nil { + if autoStatusFromTarget != nil { + updates = append(updates, "status = ?") + args = append(args, *autoStatusFromTarget) + } else if req.Status != nil { status := strings.TrimSpace(*req.Status) if status == "" { c.JSON(http.StatusBadRequest, gin.H{"error": "status cannot be empty"}) @@ -543,10 +598,45 @@ func (h *OrderHandler) UpdateOrder(c *gin.Context) { args = append(args, now, id) query := fmt.Sprintf("UPDATE orders SET %s WHERE id = ? 
AND deleted_at IS NULL", strings.Join(updates, ", ")) - if _, err := h.db.Exec(query, args...); err != nil { - logger.Printf("[ORDER] Failed to update order: %v", err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update order"}) - return + if autoStatusFromTarget != nil && *autoStatusFromTarget == "completed" { + tx, err := h.db.Beginx() + if err != nil { + logger.Printf("[ORDER] Failed to begin tx for order update: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update order"}) + return + } + defer func() { _ = tx.Rollback() }() + if _, err := tx.Exec(query, args...); err != nil { + logger.Printf("[ORDER] Failed to update order: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update order"}) + return + } + orderFinalizeRecorderNotifies, finErr := finalizeOpenBatchesAfterOrderCompletedTx(tx, id, now) + if finErr != nil { + logger.Printf("[ORDER] Failed to finalize open batches after order completed via target_count: %v", finErr) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update order"}) + return + } + if err := tx.Commit(); err != nil { + logger.Printf("[ORDER] Failed to commit order update: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update order"}) + return + } + if len(orderFinalizeRecorderNotifies) > 0 && h.recorderHub != nil { + notifies := orderFinalizeRecorderNotifies + go func() { + ctx := context.Background() + for _, n := range notifies { + notifyRecorderCancelTasksWithHub(ctx, h.recorderHub, h.recorderRPCTimeout, n.BatchID, n.Rows) + } + }() + } + } else { + if _, err := h.db.Exec(query, args...); err != nil { + logger.Printf("[ORDER] Failed to update order: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update order"}) + return + } } h.GetOrder(c) @@ -615,3 +705,97 @@ func (h *OrderHandler) DeleteOrder(c *gin.Context) { c.Status(http.StatusNoContent) } + +// tryAdvanceOrderStatus advances 
order status based on completed tasks count. +// +// Rules (completed-only): +// - created -> in_progress when there is at least one completed task +// - in_progress -> completed when completed_count == target_count +// +// This helper uses its own transaction and is safe to call after task updates commit. +// recorderHub may be nil (skips Axon clear/cancel RPCs after finalizing open batches). +func tryAdvanceOrderStatus(db *sqlx.DB, orderID int64, recorderHub *services.RecorderHub, recorderRPCTimeout time.Duration) { + tx, err := db.Beginx() + if err != nil { + logger.Printf("[ORDER] tryAdvanceOrderStatus: failed to begin tx for order %d: %v", orderID, err) + return + } + defer func() { _ = tx.Rollback() }() + + var orderFinalizeRecorderNotifies []orderCompletionRecorderNotify + + type orderInfo struct { + Status string `db:"status"` + TargetCount int `db:"target_count"` + } + var info orderInfo + if err := tx.Get(&info, "SELECT status, target_count FROM orders WHERE id = ? AND deleted_at IS NULL FOR UPDATE", orderID); err != nil { + if err != sql.ErrNoRows { + logger.Printf("[ORDER] tryAdvanceOrderStatus: failed to lock order %d: %v", orderID, err) + } + return + } + + // Only auto-advance non-terminal statuses. + if info.Status != "created" && info.Status != "in_progress" { + return + } + if info.TargetCount <= 0 { + return + } + + var completedCount int + if err := tx.Get(&completedCount, ` + SELECT COUNT(*) FROM tasks + WHERE order_id = ? AND deleted_at IS NULL AND status = 'completed' + `, orderID); err != nil { + logger.Printf("[ORDER] tryAdvanceOrderStatus: failed to count completed tasks for order %d: %v", orderID, err) + return + } + + now := time.Now().UTC() + + if info.Status == "created" && completedCount > 0 { + if _, err := tx.Exec( + "UPDATE orders SET status = 'in_progress', updated_at = ? WHERE id = ? 
AND status = 'created' AND deleted_at IS NULL", + now, orderID, + ); err != nil { + logger.Printf("[ORDER] tryAdvanceOrderStatus: failed to advance order %d created->in_progress: %v", orderID, err) + return + } + info.Status = "in_progress" + } + + if info.Status == "in_progress" && completedCount == info.TargetCount { + if _, err := tx.Exec( + "UPDATE orders SET status = 'completed', updated_at = ? WHERE id = ? AND status = 'in_progress' AND deleted_at IS NULL", + now, orderID, + ); err != nil { + logger.Printf("[ORDER] tryAdvanceOrderStatus: failed to advance order %d in_progress->completed: %v", orderID, err) + return + } + // Close any still-open batches for this order: cancel non-terminal tasks, then mark batches completed. + var finErr error + orderFinalizeRecorderNotifies, finErr = finalizeOpenBatchesAfterOrderCompletedTx(tx, orderID, now) + if finErr != nil { + logger.Printf("[ORDER] tryAdvanceOrderStatus: failed to finalize open batches for completed order %d: %v", orderID, finErr) + return + } + } + + if err := tx.Commit(); err != nil { + logger.Printf("[ORDER] tryAdvanceOrderStatus: failed to commit for order %d: %v", orderID, err) + return + } + + // Best-effort: after commit, notify Axon recorder for ready/in_progress tasks we cancelled (same as PATCH batch cancel). + if len(orderFinalizeRecorderNotifies) > 0 && recorderHub != nil { + notifies := orderFinalizeRecorderNotifies + go func() { + ctx := context.Background() + for _, n := range notifies { + notifyRecorderCancelTasksWithHub(ctx, recorderHub, recorderRPCTimeout, n.BatchID, n.Rows) + } + }() + } +} diff --git a/internal/api/handlers/robot.go b/internal/api/handlers/robot.go index 6f88684..195d82c 100644 --- a/internal/api/handlers/robot.go +++ b/internal/api/handlers/robot.go @@ -57,6 +57,15 @@ type RobotListResponse struct { Robots []RobotResponse `json:"robots"` } +// DeviceConnectionResponse is an in-memory connection snapshot keyed by Axon device_id (no database access). 
+type DeviceConnectionResponse struct { + DeviceID string `json:"device_id"` + Connected bool `json:"connected"` + ConnectedAt string `json:"connected_at,omitempty"` + RecorderConnected bool `json:"recorder_connected"` + TransferConnected bool `json:"transfer_connected"` +} + // CreateRobotRequest represents the request body for creating a robot. type CreateRobotRequest struct { RobotTypeID string `json:"robot_type_id"` @@ -83,6 +92,7 @@ type CreateRobotResponse struct { func (h *RobotHandler) RegisterRoutes(apiV1 *gin.RouterGroup) { apiV1.GET("/robots", h.ListRobots) apiV1.POST("/robots", h.CreateRobot) + apiV1.GET("/devices/:device_id/connection", h.GetDeviceConnection) apiV1.GET("/robots/:id", h.GetRobot) apiV1.PUT("/robots/:id", h.UpdateRobot) apiV1.DELETE("/robots/:id", h.DeleteRobot) @@ -109,20 +119,32 @@ func robotMetadataFromDB(ns sql.NullString) interface{} { } func (h *RobotHandler) connectionState(deviceID string) (connected bool, connectedAt string) { + connected, connectedAt, _, _ = h.connectionStateDetailed(deviceID) + return connected, connectedAt +} + +// connectionStateDetailed returns hub presence for recorder and transfer (no DB). 
+func (h *RobotHandler) connectionStateDetailed(deviceID string) (connected bool, connectedAt string, recorderConnected bool, transferConnected bool) { + deviceID = strings.TrimSpace(deviceID) + if deviceID == "" { + return false, "", false, false + } if h.recorderHub == nil || h.transferHub == nil { - return false, "" + return false, "", false, false } recConn := h.recorderHub.Get(deviceID) transConn := h.transferHub.Get(deviceID) - connected = recConn != nil && transConn != nil + recorderConnected = recConn != nil + transferConnected = transConn != nil + connected = recorderConnected && transferConnected if !connected { - return false, "" + return false, "", recorderConnected, transferConnected } t := recConn.ConnectedAt if transConn.ConnectedAt.After(t) { t = transConn.ConnectedAt } - return true, t.UTC().Format(time.RFC3339) + return true, t.UTC().Format(time.RFC3339), recorderConnected, transferConnected } func (h *RobotHandler) responseFromRow(r robotRow) RobotResponse { @@ -405,6 +427,34 @@ func (h *RobotHandler) CreateRobot(c *gin.Context) { }) } +// GetDeviceConnection returns recorder+transfer WebSocket presence for a device_id without touching the database. +// +// @Summary Device connection status +// @Description In-memory connection snapshot (RecorderHub + TransferHub). Same rules as GET /robots/:id field `connected`. 
+// @Tags robots +// @Accept json +// @Produce json +// @Param device_id path string true "Axon device id" +// @Success 200 {object} DeviceConnectionResponse +// @Failure 400 {object} map[string]string +// @Router /devices/{device_id}/connection [get] +func (h *RobotHandler) GetDeviceConnection(c *gin.Context) { + raw := c.Param("device_id") + deviceID := strings.TrimSpace(raw) + if deviceID == "" { + c.JSON(http.StatusBadRequest, gin.H{"error": "device_id is required"}) + return + } + connected, connectedAt, rec, trans := h.connectionStateDetailed(deviceID) + c.JSON(http.StatusOK, DeviceConnectionResponse{ + DeviceID: deviceID, + Connected: connected, + ConnectedAt: connectedAt, + RecorderConnected: rec, + TransferConnected: trans, + }) +} + // GetRobot handles getting a single robot by ID. // // @Summary Get robot diff --git a/internal/api/handlers/skill.go b/internal/api/handlers/skill.go index 94e84c6..83fa430 100644 --- a/internal/api/handlers/skill.go +++ b/internal/api/handlers/skill.go @@ -51,7 +51,6 @@ func NewSkillHandler(db *sqlx.DB) *SkillHandler { type SkillResponse struct { ID string `json:"id"` Slug string `json:"slug"` - Name string `json:"name"` Description string `json:"description,omitempty"` Version string `json:"version,omitempty"` Metadata interface{} `json:"metadata,omitempty"` @@ -67,7 +66,6 @@ type SkillListResponse struct { // CreateSkillRequest represents the request body for creating a skill. 
type CreateSkillRequest struct { Slug string `json:"slug"` - Name string `json:"name"` Description string `json:"description,omitempty"` Version string `json:"version,omitempty"` Metadata interface{} `json:"metadata,omitempty"` @@ -77,7 +75,6 @@ type CreateSkillRequest struct { type CreateSkillResponse struct { ID string `json:"id"` Slug string `json:"slug"` - Name string `json:"name"` Version string `json:"version"` CreatedAt string `json:"created_at"` } @@ -86,7 +83,6 @@ type CreateSkillResponse struct { // Metadata uses optionalJSONPatch so JSON null (clear) is distinct from omitting the key (unchanged). type UpdateSkillRequest struct { Slug *string `json:"slug,omitempty"` - Name *string `json:"name,omitempty"` Description *string `json:"description,omitempty"` Version *string `json:"version,omitempty"` Metadata optionalJSONPatch `json:"metadata,omitempty"` @@ -105,7 +101,6 @@ func (h *SkillHandler) RegisterRoutes(apiV1 *gin.RouterGroup) { type skillRow struct { ID int64 `db:"id"` Slug string `db:"slug"` - Name string `db:"name"` Description sql.NullString `db:"description"` Version sql.NullString `db:"version"` Metadata sql.NullString `db:"metadata"` @@ -128,7 +123,6 @@ func (h *SkillHandler) ListSkills(c *gin.Context) { SELECT id, slug, - name, description, version, metadata, @@ -172,7 +166,6 @@ func (h *SkillHandler) ListSkills(c *gin.Context) { skills = append(skills, SkillResponse{ ID: fmt.Sprintf("%d", s.ID), Slug: s.Slug, - Name: s.Name, Description: description, Version: version, Metadata: metadata, @@ -211,7 +204,6 @@ func (h *SkillHandler) GetSkill(c *gin.Context) { SELECT id, slug, - name, description, version, metadata, @@ -256,7 +248,6 @@ func (h *SkillHandler) GetSkill(c *gin.Context) { c.JSON(http.StatusOK, SkillResponse{ ID: fmt.Sprintf("%d", s.ID), Slug: s.Slug, - Name: s.Name, Description: description, Version: version, Metadata: metadata, @@ -285,7 +276,6 @@ func (h *SkillHandler) CreateSkill(c *gin.Context) { } req.Slug = 
strings.TrimSpace(req.Slug) - req.Name = strings.TrimSpace(req.Name) req.Description = strings.TrimSpace(req.Description) req.Version = strings.TrimSpace(req.Version) @@ -297,10 +287,6 @@ func (h *SkillHandler) CreateSkill(c *gin.Context) { c.JSON(http.StatusBadRequest, gin.H{"error": invalidSlugUserMessage}) return } - if req.Name == "" { - c.JSON(http.StatusBadRequest, gin.H{"error": "name is required"}) - return - } version := "1.0.0" if req.Version != "" { @@ -333,15 +319,13 @@ func (h *SkillHandler) CreateSkill(c *gin.Context) { result, err := h.db.Exec( `INSERT INTO skills ( slug, - name, description, version, metadata, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?)`, + ) VALUES (?, ?, ?, ?, ?, ?)`, req.Slug, - req.Name, descriptionStr, version, metadataStr, @@ -364,7 +348,6 @@ func (h *SkillHandler) CreateSkill(c *gin.Context) { c.JSON(http.StatusCreated, CreateSkillResponse{ ID: fmt.Sprintf("%d", id), Slug: req.Slug, - Name: req.Name, Version: version, CreatedAt: now.Format(time.RFC3339), }) @@ -448,14 +431,6 @@ func (h *SkillHandler) UpdateSkill(c *gin.Context) { } } - if req.Name != nil { - name := strings.TrimSpace(*req.Name) - if name != "" { - updates = append(updates, "name = ?") - args = append(args, name) - } - } - if req.Description != nil { description := strings.TrimSpace(*req.Description) var descStr sql.NullString @@ -519,7 +494,7 @@ func (h *SkillHandler) UpdateSkill(c *gin.Context) { // Fetch the updated skill var s skillRow - err = h.db.Get(&s, "SELECT id, slug, name, description, version, metadata, created_at, updated_at FROM skills WHERE id = ?", id) + err = h.db.Get(&s, "SELECT id, slug, description, version, metadata, created_at, updated_at FROM skills WHERE id = ?", id) if err != nil { logger.Printf("[SKILL] Failed to fetch updated skill: %v", err) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to get updated skill"}) @@ -550,7 +525,6 @@ func (h *SkillHandler) UpdateSkill(c *gin.Context) { c.JSON(http.StatusOK, 
SkillResponse{ ID: fmt.Sprintf("%d", s.ID), Slug: s.Slug, - Name: s.Name, Description: description, Version: version, Metadata: metadata, diff --git a/internal/api/handlers/sop.go b/internal/api/handlers/sop.go index 0939e56..24b935f 100644 --- a/internal/api/handlers/sop.go +++ b/internal/api/handlers/sop.go @@ -32,7 +32,6 @@ func NewSOPHandler(db *sqlx.DB) *SOPHandler { // SOPResponse represents an SOP in the response. type SOPResponse struct { ID string `json:"id"` - Name string `json:"name"` Slug string `json:"slug"` Description string `json:"description,omitempty"` SkillSequence []string `json:"skill_sequence"` @@ -48,7 +47,6 @@ type SOPListResponse struct { // CreateSOPRequest represents the request body for creating an SOP. type CreateSOPRequest struct { - Name string `json:"name"` Slug string `json:"slug"` Description string `json:"description,omitempty"` SkillSequence []string `json:"skill_sequence"` @@ -58,7 +56,6 @@ type CreateSOPRequest struct { // CreateSOPResponse represents the response for creating an SOP. type CreateSOPResponse struct { ID string `json:"id"` - Name string `json:"name"` Slug string `json:"slug"` SkillSequence []string `json:"skill_sequence"` Version string `json:"version"` @@ -67,7 +64,6 @@ type CreateSOPResponse struct { // UpdateSOPRequest represents the request body for updating an SOP. 
type UpdateSOPRequest struct { - Name *string `json:"name,omitempty"` Slug *string `json:"slug,omitempty"` Description *string `json:"description,omitempty"` SkillSequence *[]string `json:"skill_sequence,omitempty"` @@ -86,7 +82,6 @@ func (h *SOPHandler) RegisterRoutes(apiV1 *gin.RouterGroup) { // sopRow represents an SOP in the database type sopRow struct { ID int64 `db:"id"` - Name string `db:"name"` Slug string `db:"slug"` Description sql.NullString `db:"description"` SkillSequence string `db:"skill_sequence"` @@ -109,7 +104,6 @@ func (h *SOPHandler) ListSOPs(c *gin.Context) { query := ` SELECT id, - name, slug, description, skill_sequence, @@ -150,7 +144,6 @@ func (h *SOPHandler) ListSOPs(c *gin.Context) { sops = append(sops, SOPResponse{ ID: fmt.Sprintf("%d", s.ID), - Name: s.Name, Slug: s.Slug, Description: description, SkillSequence: skillSequence, @@ -189,7 +182,6 @@ func (h *SOPHandler) GetSOP(c *gin.Context) { query := ` SELECT id, - name, slug, description, skill_sequence, @@ -231,7 +223,6 @@ func (h *SOPHandler) GetSOP(c *gin.Context) { c.JSON(http.StatusOK, SOPResponse{ ID: fmt.Sprintf("%d", s.ID), - Name: s.Name, Slug: s.Slug, Description: description, SkillSequence: skillSequence, @@ -260,16 +251,10 @@ func (h *SOPHandler) CreateSOP(c *gin.Context) { return } - req.Name = strings.TrimSpace(req.Name) req.Slug = strings.TrimSpace(req.Slug) req.Description = strings.TrimSpace(req.Description) req.Version = strings.TrimSpace(req.Version) - if req.Name == "" { - c.JSON(http.StatusBadRequest, gin.H{"error": "name is required"}) - return - } - if req.Slug == "" { c.JSON(http.StatusBadRequest, gin.H{"error": "slug is required"}) return @@ -313,15 +298,13 @@ func (h *SOPHandler) CreateSOP(c *gin.Context) { result, err := h.db.Exec( `INSERT INTO sops ( - name, slug, description, skill_sequence, version, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?)`, - req.Name, + ) VALUES (?, ?, ?, ?, ?, ?)`, req.Slug, descriptionStr, string(skillSeqJSON), @@ -344,7 
+327,6 @@ func (h *SOPHandler) CreateSOP(c *gin.Context) { c.JSON(http.StatusCreated, CreateSOPResponse{ ID: fmt.Sprintf("%d", id), - Name: req.Name, Slug: req.Slug, SkillSequence: req.SkillSequence, Version: version, @@ -400,14 +382,6 @@ func (h *SOPHandler) UpdateSOP(c *gin.Context) { updates := []string{} args := []interface{}{} - if req.Name != nil { - name := strings.TrimSpace(*req.Name) - if name != "" { - updates = append(updates, "name = ?") - args = append(args, name) - } - } - if req.Slug != nil { slug := strings.TrimSpace(*req.Slug) if slug == "" { @@ -498,7 +472,7 @@ func (h *SOPHandler) UpdateSOP(c *gin.Context) { // Fetch the updated SOP var s sopRow - err = h.db.Get(&s, "SELECT id, name, slug, description, skill_sequence, version, created_at, updated_at FROM sops WHERE id = ?", id) + err = h.db.Get(&s, "SELECT id, slug, description, skill_sequence, version, created_at, updated_at FROM sops WHERE id = ?", id) if err != nil { logger.Printf("[SOP] Failed to fetch updated SOP: %v", err) c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to get updated SOP"}) @@ -525,7 +499,6 @@ func (h *SOPHandler) UpdateSOP(c *gin.Context) { c.JSON(http.StatusOK, SOPResponse{ ID: fmt.Sprintf("%d", s.ID), - Name: s.Name, Slug: s.Slug, Description: description, SkillSequence: skillSequence, diff --git a/internal/api/handlers/station.go b/internal/api/handlers/station.go index 7cab4f3..a1d3e5c 100644 --- a/internal/api/handlers/station.go +++ b/internal/api/handlers/station.go @@ -70,6 +70,7 @@ func stationMetadataFromDB(ns sql.NullString) interface{} { // RegisterRoutes registers station related routes. 
func (h *StationHandler) RegisterRoutes(apiV1 *gin.RouterGroup) { + apiV1.POST("/stations/lookup", h.LookupStations) apiV1.POST("/stations", h.CreateStation) apiV1.GET("/stations", h.ListStations) apiV1.GET("/stations/:id", h.GetStation) @@ -285,7 +286,7 @@ func (h *StationHandler) CreateStation(c *gin.Context) { dcInfo.OperatorID, // collector_operator_id robotInfo.FactoryID, req.Name, - "inactive", + "offline", metadataStr, createdAt, createdAt, @@ -316,7 +317,7 @@ func (h *StationHandler) CreateStation(c *gin.Context) { RobotID: fmt.Sprintf("%d", robotInfo.ID), DataCollectorID: fmt.Sprintf("%d", dcInfo.ID), FactoryID: fmt.Sprintf("%d", robotInfo.FactoryID), - Status: "inactive", + Status: "offline", Name: req.Name, Metadata: metaOut, CreatedAt: createdAtISO.Format(time.RFC3339), @@ -384,7 +385,7 @@ func (h *StationHandler) ListStations(c *gin.Context) { } response = append(response, StationResponse{ - ID: fmt.Sprintf("ws_%d", s.ID), + ID: fmt.Sprintf("%d", s.ID), RobotID: fmt.Sprintf("%d", s.RobotID), DataCollectorID: fmt.Sprintf("%d", s.DataCollectorID), FactoryID: fmt.Sprintf("%d", s.FactoryID), @@ -399,6 +400,159 @@ func (h *StationHandler) ListStations(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"stations": response}) } +const maxStationLookupIDs = 500 + +// LookupStationsRequest is the body for POST /stations/lookup. +type LookupStationsRequest struct { + WorkstationIDs []any `json:"workstation_ids"` +} + +// StationLookupItem is a workstation snapshot for admin/history views (includes soft-deleted rows). 
+type StationLookupItem struct { + ID string `json:"id"` + RobotID string `json:"robot_id"` + DataCollectorID string `json:"data_collector_id"` + FactoryID string `json:"factory_id"` + Name string `json:"name"` + Status string `json:"status"` + RobotName string `json:"robot_name,omitempty"` + RobotSerial string `json:"robot_serial,omitempty"` + CollectorName string `json:"collector_name,omitempty"` + CollectorOperatorID string `json:"collector_operator_id,omitempty"` + Deleted bool `json:"deleted"` +} + +func parseWorkstationIDFromLookupAny(v any) (int64, bool) { + if v == nil { + return 0, false + } + switch x := v.(type) { + case float64: + if x < 1 || x != float64(int64(x)) { + return 0, false + } + return int64(x), true + case string: + s := strings.TrimSpace(x) + s = strings.TrimPrefix(strings.TrimPrefix(s, "ws_"), "WS_") + if s == "" { + return 0, false + } + id, err := strconv.ParseInt(s, 10, 64) + if err != nil || id <= 0 { + return 0, false + } + return id, true + case json.Number: + id, err := strconv.ParseInt(strings.TrimSpace(string(x)), 10, 64) + if err != nil || id <= 0 { + return 0, false + } + return id, true + default: + return 0, false + } +} + +// LookupStations returns workstation snapshots by id, including soft-deleted rows. 
+func (h *StationHandler) LookupStations(c *gin.Context) { + var req LookupStationsRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"}) + return + } + if len(req.WorkstationIDs) == 0 { + c.JSON(http.StatusOK, gin.H{"stations": []StationLookupItem{}}) + return + } + + seen := make(map[int64]struct{}) + ids := make([]int64, 0, len(req.WorkstationIDs)) + for _, raw := range req.WorkstationIDs { + id, ok := parseWorkstationIDFromLookupAny(raw) + if !ok { + continue + } + if _, dup := seen[id]; dup { + continue + } + seen[id] = struct{}{} + ids = append(ids, id) + if len(ids) >= maxStationLookupIDs { + break + } + } + if len(ids) == 0 { + c.JSON(http.StatusOK, gin.H{"stations": []StationLookupItem{}}) + return + } + + query, args, err := sqlx.In(` + SELECT + id, robot_id, + COALESCE(robot_name, '') AS robot_name, + COALESCE(robot_serial, '') AS robot_serial, + data_collector_id, + COALESCE(collector_name, '') AS collector_name, + COALESCE(collector_operator_id, '') AS collector_operator_id, + factory_id, + name, status, + deleted_at + FROM workstations + WHERE id IN (?) 
+ `, ids) + if err != nil { + logger.Printf("[STATION] Failed to build lookup query: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to lookup stations"}) + return + } + query = h.db.Rebind(query) + + type lookupRow struct { + ID int64 `db:"id"` + RobotID int64 `db:"robot_id"` + RobotName string `db:"robot_name"` + RobotSerial string `db:"robot_serial"` + DataCollectorID int64 `db:"data_collector_id"` + CollectorName string `db:"collector_name"` + CollectorOperatorID string `db:"collector_operator_id"` + FactoryID int64 `db:"factory_id"` + Name sql.NullString `db:"name"` + Status string `db:"status"` + DeletedAt sql.NullTime `db:"deleted_at"` + } + + var rows []lookupRow + if err := h.db.Select(&rows, query, args...); err != nil { + logger.Printf("[STATION] Failed to lookup stations: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to lookup stations"}) + return + } + + out := make([]StationLookupItem, 0, len(rows)) + for _, r := range rows { + name := "" + if r.Name.Valid { + name = strings.TrimSpace(r.Name.String) + } + out = append(out, StationLookupItem{ + ID: fmt.Sprintf("%d", r.ID), + RobotID: fmt.Sprintf("%d", r.RobotID), + DataCollectorID: fmt.Sprintf("%d", r.DataCollectorID), + FactoryID: fmt.Sprintf("%d", r.FactoryID), + Name: name, + Status: r.Status, + RobotName: strings.TrimSpace(r.RobotName), + RobotSerial: strings.TrimSpace(r.RobotSerial), + CollectorName: strings.TrimSpace(r.CollectorName), + CollectorOperatorID: strings.TrimSpace(r.CollectorOperatorID), + Deleted: r.DeletedAt.Valid, + }) + } + + c.JSON(http.StatusOK, gin.H{"stations": out}) +} + // validStationStatuses contains all valid station status values var validStationStatuses = map[string]bool{ "active": true, @@ -407,6 +561,22 @@ var validStationStatuses = map[string]bool{ "offline": true, } +// parseStationPathID parses a station id from the URL path (decimal string, e.g. "12"). 
+func parseStationPathID(stationIDStr string) (int64, error) { + s := strings.TrimSpace(stationIDStr) + if s == "" { + return 0, fmt.Errorf("empty station id") + } + id, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, err + } + if id <= 0 { + return 0, fmt.Errorf("station id must be positive") + } + return id, nil +} + // UpdateStation handles updating a station's status. // // @Summary Update station @@ -414,7 +584,7 @@ var validStationStatuses = map[string]bool{ // @Tags stations // @Accept json // @Produce json -// @Param id path string true "Station ID (e.g., ws_001)" +// @Param id path string true "Station ID (numeric, e.g. 1)" // @Param body body UpdateStationRequest true "Status update payload" // @Success 200 {object} StationResponse // @Failure 400 {object} map[string]string @@ -424,17 +594,9 @@ var validStationStatuses = map[string]bool{ func (h *StationHandler) UpdateStation(c *gin.Context) { stationIDStr := c.Param("id") - // Parse station ID (format: ws_XXX) - if !strings.HasPrefix(stationIDStr, "ws_") { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected ws_XXX"}) - return - } - - idStr := strings.TrimPrefix(stationIDStr, "ws_") - var stationID int64 - _, err := fmt.Sscanf(idStr, "%d", &stationID) + stationID, err := parseStationPathID(stationIDStr) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected ws_XXX"}) + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected numeric id"}) return } @@ -699,7 +861,7 @@ func (h *StationHandler) UpdateStation(c *gin.Context) { // @Tags stations // @Accept json // @Produce json -// @Param id path string true "Station ID (e.g., ws_001)" +// @Param id path string true "Station ID (numeric, e.g., 1)" // @Success 200 {object} StationResponse // @Failure 400 {object} map[string]string // @Failure 404 {object} map[string]string @@ -708,17 +870,9 @@ func (h *StationHandler) UpdateStation(c 
*gin.Context) { func (h *StationHandler) GetStation(c *gin.Context) { stationIDStr := c.Param("id") - // Parse station ID (format: ws_XXX) - if !strings.HasPrefix(stationIDStr, "ws_") { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected ws_XXX"}) - return - } - - idStr := strings.TrimPrefix(stationIDStr, "ws_") - var stationID int64 - _, err := fmt.Sscanf(idStr, "%d", &stationID) + stationID, err := parseStationPathID(stationIDStr) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected ws_XXX"}) + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected numeric id"}) return } @@ -751,7 +905,7 @@ func (h *StationHandler) GetStation(c *gin.Context) { } c.JSON(http.StatusOK, StationResponse{ - ID: fmt.Sprintf("ws_%d", station.ID), + ID: fmt.Sprintf("%d", station.ID), RobotID: fmt.Sprintf("%d", station.RobotID), DataCollectorID: fmt.Sprintf("%d", station.DataCollectorID), FactoryID: fmt.Sprintf("%d", station.FactoryID), @@ -770,26 +924,19 @@ func (h *StationHandler) GetStation(c *gin.Context) { // @Tags stations // @Accept json // @Produce json -// @Param id path string true "Station ID (e.g., ws_001)" +// @Param id path string true "Station ID (numeric, e.g. 
1)" // @Success 204 // @Failure 400 {object} map[string]string // @Failure 404 {object} map[string]string +// @Failure 409 {object} map[string]string // @Failure 500 {object} map[string]string // @Router /stations/{id} [delete] func (h *StationHandler) DeleteStation(c *gin.Context) { stationIDStr := c.Param("id") - // Parse station ID (format: ws_XXX) - if !strings.HasPrefix(stationIDStr, "ws_") { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected ws_XXX"}) - return - } - - idStr := strings.TrimPrefix(stationIDStr, "ws_") - var stationID int64 - _, err := fmt.Sscanf(idStr, "%d", &stationID) + stationID, err := parseStationPathID(stationIDStr) if err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected ws_XXX"}) + c.JSON(http.StatusBadRequest, gin.H{"error": "invalid station ID format, expected numeric id"}) return } @@ -807,6 +954,26 @@ func (h *StationHandler) DeleteStation(c *gin.Context) { return } + var hasBlockingBatch bool + err = h.db.Get(&hasBlockingBatch, ` + SELECT EXISTS( + SELECT 1 FROM batches + WHERE workstation_id = ? 
AND deleted_at IS NULL + AND status IN ('pending', 'active') + ) + `, stationID) + if err != nil { + logger.Printf("[STATION] Failed to check batches for station %d: %v", stationID, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete station"}) + return + } + if hasBlockingBatch { + c.JSON(http.StatusConflict, gin.H{ + "error": "cannot delete station while batches are pending or active", + }) + return + } + now := time.Now().UTC() // Perform soft delete by setting deleted_at diff --git a/internal/api/handlers/task.go b/internal/api/handlers/task.go index 8368d18..ce8e50f 100644 --- a/internal/api/handlers/task.go +++ b/internal/api/handlers/task.go @@ -40,17 +40,37 @@ func newPublicTaskID(now time.Time, seq int) (string, error) { ), nil } +func newPublicBatchID(now time.Time, seq int) (string, error) { + // Format: batch_YYYYMMDD_HHMMSS_mmm_SS_RRRRRRRR (mmm = milliseconds, SS = seq%100 zero-padded to 2 digits, RRRRRRRR = 8 random hex chars). + // Keep it human-readable while avoiding collisions under concurrent/bulk creates. + b := make([]byte, 4) + if _, err := rand.Read(b); err != nil { + return "", err + } + return fmt.Sprintf( + "batch_%s_%03d_%02d_%s", + now.UTC().Format("20060102_150405"), + now.UTC().Nanosecond()/1_000_000, + seq%100, + hex.EncodeToString(b), + ), nil +} + // TaskHandler handles task-related HTTP requests type TaskHandler struct { - db *sqlx.DB - hub *services.TransferHub + db *sqlx.DB + hub *services.TransferHub + recorderHub *services.RecorderHub + recorderRPCTimeout time.Duration } // NewTaskHandler creates a new TaskHandler -func NewTaskHandler(db *sqlx.DB, hub *services.TransferHub) *TaskHandler { +func NewTaskHandler(db *sqlx.DB, hub *services.TransferHub, recorderHub *services.RecorderHub, recorderRPCTimeout time.Duration) *TaskHandler { return &TaskHandler{ - db: db, - hub: hub, + db: db, + hub: hub, + recorderHub: recorderHub, + recorderRPCTimeout: recorderRPCTimeout, } } @@ -79,6 +99,7 @@ func (h *TaskHandler) RegisterRoutes(apiV1 *gin.RouterGroup) { apiV1.GET("/tasks", h.ListTasks)
apiV1.GET("/tasks/:id", h.GetTask) apiV1.PUT("/tasks/:id", h.UpdateTask) + apiV1.DELETE("/tasks/:id", h.DeleteTask) apiV1.GET("/tasks/:id/config", h.GetTaskConfig) } @@ -159,22 +180,31 @@ type UpdateTaskResponse struct { var validTaskStatusTransitions = map[string]map[string]struct{}{ "pending": { - "ready": {}, - "cancelled": {}, + "ready": {}, }, "ready": { - "pending": {}, + "in_progress": {}, + "pending": {}, }, + "in_progress": { + "pending": {}, + "completed": {}, + "failed": {}, + }, + "failed": {}, + "completed": {}, + "cancelled": {}, } // ListTasks handles task listing requests with optional filtering. // // @Summary List tasks -// @Description Lists tasks with optional workstation and status filters +// @Description Lists tasks with optional workstation, status, and public task_id filters // @Tags tasks // @Produce json // @Param workstation_id query string false "Filter by workstation" // @Param status query string false "Filter by status" +// @Param task_id query string false "Filter by public task_id (exact match)" // @Param limit query int false "Max results" default(50) // @Param offset query int false "Pagination offset" default(0) // @Success 200 {object} ListTasksResponse @@ -186,6 +216,7 @@ func (h *TaskHandler) ListTasks(c *gin.Context) { workstationID := strings.TrimSpace(c.Query("workstation_id")) status := strings.TrimSpace(c.Query("status")) + publicTaskID := strings.TrimSpace(c.Query("task_id")) limit := defaultLimit if rawLimit := strings.TrimSpace(c.Query("limit")); rawLimit != "" { @@ -215,7 +246,7 @@ func (h *TaskHandler) ListTasks(c *gin.Context) { } conditions := []string{"deleted_at IS NULL"} - args := make([]interface{}, 0, 4) + args := make([]interface{}, 0, 6) if workstationID != "" { conditions = append(conditions, "CAST(workstation_id AS CHAR) = ?") @@ -227,6 +258,11 @@ func (h *TaskHandler) ListTasks(c *gin.Context) { args = append(args, status) } + if publicTaskID != "" { + conditions = append(conditions, "task_id = ?") + 
args = append(args, publicTaskID) + } + whereClause := strings.Join(conditions, " AND ") var total int @@ -340,7 +376,7 @@ func (h *TaskHandler) GetTask(c *gin.Context) { // UpdateTask handles task status update requests. // // @Summary Update task -// @Description Updates task status with restricted state transitions +// @Description Updates task status with restricted state transitions. Setting status to cancelled is not allowed; cancel the parent batch instead. Rejected when the parent batch status is cancelled or recalled. // @Tags tasks // @Accept json // @Produce json @@ -349,7 +385,7 @@ func (h *TaskHandler) GetTask(c *gin.Context) { // @Success 200 {object} UpdateTaskResponse // @Failure 400 {object} map[string]string // @Failure 404 {object} map[string]string -// @Failure 409 {object} map[string]string +// @Failure 409 {object} map[string]string "Conflict (invalid transition or batch is cancelled/recalled)" // @Failure 500 {object} map[string]string // @Router /tasks/{id} [put] func (h *TaskHandler) UpdateTask(c *gin.Context) { @@ -385,15 +421,28 @@ func (h *TaskHandler) UpdateTask(c *gin.Context) { return } + if req.Status == "cancelled" { + c.JSON(http.StatusBadRequest, gin.H{ + "error_msg": "setting status to 'cancelled' is not allowed via PUT; cancel the parent batch (PATCH /batches/:id) instead", + }) + return + } + if req.UpdatedBy == "" { c.JSON(http.StatusBadRequest, gin.H{"error_msg": "updated_by is required"}) return } var taskRow struct { - Status string `db:"status"` + Status string `db:"status"` + OrderID int64 `db:"order_id"` + BatchStatus sql.NullString `db:"batch_status"` } - err = h.db.Get(&taskRow, "SELECT status FROM tasks WHERE id = ? AND deleted_at IS NULL", id) + err = h.db.Get(&taskRow, ` + SELECT t.status, t.order_id, b.status AS batch_status + FROM tasks t + LEFT JOIN batches b ON b.id = t.batch_id AND b.deleted_at IS NULL + WHERE t.id = ? 
AND t.deleted_at IS NULL`, id) if err == sql.ErrNoRows { c.JSON(http.StatusNotFound, gin.H{"error_msg": "Task not found: " + idStr}) return @@ -404,6 +453,18 @@ func (h *TaskHandler) UpdateTask(c *gin.Context) { return } + if taskRow.BatchStatus.Valid { + bs := taskRow.BatchStatus.String + if bs == "cancelled" || bs == "recalled" { + c.JSON(http.StatusConflict, gin.H{ + "error_msg": fmt.Sprintf("cannot update task while parent batch status is %q", bs), + "batch_status": bs, + "requested_status": req.Status, + }) + return + } + } + if _, ok := validTaskStatusTransitions[taskRow.Status][req.Status]; !ok { c.JSON(http.StatusConflict, gin.H{ "error_msg": fmt.Sprintf("Cannot transition from '%s' to '%s'", taskRow.Status, req.Status), @@ -414,6 +475,12 @@ func (h *TaskHandler) UpdateTask(c *gin.Context) { } now := time.Now().UTC() + + // Fetch batch_id for post-update batch state advancement + var batchIDForAdvance int64 + _ = h.db.Get(&batchIDForAdvance, "SELECT batch_id FROM tasks WHERE id = ? AND deleted_at IS NULL LIMIT 1", id) + orderIDForAdvance := taskRow.OrderID + result, err := h.db.Exec( "UPDATE tasks SET status = ?, updated_at = ?, ready_at = CASE WHEN ? = 'ready' THEN ? ELSE ready_at END WHERE id = ? AND status = ? AND deleted_at IS NULL", req.Status, @@ -445,6 +512,15 @@ func (h *TaskHandler) UpdateTask(c *gin.Context) { return } + // After completed/failed, try to advance the batch status (pending->active, active->completed). + if _, ok := batchAdvanceTriggerStatuses[req.Status]; ok && batchIDForAdvance > 0 { + go tryAdvanceBatchStatus(h.db, batchIDForAdvance) + } + // After completed, try to advance the order status (created->in_progress, in_progress->completed). 
+ if req.Status == "completed" && orderIDForAdvance > 0 { + go tryAdvanceOrderStatus(h.db, orderIDForAdvance, h.recorderHub, h.recorderRPCTimeout) + } + c.JSON(http.StatusOK, UpdateTaskResponse{ ID: idStr, Status: req.Status, @@ -452,6 +528,58 @@ func (h *TaskHandler) UpdateTask(c *gin.Context) { }) } +// DeleteTask handles soft deletion of a task. +// Tasks with status "completed" cannot be deleted. +// +// @Summary Delete task +// @Description Soft deletes a task. Tasks with status 'completed' cannot be deleted. +// @Tags tasks +// @Produce json +// @Param id path string true "Task ID" +// @Success 204 +// @Failure 400 {object} map[string]string +// @Failure 404 {object} map[string]string +// @Failure 409 {object} map[string]string +// @Failure 500 {object} map[string]string +// @Router /tasks/{id} [delete] +func (h *TaskHandler) DeleteTask(c *gin.Context) { + idStr := strings.TrimSpace(c.Param("id")) + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil || id <= 0 { + c.JSON(http.StatusBadRequest, gin.H{"error_msg": "invalid task id"}) + return + } + + var taskStatus string + if err := h.db.Get(&taskStatus, "SELECT status FROM tasks WHERE id = ? AND deleted_at IS NULL LIMIT 1", id); err != nil { + if err == sql.ErrNoRows { + c.JSON(http.StatusNotFound, gin.H{"error_msg": "task not found"}) + return + } + logger.Printf("[TASK] Failed to query task status: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error_msg": "failed to delete task"}) + return + } + + // Completed tasks cannot be deleted (they form part of the audit trail) + if taskStatus == "completed" { + c.JSON(http.StatusConflict, gin.H{"error_msg": "cannot delete a completed task; completed tasks form part of the audit trail"}) + return + } + + now := time.Now().UTC() + if _, err := h.db.Exec( + "UPDATE tasks SET deleted_at = ?, updated_at = ? WHERE id = ? 
AND deleted_at IS NULL", + now, now, id, + ); err != nil { + logger.Printf("[TASK] Failed to delete task: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error_msg": "failed to delete task"}) + return + } + + c.Status(http.StatusNoContent) +} + // CreateTaskResponse represents the response body for creating a task. type CreateTaskResponse struct { ID string `json:"id"` @@ -598,8 +726,8 @@ func (h *TaskHandler) CreateTask(c *gin.Context) { // Ensure a batch exists for (order_id, workstation_id). Prefer active/pending, otherwise create. type batchRow struct { - ID int64 `db:"id"` - Name string `db:"name"` + ID int64 `db:"id"` + Name sql.NullString `db:"name"` } var batch batchRow batchQuery := ` @@ -615,8 +743,12 @@ func (h *TaskHandler) CreateTask(c *gin.Context) { return } if err == sql.ErrNoRows { - batchIDStr := now.Format("batch_20060102_150405") - batchName := fmt.Sprintf("Batch %s (order=%d ws=%d)", batchIDStr, req.OrderID, req.WorkstationID) + batchIDStr, err := newPublicBatchID(now, 0) + if err != nil { + logger.Printf("[TASK] Failed to generate batch_id: %v", err) + c.JSON(http.StatusInternalServerError, gin.H{"error_msg": "failed to create task"}) + return + } res, err := tx.Exec( `INSERT INTO batches ( batch_id, @@ -626,11 +758,10 @@ func (h *TaskHandler) CreateTask(c *gin.Context) { status, created_at, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?)`, + ) VALUES (?, ?, ?, NULL, ?, ?, ?)`, batchIDStr, req.OrderID, req.WorkstationID, - batchName, "pending", now, now, @@ -647,7 +778,7 @@ func (h *TaskHandler) CreateTask(c *gin.Context) { return } batch.ID = newID - batch.Name = batchName + batch.Name = sql.NullString{} } // Denormalized filtering fields @@ -662,6 +793,11 @@ func (h *TaskHandler) CreateTask(c *gin.Context) { return } + var taskBatchNameArg interface{} + if batch.Name.Valid { + taskBatchNameArg = batch.Name.String + } + created := make([]CreateTaskResponse, 0, quantity) for i := 0; i < quantity; i++ { taskID, err := newPublicTaskID(now, 
i) @@ -696,7 +832,7 @@ func (h *TaskHandler) CreateTask(c *gin.Context) { req.WorkstationID, subscene.SceneID, req.SubsceneID, - batch.Name, + taskBatchNameArg, subscene.Scene, subscene.Name, ws.FactoryID, @@ -938,7 +1074,7 @@ func (h *TaskHandler) GetTaskConfig(c *gin.Context) { SceneName sql.NullString `db:"scene_name"` SubsceneName sql.NullString `db:"subscene_name"` Layout sql.NullString `db:"initial_scene_layout"` - SOPName sql.NullString `db:"sop_name"` + SOPSlug sql.NullString `db:"sop_slug"` SkillSequence sql.NullString `db:"skill_sequence"` ROSTopics sql.NullString `db:"ros_topics"` } @@ -957,7 +1093,7 @@ func (h *TaskHandler) GetTaskConfig(c *gin.Context) { COALESCE(t.scene_name, '') AS scene_name, COALESCE(t.subscene_name, '') AS subscene_name, COALESCE(t.initial_scene_layout, '') AS initial_scene_layout, - s.name AS sop_name, + s.slug AS sop_slug, COALESCE(s.skill_sequence, '[]') AS skill_sequence, COALESCE(rt.ros_topics, '[]') AS ros_topics FROM tasks t @@ -1011,21 +1147,21 @@ func (h *TaskHandler) GetTaskConfig(c *gin.Context) { c.JSON(http.StatusConflict, gin.H{"error_msg": fmt.Sprintf("Robot %d has no robot_type ros_topics", row.RobotID.Int64)}) return } - if !row.SOPName.Valid || strings.TrimSpace(row.SOPName.String) == "" { + if !row.SOPSlug.Valid || strings.TrimSpace(row.SOPSlug.String) == "" { c.JSON(http.StatusConflict, gin.H{"error_msg": "Task sop_id not found"}) return } - // Resolve skill ids (from sop.skill_sequence) to skill names, preserving order. + // Resolve skill ids (from sop.skill_sequence) to skill slugs, preserving order. 
skillIDs := parseJSONArray(row.SkillSequence.String) skills := make([]string, 0, len(skillIDs)) if len(skillIDs) > 0 { type skillRow struct { ID string `db:"id"` - Name string `db:"name"` + Slug string `db:"slug"` } query, args, err := sqlx.In( - `SELECT CAST(id AS CHAR) AS id, name FROM skills WHERE deleted_at IS NULL AND id IN (?)`, + `SELECT CAST(id AS CHAR) AS id, slug FROM skills WHERE deleted_at IS NULL AND id IN (?)`, skillIDs, ) if err != nil { @@ -1040,13 +1176,13 @@ func (h *TaskHandler) GetTaskConfig(c *gin.Context) { c.JSON(http.StatusInternalServerError, gin.H{"error_msg": "Failed to query skills"}) return } - nameByID := make(map[string]string, len(rows)) + slugByID := make(map[string]string, len(rows)) for _, r := range rows { - nameByID[strings.TrimSpace(r.ID)] = strings.TrimSpace(r.Name) + slugByID[strings.TrimSpace(r.ID)] = strings.TrimSpace(r.Slug) } for _, id := range skillIDs { - if name, ok := nameByID[strings.TrimSpace(id)]; ok && name != "" { - skills = append(skills, name) + if slug, ok := slugByID[strings.TrimSpace(id)]; ok && slug != "" { + skills = append(skills, slug) } } } @@ -1062,7 +1198,7 @@ func (h *TaskHandler) GetTaskConfig(c *gin.Context) { Subscene: strings.TrimSpace(row.SubsceneName.String), InitialSceneLayout: strings.TrimSpace(row.Layout.String), Skills: skills, - SOPID: strings.TrimSpace(row.SOPName.String), + SOPID: strings.TrimSpace(row.SOPSlug.String), Topics: parseJSONArray(row.ROSTopics.String), StartCallbackURL: "http://keystone.factory.internal/api/v1/callbacks/start", FinishCallbackURL: "http://keystone.factory.internal/api/v1/callbacks/finish", diff --git a/internal/api/handlers/transfer.go b/internal/api/handlers/transfer.go index aff563f..f151d7d 100644 --- a/internal/api/handlers/transfer.go +++ b/internal/api/handlers/transfer.go @@ -14,6 +14,7 @@ import ( "net" "net/http" "strconv" + "strings" "sync" "time" @@ -38,18 +39,25 @@ type TransferHandler struct { bucket string factoryID string client *http.Client + + 
// recorderHub is used on transfer disconnect to notify recorder (clear/cancel) before reverting tasks. + recorderHub *services.RecorderHub + recorderRPCTimeout time.Duration } // NewTransferHandler creates a new TransferHandler. // db and s3Client may be nil; Verified ACK will be skipped if either is absent. -func NewTransferHandler(hub *services.TransferHub, cfg *config.TransferConfig, db *sqlx.DB, s3Client *s3.Client, bucket string, factoryID string) *TransferHandler { +// recorderHub may be nil (disables recorder RPC on transfer disconnect). +func NewTransferHandler(hub *services.TransferHub, cfg *config.TransferConfig, db *sqlx.DB, s3Client *s3.Client, bucket string, factoryID string, recorderHub *services.RecorderHub, recorderRPCTimeout time.Duration) *TransferHandler { return &TransferHandler{ - hub: hub, - cfg: cfg, - db: db, - s3: s3Client, - bucket: bucket, - factoryID: factoryID, + hub: hub, + cfg: cfg, + db: db, + s3: s3Client, + bucket: bucket, + factoryID: factoryID, + recorderHub: recorderHub, + recorderRPCTimeout: recorderRPCTimeout, client: &http.Client{ Timeout: 10 * time.Second, }, @@ -134,6 +142,7 @@ func (h *TransferHandler) HandleWebSocket(w http.ResponseWriter, r *http.Request dc := h.hub.NewTransferConn(conn, deviceID, remoteIP) h.hub.Connect(deviceID, dc) defer h.hub.Disconnect(deviceID) + defer revertRunnableTasksOnDeviceDisconnect(h.db, deviceID, h.recorderHub, h.recorderRPCTimeout, true) // #nosec G706 -- Set aside for now logger.Printf("[TRANSFER] Transfer %s connected from %s", deviceID, remoteIP) @@ -177,7 +186,7 @@ func (h *TransferHandler) handleMessage(ctx context.Context, dc *services.Transf case "upload_complete": h.onUploadComplete(ctx, dc, msg) case "upload_failed": - h.onUploadFailed(dc, msg) + h.onUploadFailed(ctx, dc, msg) case "upload_not_found": h.onUploadNotFound(dc, msg) case "status": @@ -319,6 +328,24 @@ func (h *TransferHandler) onUploadComplete(ctx context.Context, dc *services.Tra return } + // Resolve batch id for 
post-commit batch state advancement. + // Best-effort: failure here should not block upload acknowledgement. + var batchIDForAdvance int64 + if err := tx.QueryRowContext(ctx, "SELECT batch_id FROM tasks WHERE id = ? AND deleted_at IS NULL", taskPK).Scan(&batchIDForAdvance); err != nil { + // #nosec G706 -- Set aside for now + logger.Printf("[TRANSFER] Device %s: failed to resolve batch id for task=%s (task_pk=%d): %v", dc.DeviceID, taskID, taskPK, err) + batchIDForAdvance = 0 + } + + // Resolve order id for post-commit order state advancement. + // Best-effort: failure here should not block upload acknowledgement. + var orderIDForAdvance int64 + if err := tx.QueryRowContext(ctx, "SELECT order_id FROM tasks WHERE id = ? AND deleted_at IS NULL", taskPK).Scan(&orderIDForAdvance); err != nil { + // #nosec G706 -- Set aside for now + logger.Printf("[TRANSFER] Device %s: failed to resolve order id for task=%s (task_pk=%d): %v", dc.DeviceID, taskID, taskPK, err) + orderIDForAdvance = 0 + } + // Check if mcap_path and sidecar_path already exist in database var count int err = tx.QueryRowContext(ctx, @@ -441,21 +468,6 @@ func (h *TransferHandler) onUploadComplete(ctx context.Context, dc *services.Tra } } - // Episode is confirmed (inserted or already existed). Mark task as completed. - now := time.Now().UTC() - if _, dbErr := tx.ExecContext(ctx, ` - UPDATE tasks - SET - status = 'completed', - completed_at = CASE WHEN completed_at IS NULL THEN ? ELSE completed_at END, - updated_at = ? - WHERE id = ? 
AND deleted_at IS NULL - `, now, now, taskPK); dbErr != nil { - // #nosec G706 -- Set aside for now - logger.Printf("[TRANSFER] Device %s: DB update failed for task=%s: %v", dc.DeviceID, taskID, dbErr) - return - } - // Commit transaction if err := tx.Commit(); err != nil { // #nosec G706 -- Set aside for now @@ -479,10 +491,33 @@ func (h *TransferHandler) onUploadComplete(ctx context.Context, dc *services.Tra dc.RecordEvent("outbound", ackMsg) // #nosec G706 -- Set aside for now logger.Printf("[TRANSFER] Device %s: upload_ack sent for task=%s", dc.DeviceID, taskID) + + // After upload_ack is sent, mark task as completed (in_progress -> completed). + // Best-effort: do not affect the already-sent acknowledgement. + now := time.Now().UTC() + if _, err := h.db.ExecContext(ctx, ` + UPDATE tasks + SET + status = 'completed', + completed_at = CASE WHEN completed_at IS NULL THEN ? ELSE completed_at END, + updated_at = ? + WHERE id = ? AND status = 'in_progress' AND deleted_at IS NULL + `, now, now, taskPK); err != nil { + // #nosec G706 -- Set aside for now + logger.Printf("[TRANSFER] Device %s: failed to mark task in_progress->completed after upload_ack: task=%s err=%v", dc.DeviceID, taskID, err) + } else { + if batchIDForAdvance > 0 { + // Must run after the task row is terminal: tryAdvanceBatchStatus counts tasks in DB. + go tryAdvanceBatchStatus(h.db, batchIDForAdvance) + } + if orderIDForAdvance > 0 { + go tryAdvanceOrderStatus(h.db, orderIDForAdvance, h.recorderHub, h.recorderRPCTimeout) + } + } } -// onUploadFailed handles "upload_failed" message -func (h *TransferHandler) onUploadFailed(dc *services.TransferConn, msg map[string]interface{}) { +// onUploadFailed handles "upload_failed" message and marks the task as failed. 
+func (h *TransferHandler) onUploadFailed(ctx context.Context, dc *services.TransferConn, msg map[string]interface{}) { data, _ := msg["data"].(map[string]interface{}) if data == nil { return @@ -510,6 +545,147 @@ func (h *TransferHandler) onUploadFailed(dc *services.TransferConn, msg map[stri if h.s3 != nil { logger.Printf("[TRANSFER] Keystone configured bucket: %s", h.s3.Bucket()) } + + // Mark task as failed when upload_failed is received and task is in_progress. + if h.db == nil || taskID == "" { + return + } + now := time.Now().UTC() + result, err := h.db.ExecContext(ctx, ` + UPDATE tasks + SET + status = 'failed', + completed_at = CASE WHEN completed_at IS NULL THEN ? ELSE completed_at END, + updated_at = ? + WHERE task_id = ? AND status = 'in_progress' AND deleted_at IS NULL + `, now, now, taskID) + if err != nil { + // #nosec G706 -- Set aside for now + logger.Printf("[TRANSFER] Device %s: failed to mark task failed on upload_failed: task=%s err=%v", dc.DeviceID, taskID, err) + return + } + if rows, _ := result.RowsAffected(); rows > 0 { + // #nosec G706 -- Set aside for now + logger.Printf("[TRANSFER] Device %s: task=%s marked as failed due to upload_failed", dc.DeviceID, taskID) + // Trigger batch status advancement since the task reached a terminal state. + var batchID int64 + if err := h.db.QueryRowContext(ctx, + "SELECT batch_id FROM tasks WHERE task_id = ? AND deleted_at IS NULL", taskID, + ).Scan(&batchID); err == nil && batchID > 0 { + go tryAdvanceBatchStatus(h.db, batchID) + } + } +} + +// revertRunnableTasksOnDeviceDisconnect reverts runnable tasks for the given device back to pending +// when a device WebSocket connection is lost. 
+// +// When notifyRecorder is true (transfer disconnect), it best-effort sends recorder RPCs first: +// - ready -> clear +// - in_progress -> cancel (with task_id) +// +// This prevents tasks from being stuck in ready/in_progress when the edge is disconnected, and +// clears runtime timestamps so the task can be re-configured / retried. +// +// Uses a background context because the request context is already cancelled at this point +// (deferred after read loop exits). It is a package-level function so both TransferHandler and +// RecorderHandler can call it. +func revertRunnableTasksOnDeviceDisconnect(db *sqlx.DB, deviceID string, recorderHub *services.RecorderHub, rpcTimeout time.Duration, notifyRecorder bool) { + if db == nil || deviceID == "" { + return + } + ctx := context.Background() + now := time.Now().UTC() + + timeout := rpcTimeout + if timeout <= 0 { + timeout = 5 * time.Second + } + + // Resolve all runnable tasks for the disconnected device via the + // robots → workstations → tasks join chain. + rows, err := db.QueryContext(ctx, ` + SELECT t.id, t.task_id, t.batch_id, t.status + FROM tasks t + JOIN workstations ws ON ws.id = t.workstation_id AND ws.deleted_at IS NULL + JOIN robots r ON r.id = ws.robot_id AND r.deleted_at IS NULL + WHERE r.device_id = ? 
+ AND t.status IN ('ready', 'in_progress') + AND t.deleted_at IS NULL + `, deviceID) + if err != nil { + // #nosec G706 -- Set aside for now + logger.Printf("[DEVICE] Device %s: failed to query runnable tasks on disconnect: %v", deviceID, err) + return + } + defer func() { + if cerr := rows.Close(); cerr != nil { + logger.Printf("[DEVICE] Device %s: close rows after disconnect task query: %v", deviceID, cerr) + } + }() + + type taskRef struct { + id int64 + taskID string + batchID int64 + status string + } + var toRevert []taskRef + for rows.Next() { + var ref taskRef + if err := rows.Scan(&ref.id, &ref.taskID, &ref.batchID, &ref.status); err != nil { + logger.Printf("[DEVICE] Device %s: scan error during disconnect task query: %v", deviceID, err) + continue + } + toRevert = append(toRevert, ref) + } + if err := rows.Err(); err != nil { + logger.Printf("[DEVICE] Device %s: rows error during disconnect task query: %v", deviceID, err) + } + + if notifyRecorder && recorderHub != nil { + rpcCtx := context.Background() + for _, ref := range toRevert { + tid := strings.TrimSpace(ref.taskID) + if tid == "" { + continue + } + st := strings.TrimSpace(ref.status) + switch st { + case "ready": + if _, err := recorderHub.SendRPC(rpcCtx, deviceID, "clear", nil, timeout); err != nil { + logger.Printf("[DEVICE] Device %s: recorder clear after transfer disconnect failed (task=%s): %v", deviceID, tid, err) + } + case "in_progress": + if _, err := recorderHub.SendRPC(rpcCtx, deviceID, "cancel", map[string]interface{}{"task_id": tid}, timeout); err != nil { + logger.Printf("[DEVICE] Device %s: recorder cancel after transfer disconnect failed (task=%s): %v", deviceID, tid, err) + } + } + } + } + + for _, ref := range toRevert { + result, err := db.ExecContext(ctx, ` + UPDATE tasks + SET + status = 'pending', + ready_at = NULL, + started_at = NULL, + completed_at = NULL, + error_message = NULL, + updated_at = ? + WHERE id = ? 
AND status IN ('ready', 'in_progress') AND deleted_at IS NULL + `, now, ref.id) + if err != nil { + // #nosec G706 -- Set aside for now + logger.Printf("[DEVICE] Device %s: failed to revert task=%s to pending on disconnect: %v", deviceID, ref.taskID, err) + continue + } + if affected, _ := result.RowsAffected(); affected > 0 { + // #nosec G706 -- Set aside for now + logger.Printf("[DEVICE] Device %s: task=%s reverted to pending due to device disconnect", deviceID, ref.taskID) + } + } } // onUploadNotFound handles "upload_not_found" message @@ -734,6 +910,33 @@ func (h *TransferHandler) ManualACK(c *gin.Context) { c.JSON(http.StatusNotFound, gin.H{"error": err.Error()}) return } + + // After upload_ack is sent, mark task as completed (in_progress -> completed). + // Best-effort: do not fail the acknowledgement response. + if h.db != nil { + now := time.Now().UTC() + if _, err := h.db.Exec( + `UPDATE tasks + SET + status = 'completed', + completed_at = CASE WHEN completed_at IS NULL THEN ? ELSE completed_at END, + updated_at = ? + WHERE task_id = ? AND status = 'in_progress' AND deleted_at IS NULL`, + now, now, body.TaskID, + ); err != nil { + // #nosec G706 -- Set aside for now + logger.Printf("[TRANSFER] Device %s: failed to mark task in_progress->completed after manual upload_ack: task=%s err=%v", deviceID, body.TaskID, err) + } else { + var batchID int64 + if err := h.db.Get(&batchID, "SELECT batch_id FROM tasks WHERE task_id = ? AND deleted_at IS NULL LIMIT 1", body.TaskID); err == nil && batchID > 0 { + go tryAdvanceBatchStatus(h.db, batchID) + } + var orderID int64 + if err := h.db.Get(&orderID, "SELECT order_id FROM tasks WHERE task_id = ? 
AND deleted_at IS NULL LIMIT 1", body.TaskID); err == nil && orderID > 0 { + go tryAdvanceOrderStatus(h.db, orderID, h.recorderHub, h.recorderRPCTimeout) + } + } + } c.JSON(http.StatusOK, gin.H{"status": "sent"}) } diff --git a/internal/server/server.go b/internal/server/server.go index 7ac9dc6..6e6b593 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -74,19 +74,20 @@ func New(cfg *config.Config, db *sqlx.DB, s3Client *s3.Client) *Server { authHandler = handlers.NewAuthHandler(db, &cfg.Auth) } - // Create TransferHub and TransferHandler for Transfer Service - transferHub := services.NewTransferHub(cfg.AxonTransfer.MaxEvents) - transferHandler := handlers.NewTransferHandler(transferHub, &cfg.AxonTransfer, db, s3Client, cfg.Storage.Bucket, cfg.AxonTransfer.FactoryID) - - // Create recorderHub and RecorderHandler for Axon Recorder RPC + // Recorder hub must exist before TransferHandler (transfer disconnect notifies recorder via RPC). recorderHub := services.NewRecorderHub() recorderHandler := handlers.NewRecorderHandler(recorderHub, &cfg.AxonRecorder, db) + recorderRPCTimeout := time.Duration(cfg.AxonRecorder.ResponseTimeout) * time.Second + + // Create TransferHub and TransferHandler for Transfer Service + transferHub := services.NewTransferHub(cfg.AxonTransfer.MaxEvents) + transferHandler := handlers.NewTransferHandler(transferHub, &cfg.AxonTransfer, db, s3Client, cfg.Storage.Bucket, cfg.AxonTransfer.FactoryID, recorderHub, recorderRPCTimeout) // Create EpisodeHandler for episode listing episodeHandler := handlers.NewEpisodeHandler(db) // Create TaskHandler for task configuration - taskHandler := handlers.NewTaskHandler(db, transferHub) + taskHandler := handlers.NewTaskHandler(db, transferHub, recorderHub, recorderRPCTimeout) // Create database-dependent handlers only when DB is available var ( @@ -105,7 +106,7 @@ func New(cfg *config.Config, db *sqlx.DB, s3Client *s3.Client) *Server { orderHandler *handlers.OrderHandler ) if db != nil { - 
batchHandler = handlers.NewBatchHandler(db) + batchHandler = handlers.NewBatchHandler(db, recorderHub, recorderRPCTimeout) robotTypeHandler = handlers.NewRobotTypeHandler(db) robotHandler = handlers.NewRobotHandler(db, recorderHub, transferHub) factoryHandler = handlers.NewFactoryHandler(db) @@ -117,7 +118,7 @@ func New(cfg *config.Config, db *sqlx.DB, s3Client *s3.Client) *Server { sopHandler = handlers.NewSOPHandler(db) sceneHandler = handlers.NewSceneHandler(db) subsceneHandler = handlers.NewSubsceneHandler(db) - orderHandler = handlers.NewOrderHandler(db) + orderHandler = handlers.NewOrderHandler(db, recorderHub, recorderRPCTimeout) } s := &Server{ diff --git a/internal/services/recorder_hub.go b/internal/services/recorder_hub.go index 1251b6c..d0059dc 100644 --- a/internal/services/recorder_hub.go +++ b/internal/services/recorder_hub.go @@ -117,7 +117,11 @@ func (h *RecorderHub) NewRecorderConn(conn *websocket.Conn, deviceID, remoteIP s RemoteIP: remoteIP, ConnectedAt: time.Now(), LastSeenAt: time.Now(), - Pending: make(map[string]*PendingRPC), + State: RecorderState{ + CurrentState: "unknown", + UpdatedAt: time.Now(), + }, + Pending: make(map[string]*PendingRPC), } } diff --git a/internal/storage/database/migrations/000001_initial_schema.up.sql b/internal/storage/database/migrations/000001_initial_schema.up.sql index 94558da..1a4be7c 100644 --- a/internal/storage/database/migrations/000001_initial_schema.up.sql +++ b/internal/storage/database/migrations/000001_initial_schema.up.sql @@ -77,7 +77,6 @@ CREATE TABLE IF NOT EXISTS subscenes ( CREATE TABLE IF NOT EXISTS skills ( id BIGINT AUTO_INCREMENT PRIMARY KEY, slug VARCHAR(100) NOT NULL, - name VARCHAR(255) NOT NULL, description TEXT, version VARCHAR(20) DEFAULT '1.0.0', metadata JSON DEFAULT NULL, @@ -100,7 +99,6 @@ CREATE TABLE IF NOT EXISTS subscene_skills ( CREATE TABLE IF NOT EXISTS sops ( id BIGINT AUTO_INCREMENT PRIMARY KEY, - name VARCHAR(255) NOT NULL, slug VARCHAR(100) NOT NULL, description TEXT, 
skill_sequence JSON NOT NULL, @@ -242,10 +240,10 @@ CREATE TABLE IF NOT EXISTS orders ( CREATE TABLE IF NOT EXISTS batches ( id BIGINT AUTO_INCREMENT PRIMARY KEY, - batch_id VARCHAR(100) NOT NULL COMMENT 'Human-readable batch ID', + batch_id VARCHAR(100) NOT NULL, order_id BIGINT NOT NULL, workstation_id BIGINT NOT NULL, - name VARCHAR(255) NOT NULL, + name VARCHAR(255) NULL COMMENT 'Human-readable name', notes TEXT, status ENUM('pending', 'active', 'completed', 'cancelled', 'recalled') DEFAULT 'pending', episode_count INT DEFAULT 0, @@ -255,7 +253,7 @@ CREATE TABLE IF NOT EXISTS batches ( created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, deleted_at TIMESTAMP NULL, - _name_unique VARCHAR(600) GENERATED ALWAYS AS (CONCAT(IFNULL(order_id, ''), '|', IFNULL(name, ''), '|', IFNULL(deleted_at, ''))) STORED, + _name_unique VARCHAR(600) GENERATED ALWAYS AS (CONCAT(IFNULL(order_id, ''), '|', IFNULL(batch_id, ''), '|', IFNULL(deleted_at, ''))) STORED, UNIQUE INDEX idx_name_del (_name_unique), INDEX idx_batch_id (batch_id), INDEX idx_order (order_id), @@ -473,12 +471,12 @@ INSERT INTO factories (organization_id, name, slug, location, timezone, settings (1, 'San Francisco Factory', 'factory-sf', 'San Francisco, USA', 'America/Los_Angeles', '{}') ON DUPLICATE KEY UPDATE name=VALUES(name), location=VALUES(location), timezone=VALUES(timezone), settings=VALUES(settings); -INSERT INTO skills (slug, name, description) VALUES - ('pick', 'Pick', 'Grasp and lift an object'), - ('place', 'Place', 'Put an object at a target location'), - ('drop', 'Drop', 'Release an object without precision'), - ('push', 'Push', 'Move an object without grasping'), - ('wipe', 'Wipe', 'Clean a surface with wiping motion'), - ('navigate', 'Navigate', 'Move from one location to another'), - ('pour', 'Pour', 'Transfer liquid between containers') -ON DUPLICATE KEY UPDATE name=VALUES(name); +INSERT INTO skills (slug, description) VALUES + 
('pick', 'Grasp and lift an object'), + ('place', 'Put an object at a target location'), + ('drop', 'Release an object without precision'), + ('push', 'Move an object without grasping'), + ('wipe', 'Clean a surface with wiping motion'), + ('navigate', 'Move from one location to another'), + ('pour', 'Transfer liquid between containers') +ON DUPLICATE KEY UPDATE description=VALUES(description);