From 236089f5c89804007b3a9dfd9879d07a7d961d0e Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 17:43:41 +0000 Subject: [PATCH 01/18] Add long-running agents anomaly monitoring end-to-end Co-authored-by: leor --- docs/agents_monitoring_handoff.md | 128 +++ electron-ui/index.html | 93 +++ electron-ui/main.js | 316 ++++++- electron-ui/preload.js | 29 + electron-ui/renderer.js | 379 +++++++++ electron-ui/styles.css | 275 +++++++ scripts/anomaly_monitor.py | 912 +++++++++++++++++++++ scripts/anomaly_rules.py | 217 +++++ scripts/fixtures/anomaly_replay_cases.json | 32 + scripts/graph_api.py | 11 +- scripts/ignition_api_client.py | 69 +- scripts/neo4j_ontology.py | 148 +++- 12 files changed, 2565 insertions(+), 44 deletions(-) create mode 100644 docs/agents_monitoring_handoff.md create mode 100644 scripts/anomaly_monitor.py create mode 100644 scripts/anomaly_rules.py create mode 100644 scripts/fixtures/anomaly_replay_cases.json diff --git a/docs/agents_monitoring_handoff.md b/docs/agents_monitoring_handoff.md new file mode 100644 index 0000000..a5368fb --- /dev/null +++ b/docs/agents_monitoring_handoff.md @@ -0,0 +1,128 @@ +# Long-Running Agents Monitoring Handoff + +## Summary + +This handoff documents the implemented V1 monitoring capability: + +- New **Agents** tab in Electron UI for starting/stopping long-running monitoring. +- Continuous Python worker (`anomaly_monitor.py`) with: + - deterministic historical-deviation scoring, + - quality/staleness gates, + - optional LLM triage, + - Neo4j persistence for `AgentRun` and `AnomalyEvent`, + - event dedup and retention cleanup. +- IPC surface and stream channels from Electron main to renderer: + - `agents:start`, `agents:status`, `agents:stop`, + - `agents:list-events`, `agents:get-event`, `agents:ack-event`, `agents:cleanup`, + - channels: `agent-status`, `agent-event`, `agent-error`, `agent-complete`. +- Graph drill-down integration with anomaly node support. 
+ +## Files Changed + +### Electron + +- `electron-ui/index.html` + - Added **Agents** nav button. + - Added `tab-agents` page shell with controls, filters, feed, and detail panel. + - Added graph filter option for anomaly layer. + +- `electron-ui/styles.css` + - Added Agents tab styles (`agents-*`, `status-chip`, feed cards, detail panel). + +- `electron-ui/preload.js` + - Added `agents*` API bridge methods. + - Added event listeners for `agent-status/event/error/complete`. + +- `electron-ui/main.js` + - Added background agent runtime management (`activeAgentRun`). + - Added stream parser for monitor stdout markers (`[AGENT_STATUS]`, etc.). + - Added full `agents:*` IPC handlers. + - Added graceful stop handling on app shutdown. + +- `electron-ui/renderer.js` + - Added Agents tab state management. + - Added start/stop/refresh/cleanup/ack handlers. + - Added realtime feed updates from agent channels. + - Added event detail rendering and graph drill-down action. + +### Python backend + +- `scripts/anomaly_rules.py` (new) + - Deterministic scoring logic (`z`, `MAD`, rate, drift trend, flatline). + - Quality/staleness helpers and dedup key generator. + +- `scripts/anomaly_monitor.py` (new) + - Long-running monitoring worker with CLI subcommands: + - `run`, `status`, `list-events`, `get-event`, `ack-event`, `cleanup`, `replay-fixtures`. + - Neo4j persistence + dedup + retention cleanup. + - Optional LLM triage with structured JSON fallback. + +- `scripts/ignition_api_client.py` + - Added `query_tag_history(...)` and local-time-to-UTC conversion helper. + +- `scripts/neo4j_ontology.py` + - Added monitoring schema constraints/indexes for `AgentRun` / `AnomalyEvent`. + - Added helper methods: list/get/cleanup anomaly events. + - Added CLI commands: + - `init-agent-schema` + - `list-anomaly-events` + - `get-anomaly-event` + - `cleanup-anomaly-events` + +- `scripts/graph_api.py` + - Added node groups/colors for `AgentRun` and `AnomalyEvent`. 
+ - Extended neighbor center-node lookup to support `event_id` and `run_id`. + +### Fixtures + +- `scripts/fixtures/anomaly_replay_cases.json` (new) + - Deterministic replay cases: + - normal baseline, + - sudden spike, + - slow drift, + - flatline/stuck. + +## Runtime Commands + +### Deterministic replay validation + +```bash +python3 scripts/anomaly_monitor.py replay-fixtures --fixture-file scripts/fixtures/anomaly_replay_cases.json +``` + +### Monitor worker manual run + +```bash +python3 scripts/anomaly_monitor.py run --run-id demo-run --config-json '{"pollIntervalMs":15000}' +``` + +### Event operations + +```bash +python3 scripts/anomaly_monitor.py list-events --limit 50 +python3 scripts/anomaly_monitor.py get-event --event-id +python3 scripts/anomaly_monitor.py ack-event --event-id --note "Reviewed by operator" +python3 scripts/anomaly_monitor.py cleanup --retention-days 14 +``` + +## Known Environment Requirements + +The Python environment must include packages from `requirements.txt`: + +- `neo4j` +- `anthropic` (for LLM triage; deterministic fallback works without API key) +- `python-dotenv` +- `requests` + +If `ANTHROPIC_API_KEY` is absent, triage automatically falls back to deterministic explanations. + +## Validation Status + +- Syntax checks passed: + - Python (`py_compile`) for all modified scripts. + - JS syntax checks (`node --check`) for Electron files. +- Fixture replay passed: + - `4/4` deterministic scenarios. + +Live end-to-end validation against actual Ignition + Neo4j + Anthropic requires connected runtime services. + diff --git a/electron-ui/index.html b/electron-ui/index.html index 03b808e..7e5e8a7 100644 --- a/electron-ui/index.html +++ b/electron-ui/index.html @@ -36,6 +36,13 @@ Assist + + + + + +
+ Idle + No active run +
+ + +
+ + + + + + + + + + + + + + +
+ +
+
Cycle (ms)0
+
Candidates0
+
Triaged0
+
Emitted0
+
Last heartbeatn/a
+
+ +
+ + +
+
+

Event Details

+
+ + +
+
+
+

Select an anomaly event from the feed.

+
+
+
+ +
@@ -630,6 +722,7 @@

Ontology Graph

+ diff --git a/electron-ui/main.js b/electron-ui/main.js index b5cdb4d..e215fb4 100644 --- a/electron-ui/main.js +++ b/electron-ui/main.js @@ -4,6 +4,7 @@ const fs = require('fs'); const { spawn } = require('child_process'); let mainWindow; +let activeAgentRun = null; // --------------------------------------------------------------------------- // Python backend configuration (works in both dev and packaged modes) @@ -103,6 +104,16 @@ app.on('window-all-closed', () => { } }); +app.on('before-quit', () => { + if (activeAgentRun && activeAgentRun.process && !activeAgentRun.process.killed) { + try { + activeAgentRun.process.kill('SIGTERM'); + } catch (err) { + // Ignore termination errors during shutdown. + } + } +}); + app.on('activate', () => { if (BrowserWindow.getAllWindows().length === 0) { createWindow(); @@ -185,6 +196,132 @@ function runPythonScript(scriptName, args = [], options = {}) { }); } +function normalizeAgentConfig(config = {}) { + const thresholds = (config && typeof config.thresholds === 'object' && config.thresholds) || {}; + const scope = (config && typeof config.scope === 'object' && config.scope) || {}; + return { + pollIntervalMs: Math.max(5000, Number(config.pollIntervalMs || 15000)), + historyWindowMinutes: Math.max(10, Number(config.historyWindowMinutes || 360)), + minHistoryPoints: Math.max(10, Number(config.minHistoryPoints || 30)), + maxMonitoredTags: Math.max(10, Number(config.maxMonitoredTags || 200)), + maxCandidatesPerCycle: Math.max(1, Number(config.maxCandidatesPerCycle || 25)), + maxLlmTriagesPerCycle: Math.max(0, Number(config.maxLlmTriagesPerCycle || 5)), + dedupCooldownMinutes: Math.max(1, Number(config.dedupCooldownMinutes || 10)), + retentionDays: Math.max(1, Number(config.retentionDays || 14)), + cleanupEveryCycles: Math.max(1, Number(config.cleanupEveryCycles || 40)), + thresholds: { + z: Number(thresholds.z ?? 3.0), + mad: Number(thresholds.mad ?? 3.5), + rate: Number(thresholds.rate ?? 
0.0), + stalenessSec: Number(thresholds.stalenessSec ?? 120), + flatline_std_epsilon: Number(thresholds.flatline_std_epsilon ?? 1e-6), + stuck_window_size: Number(thresholds.stuck_window_size ?? 20), + }, + scope: { + project: scope.project || null, + equipmentTags: Array.isArray(scope.equipmentTags) ? scope.equipmentTags : [], + tagRegex: scope.tagRegex || null, + }, + }; +} + +function routeAgentMessage(channel, payload) { + if (mainWindow) { + mainWindow.webContents.send(channel, payload); + } +} + +function parseAgentLine(line) { + const trimmed = (line || '').trim(); + if (!trimmed) return null; + const prefixes = [ + { key: '[AGENT_STATUS]', channel: 'agent-status' }, + { key: '[AGENT_EVENT]', channel: 'agent-event' }, + { key: '[AGENT_ERROR]', channel: 'agent-error' }, + { key: '[AGENT_COMPLETE]', channel: 'agent-complete' }, + ]; + for (const prefix of prefixes) { + if (!trimmed.startsWith(prefix.key)) continue; + const jsonText = trimmed.slice(prefix.key.length).trim(); + try { + const payload = JSON.parse(jsonText); + return { channel: prefix.channel, payload }; + } catch (err) { + return { + channel: 'agent-error', + payload: { + runId: activeAgentRun ? 
activeAgentRun.runId : null, + code: 'invalid_agent_json', + message: `Failed to parse agent stream line: ${trimmed.slice(0, 200)}`, + recoverable: true, + timestamp: new Date().toISOString(), + }, + }; + } + } + return null; +} + +function handleAgentStdoutChunk(text) { + if (!activeAgentRun) return; + activeAgentRun.stdoutBuffer += text; + const lines = activeAgentRun.stdoutBuffer.split(/\r?\n/); + activeAgentRun.stdoutBuffer = lines.pop() || ''; + for (const line of lines) { + const parsed = parseAgentLine(line); + if (!parsed) continue; + if (parsed.channel === 'agent-status' && parsed.payload) { + activeAgentRun.status = parsed.payload.state || activeAgentRun.status; + activeAgentRun.metrics = { + cycleMs: parsed.payload.cycleMs || 0, + candidates: parsed.payload.candidates || 0, + triaged: parsed.payload.triaged || 0, + emitted: parsed.payload.emitted || 0, + timestamp: parsed.payload.timestamp || new Date().toISOString(), + }; + } + routeAgentMessage(parsed.channel, parsed.payload); + } +} + +async function stopActiveAgent(reason = 'stopped_by_user') { + if (!activeAgentRun || !activeAgentRun.process || activeAgentRun.process.killed) { + return { success: false, error: 'No active agent run' }; + } + const runId = activeAgentRun.runId; + activeAgentRun.status = 'stopping'; + + return new Promise((resolve) => { + const proc = activeAgentRun.process; + let settled = false; + const done = (result) => { + if (settled) return; + settled = true; + resolve(result); + }; + + proc.once('close', () => { + done({ success: true, runId, stoppedAt: new Date().toISOString(), reason }); + }); + + try { + proc.kill('SIGTERM'); + } catch (err) { + done({ success: false, error: err.message }); + return; + } + + setTimeout(() => { + if (proc.killed) return; + try { + proc.kill('SIGKILL'); + } catch (err) { + // Ignore forced termination errors. 
+ } + }, 5000); + }); +} + // IPC Handlers // Select file dialog @@ -1304,7 +1441,9 @@ function readDbCredentials() { if (!fs.existsSync(credPath)) return {}; try { return JSON.parse(fs.readFileSync(credPath, 'utf-8')); - } catch { return {}; } + } catch { + return {}; + } } // Get database connections from Neo4j + credential status from db_credentials.json @@ -1314,10 +1453,8 @@ ipcMain.handle('get-db-connections', async () => { const proc = spawnPythonProcess('neo4j_ontology.py', ['db-connections', '--json']); let stdout = ''; - let stderr = ''; proc.stdout.on('data', (data) => { stdout += data.toString(); }); - proc.stderr.on('data', (data) => { stderr += data.toString(); }); proc.on('close', (code) => { if (code !== 0) { @@ -1335,7 +1472,7 @@ ipcMain.handle('get-db-connections', async () => { })); resolve({ success: true, connections: enriched }); - } catch (e) { + } catch { resolve({ success: true, connections: [] }); } }); @@ -1349,7 +1486,7 @@ ipcMain.handle('get-db-connections', async () => { ipcMain.handle('save-db-credentials', async (event, credentials) => { try { const credPath = getDbCredentialsPath(); - let existing = readDbCredentials(); + const existing = readDbCredentials(); for (const [name, cred] of Object.entries(credentials)) { existing[name] = { @@ -1392,4 +1529,173 @@ ipcMain.handle('test-db-connection', async (event, connectionName) => { } catch (error) { return { success: false, error: error.message }; } +}); + +// ============================================ +// Long-running Agent Monitoring IPC Handlers +// ============================================ + +ipcMain.handle('agents:start', async (event, rawConfig = {}) => { + if (activeAgentRun && activeAgentRun.process && !activeAgentRun.process.killed) { + return { success: false, error: `Agent run already active: ${activeAgentRun.runId}`, runId: activeAgentRun.runId }; + } + + const runId = `agent-${Date.now()}`; + const config = normalizeAgentConfig(rawConfig); + + try { + const proc = 
spawnPythonProcess('anomaly_monitor.py', [ + 'run', + '--run-id', + runId, + '--config-json', + JSON.stringify(config), + ]); + + activeAgentRun = { + runId, + process: proc, + status: 'starting', + startedAt: new Date().toISOString(), + metrics: { + cycleMs: 0, + candidates: 0, + triaged: 0, + emitted: 0, + timestamp: new Date().toISOString(), + }, + stdoutBuffer: '', + config, + }; + + proc.stdout.on('data', (data) => { + handleAgentStdoutChunk(data.toString()); + }); + + proc.stderr.on('data', (data) => { + const text = data.toString().trim(); + if (!text) return; + routeAgentMessage('agent-error', { + runId, + code: 'worker_stderr', + message: text, + recoverable: true, + timestamp: new Date().toISOString(), + }); + }); + + proc.on('close', (code) => { + const hadActive = activeAgentRun && activeAgentRun.runId === runId; + if (hadActive) { + routeAgentMessage('agent-complete', { + runId, + success: code === 0, + reason: code === 0 ? 'completed' : 'worker_exit_error', + stoppedAt: new Date().toISOString(), + }); + activeAgentRun = null; + } + }); + + proc.on('error', (err) => { + routeAgentMessage('agent-error', { + runId, + code: 'worker_spawn_error', + message: err.message, + recoverable: false, + timestamp: new Date().toISOString(), + }); + activeAgentRun = null; + }); + + return { success: true, runId, startedAt: activeAgentRun.startedAt, config }; + } catch (error) { + activeAgentRun = null; + return { success: false, error: error.message, runId }; + } +}); + +ipcMain.handle('agents:status', async (event, runId) => { + if (activeAgentRun && (!runId || runId === activeAgentRun.runId)) { + return { + success: true, + runId: activeAgentRun.runId, + status: activeAgentRun.status, + metrics: activeAgentRun.metrics, + lastHeartbeatAt: activeAgentRun.metrics.timestamp, + startedAt: activeAgentRun.startedAt, + config: activeAgentRun.config, + active: true, + }; + } + + if (!runId) { + return { success: true, active: false, status: 'idle' }; + } + + try { + const 
output = await runPythonScript('anomaly_monitor.py', ['status', '--run-id', runId]); + const parsed = JSON.parse(output || '{}'); + return parsed; + } catch (error) { + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('agents:stop', async (event, runId = null) => { + if (!activeAgentRun) { + return { success: false, error: 'No active agent run' }; + } + if (runId && runId !== activeAgentRun.runId) { + return { success: false, error: `Requested run ${runId} does not match active run ${activeAgentRun.runId}` }; + } + return stopActiveAgent('stopped_by_user'); +}); + +ipcMain.handle('agents:list-events', async (event, filters = {}) => { + const args = ['list-events']; + if (filters.limit) args.push('--limit', String(filters.limit)); + if (filters.state) args.push('--state', String(filters.state)); + if (filters.severity) args.push('--severity', String(filters.severity)); + if (filters.runId) args.push('--run-id', String(filters.runId)); + + try { + const output = await runPythonScript('anomaly_monitor.py', args); + return JSON.parse(output || '{"success":true,"events":[]}'); + } catch (error) { + return { success: false, error: error.message, events: [] }; + } +}); + +ipcMain.handle('agents:get-event', async (event, eventId) => { + try { + const output = await runPythonScript('anomaly_monitor.py', ['get-event', '--event-id', String(eventId)]); + return JSON.parse(output || '{}'); + } catch (error) { + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('agents:ack-event', async (event, eventId, note = '') => { + try { + const args = ['ack-event', '--event-id', String(eventId)]; + if (note) args.push('--note', String(note)); + const output = await runPythonScript('anomaly_monitor.py', args); + return JSON.parse(output || '{}'); + } catch (error) { + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('agents:cleanup', async (event, retentionDays = 14) => { + try { + const output = await 
runPythonScript('anomaly_monitor.py', [ + 'cleanup', + '--retention-days', + String(retentionDays), + ]); + return JSON.parse(output || '{}'); + } catch (error) { + return { success: false, error: error.message }; + } }); \ No newline at end of file diff --git a/electron-ui/preload.js b/electron-ui/preload.js index d3c8171..1e0930c 100644 --- a/electron-ui/preload.js +++ b/electron-ui/preload.js @@ -70,6 +70,15 @@ contextBridge.exposeInMainWorld('api', { getSettings: () => ipcRenderer.invoke('get-settings'), saveSettings: (settings) => ipcRenderer.invoke('save-settings', settings), testIgnitionConnection: (options) => ipcRenderer.invoke('test-ignition-connection', options), + + // Long-running agents monitoring + agentsStart: (config) => ipcRenderer.invoke('agents:start', config), + agentsStatus: (runId) => ipcRenderer.invoke('agents:status', runId), + agentsStop: (runId) => ipcRenderer.invoke('agents:stop', runId), + agentsListEvents: (filters) => ipcRenderer.invoke('agents:list-events', filters), + agentsGetEvent: (eventId) => ipcRenderer.invoke('agents:get-event', eventId), + agentsAckEvent: (eventId, note) => ipcRenderer.invoke('agents:ack-event', eventId, note), + agentsCleanup: (retentionDays) => ipcRenderer.invoke('agents:cleanup', retentionDays), // Database connections getDbConnections: () => ipcRenderer.invoke('get-db-connections'), @@ -91,6 +100,26 @@ contextBridge.exposeInMainWorld('api', { const handler = (event, data) => callback(data); ipcRenderer.on('stream-complete', handler); return () => ipcRenderer.removeListener('stream-complete', handler); + }, + onAgentStatus: (callback) => { + const handler = (event, data) => callback(data); + ipcRenderer.on('agent-status', handler); + return () => ipcRenderer.removeListener('agent-status', handler); + }, + onAgentEvent: (callback) => { + const handler = (event, data) => callback(data); + ipcRenderer.on('agent-event', handler); + return () => ipcRenderer.removeListener('agent-event', handler); + }, + 
onAgentError: (callback) => { + const handler = (event, data) => callback(data); + ipcRenderer.on('agent-error', handler); + return () => ipcRenderer.removeListener('agent-error', handler); + }, + onAgentComplete: (callback) => { + const handler = (event, data) => callback(data); + ipcRenderer.on('agent-complete', handler); + return () => ipcRenderer.removeListener('agent-complete', handler); } }); diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js index 53974f5..cab7e8b 100644 --- a/electron-ui/renderer.js +++ b/electron-ui/renderer.js @@ -3536,6 +3536,381 @@ btnSaveDbCreds?.addEventListener('click', async () => { btnSaveDbCreds.disabled = false; } }); +// Agents Tab - Long-running monitoring +// ============================================ + +const agentsState = { + runId: null, + status: 'idle', + events: [], + selectedEventId: null, + listenersReady: false, +}; + +function getAgentsElements() { + return { + btnStart: document.getElementById('btn-agents-start'), + btnStop: document.getElementById('btn-agents-stop'), + btnRefresh: document.getElementById('btn-agents-refresh'), + btnCleanup: document.getElementById('btn-agents-cleanup'), + btnOpenGraph: document.getElementById('btn-agents-open-graph'), + btnAck: document.getElementById('btn-agents-ack'), + statusChip: document.getElementById('agents-status-chip'), + statusText: document.getElementById('agents-status-text'), + list: document.getElementById('agents-event-list'), + detail: document.getElementById('agents-event-detail'), + filterState: document.getElementById('agents-filter-state'), + filterSeverity: document.getElementById('agents-filter-severity'), + filterSearch: document.getElementById('agents-filter-search'), + metricCycle: document.getElementById('agents-metric-cycle'), + metricCandidates: document.getElementById('agents-metric-candidates'), + metricTriaged: document.getElementById('agents-metric-triaged'), + metricEmitted: document.getElementById('agents-metric-emitted'), + 
metricHeartbeat: document.getElementById('agents-metric-heartbeat'), + cfgPoll: document.getElementById('agents-config-poll-ms'), + cfgHist: document.getElementById('agents-config-history-min'), + cfgPoints: document.getElementById('agents-config-min-points'), + cfgMaxLlm: document.getElementById('agents-config-max-llm'), + cfgZ: document.getElementById('agents-config-threshold-z'), + cfgMad: document.getElementById('agents-config-threshold-mad'), + cfgStale: document.getElementById('agents-config-staleness-sec'), + }; +} + +function getAgentsConfigFromUI() { + const el = getAgentsElements(); + return { + pollIntervalMs: Number(el.cfgPoll?.value || 15000), + historyWindowMinutes: Number(el.cfgHist?.value || 360), + minHistoryPoints: Number(el.cfgPoints?.value || 30), + maxLlmTriagesPerCycle: Number(el.cfgMaxLlm?.value || 5), + thresholds: { + z: Number(el.cfgZ?.value || 3), + mad: Number(el.cfgMad?.value || 3.5), + stalenessSec: Number(el.cfgStale?.value || 120), + }, + }; +} + +function formatAgentTime(ts) { + if (!ts) return 'n/a'; + const d = new Date(ts); + if (Number.isNaN(d.getTime())) return String(ts); + return d.toLocaleString(); +} + +function updateAgentStatusUi(status, text) { + const el = getAgentsElements(); + if (!el.statusChip || !el.statusText) return; + el.statusChip.className = 'status-chip'; + const normalized = (status || 'idle').toLowerCase(); + if (normalized === 'running') el.statusChip.classList.add('running'); + if (normalized === 'failed' || normalized === 'error') el.statusChip.classList.add('error'); + el.statusChip.textContent = normalized; + el.statusText.textContent = text || normalized; + if (el.btnStart) el.btnStart.disabled = normalized === 'running' || normalized === 'starting'; + if (el.btnStop) el.btnStop.disabled = !(normalized === 'running' || normalized === 'starting' || normalized === 'stopping'); +} + +function updateAgentMetrics(metrics = {}, heartbeatTs = null) { + const el = getAgentsElements(); + if (el.metricCycle) 
el.metricCycle.textContent = String(metrics.cycleMs ?? metrics.lastCycleMs ?? 0); + if (el.metricCandidates) el.metricCandidates.textContent = String(metrics.candidates ?? metrics.lastCandidates ?? 0); + if (el.metricTriaged) el.metricTriaged.textContent = String(metrics.triaged ?? metrics.lastTriaged ?? 0); + if (el.metricEmitted) el.metricEmitted.textContent = String(metrics.emitted ?? metrics.lastEmitted ?? 0); + if (el.metricHeartbeat) el.metricHeartbeat.textContent = formatAgentTime(heartbeatTs || metrics.timestamp); +} + +function getFilteredAgentEvents() { + const el = getAgentsElements(); + const state = (el.filterState?.value || '').toLowerCase(); + const severity = (el.filterSeverity?.value || '').toLowerCase(); + const search = (el.filterSearch?.value || '').trim().toLowerCase(); + return agentsState.events.filter((event) => { + if (state && String(event.state || '').toLowerCase() !== state) return false; + if (severity && String(event.severity || '').toLowerCase() !== severity) return false; + if (search) { + const haystack = [ + event.summary, + event.source_tag, + event.tag_name, + ...(event.equipment || []), + ...(event.tags || []), + ] + .filter(Boolean) + .join(' ') + .toLowerCase(); + if (!haystack.includes(search)) return false; + } + return true; + }); +} + +function renderAgentEventList() { + const el = getAgentsElements(); + if (!el.list) return; + const events = getFilteredAgentEvents(); + if (!events.length) { + el.list.innerHTML = '
No anomaly events match the current filters.
'; + return; + } + el.list.innerHTML = events + .map((event) => { + const active = event.event_id === agentsState.selectedEventId ? ' active' : ''; + const sev = String(event.severity || 'low').toLowerCase(); + const equipment = (event.equipment || []).slice(0, 2).join(', '); + return ` +
+
+ ${escapeHtml(sev)} + ${escapeHtml(formatAgentTime(event.created_at))} +
+
${escapeHtml(event.summary || 'Untitled anomaly')}
+
${escapeHtml(event.tag_name || event.source_tag || '')}${equipment ? ` • ${escapeHtml(equipment)}` : ''}
+
+ `; + }) + .join(''); + + el.list.querySelectorAll('.agents-event-card').forEach((card) => { + card.addEventListener('click', () => { + const eventId = card.getAttribute('data-event-id'); + if (!eventId) return; + selectAgentEvent(eventId); + }); + }); +} + +function resolveAgentGraphTarget(event) { + const equipment = (event.equipment || []).find(Boolean); + if (equipment) return { name: equipment, type: 'Equipment' }; + const tagName = event.tag_name || (event.tags || []).find(Boolean) || event.source_tag; + if (tagName) return { name: tagName, type: 'ScadaTag' }; + return null; +} + +function renderAgentEventDetails(event) { + const el = getAgentsElements(); + if (!el.detail) return; + if (!event) { + el.detail.innerHTML = '

Select an anomaly event from the feed.

'; + if (el.btnOpenGraph) el.btnOpenGraph.disabled = true; + if (el.btnAck) el.btnAck.disabled = true; + return; + } + + let checks = []; + let causes = []; + let safety = []; + try { checks = JSON.parse(event.recommended_checks_json || '[]'); } catch (e) {} + try { causes = JSON.parse(event.probable_causes_json || '[]'); } catch (e) {} + try { safety = JSON.parse(event.safety_notes_json || '[]'); } catch (e) {} + + el.detail.innerHTML = ` +
+
Event ID${escapeHtml(event.event_id || '')}
+
State${escapeHtml(event.state || '')}
+
Severity${escapeHtml(event.severity || '')}
+
Confidence${escapeHtml(String(event.confidence ?? ''))}
+
Category${escapeHtml(event.category || '')}
+
Timestamp${escapeHtml(formatAgentTime(event.created_at))}
+
Source Tag${escapeHtml(event.source_tag || '')}
+
Tag Name${escapeHtml(event.tag_name || '')}
+
z-score${escapeHtml(String(event.z_score ?? '0'))}
+
MAD score${escapeHtml(String(event.mad_score ?? '0'))}
+
+
+
Summary
+
${escapeHtml(event.summary || '')}
+
+
+
Explanation
+
${escapeHtml(event.explanation || '')}
+
+
+
Probable Causes
+
    ${(causes || []).map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('') || '
  • n/a
  • '}
+
+
+
Verification Checks
+
    ${(checks || []).map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('') || '
  • n/a
  • '}
+
+
+
Safety Notes
+
    ${(safety || []).map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('') || '
  • n/a
  • '}
+
+ `; + + if (el.btnOpenGraph) el.btnOpenGraph.disabled = !resolveAgentGraphTarget(event); + if (el.btnAck) el.btnAck.disabled = event.state === 'acknowledged'; +} + +async function selectAgentEvent(eventId) { + agentsState.selectedEventId = eventId; + const existing = agentsState.events.find((e) => e.event_id === eventId); + if (existing && existing.explanation && existing.recommended_checks_json) { + renderAgentEventList(); + renderAgentEventDetails(existing); + return; + } + const detailResult = await window.api.agentsGetEvent(eventId); + if (detailResult.success && detailResult.event) { + const idx = agentsState.events.findIndex((e) => e.event_id === eventId); + if (idx >= 0) { + agentsState.events[idx] = { ...agentsState.events[idx], ...detailResult.event }; + } else { + agentsState.events.unshift(detailResult.event); + } + renderAgentEventList(); + renderAgentEventDetails(detailResult.event); + } +} + +async function loadAgentEvents() { + const el = getAgentsElements(); + const result = await window.api.agentsListEvents({ + limit: 200, + state: el.filterState?.value || undefined, + severity: el.filterSeverity?.value || undefined, + runId: agentsState.runId || undefined, + }); + if (!result.success) return; + agentsState.events = Array.isArray(result.events) ? 
result.events : []; + renderAgentEventList(); + + if (agentsState.selectedEventId) { + const selected = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId); + renderAgentEventDetails(selected || null); + } +} + +async function refreshAgentStatus() { + const status = await window.api.agentsStatus(agentsState.runId || undefined); + if (!status.success) { + updateAgentStatusUi('error', status.error || 'Failed to fetch status'); + return; + } + if (status.active) { + agentsState.runId = status.runId || agentsState.runId; + agentsState.status = status.status || 'running'; + updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId}`); + updateAgentMetrics(status.metrics || {}, status.lastHeartbeatAt); + } else { + agentsState.status = 'idle'; + updateAgentStatusUi('idle', 'No active run'); + } +} + +async function startAgentsMonitoring() { + const config = getAgentsConfigFromUI(); + const result = await window.api.agentsStart(config); + if (!result.success) { + updateAgentStatusUi('error', result.error || 'Failed to start monitoring'); + return; + } + agentsState.runId = result.runId; + agentsState.status = 'running'; + updateAgentStatusUi('running', `Run ${result.runId}`); + await loadAgentEvents(); +} + +async function stopAgentsMonitoring() { + const result = await window.api.agentsStop(agentsState.runId || undefined); + if (!result.success) { + updateAgentStatusUi('error', result.error || 'Failed to stop monitoring'); + return; + } + agentsState.status = 'stopped'; + updateAgentStatusUi('stopped', 'Monitoring stopped'); +} + +async function acknowledgeSelectedAgentEvent() { + if (!agentsState.selectedEventId) return; + const result = await window.api.agentsAckEvent(agentsState.selectedEventId, ''); + if (!result.success) return; + await loadAgentEvents(); + const selected = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId); + renderAgentEventDetails(selected || null); +} + +function 
upsertRealtimeAgentEvent(payload) { + if (!payload || !payload.eventId) return; + const idx = agentsState.events.findIndex((e) => e.event_id === payload.eventId); + const next = { + event_id: payload.eventId, + severity: payload.severity || 'medium', + summary: payload.summary || 'Anomaly detected', + category: payload.category || 'deviation', + created_at: payload.createdAt || new Date().toISOString(), + source_tag: payload.entityRefs?.sourceTag || payload.entityRefs?.tag || '', + tag_name: payload.entityRefs?.tag || '', + state: 'open', + }; + if (idx >= 0) { + agentsState.events[idx] = { ...agentsState.events[idx], ...next }; + } else { + agentsState.events.unshift(next); + } + renderAgentEventList(); +} + +function ensureAgentListeners() { + if (agentsState.listenersReady) return; + agentsState.listenersReady = true; + + window.api.onAgentStatus((payload) => { + if (!payload) return; + if (payload.runId) agentsState.runId = payload.runId; + agentsState.status = payload.state || agentsState.status; + updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`); + updateAgentMetrics(payload, payload.timestamp); + }); + + window.api.onAgentEvent((payload) => { + upsertRealtimeAgentEvent(payload); + }); + + window.api.onAgentError((payload) => { + if (!payload) return; + updateAgentStatusUi('error', payload.message || 'Agent runtime error'); + }); + + window.api.onAgentComplete((payload) => { + if (!payload) return; + agentsState.status = payload.success ? 
'stopped' : 'failed'; + updateAgentStatusUi(agentsState.status, payload.reason || 'Run complete'); + refreshAgentStatus(); + }); +} + +function initAgentsTab() { + ensureAgentListeners(); + const el = getAgentsElements(); + if (!el.btnStart) return; + if (!el.btnStart.dataset.bound) { + el.btnStart.dataset.bound = '1'; + el.btnStart.addEventListener('click', startAgentsMonitoring); + el.btnStop?.addEventListener('click', stopAgentsMonitoring); + el.btnRefresh?.addEventListener('click', loadAgentEvents); + el.btnCleanup?.addEventListener('click', async () => { + await window.api.agentsCleanup(14); + await loadAgentEvents(); + }); + el.btnAck?.addEventListener('click', acknowledgeSelectedAgentEvent); + el.btnOpenGraph?.addEventListener('click', () => { + const event = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId); + if (!event) return; + const target = resolveAgentGraphTarget(event); + if (!target) return; + openGraphModal(target.name, target.type, event.summary || target.name); + }); + el.filterState?.addEventListener('change', loadAgentEvents); + el.filterSeverity?.addEventListener('change', loadAgentEvents); + el.filterSearch?.addEventListener('input', renderAgentEventList); + } + refreshAgentStatus(); + loadAgentEvents(); +} // Initialize graph tab when it's first shown navButtons.forEach(btn => { @@ -3558,6 +3933,9 @@ navButtons.forEach(btn => { loadSettings(); loadDbConnections(); } + if (btn.dataset.tab === 'agents') { + setTimeout(initAgentsTab, 100); + } }); }); @@ -3569,5 +3947,6 @@ setTimeout(() => { loadTiaProjects(); loadSettings(); loadDbConnections(); + ensureAgentListeners(); }, 500); diff --git a/electron-ui/styles.css b/electron-ui/styles.css index 5ba9186..f1e066e 100644 --- a/electron-ui/styles.css +++ b/electron-ui/styles.css @@ -2979,3 +2979,278 @@ select.input, .connection-status .status-dot { flex-shrink: 0; } + +/* ============================================ + AGENTS TAB + 
============================================ */ + +.agents-topbar { + display: flex; + justify-content: space-between; + align-items: center; + gap: var(--space-4); + margin-bottom: var(--space-3); + flex-wrap: wrap; +} + +.agents-run-controls { + display: flex; + gap: var(--space-2); + flex-wrap: wrap; +} + +.agents-run-status { + display: flex; + align-items: center; + gap: var(--space-2); + color: var(--color-text-secondary); + font-size: var(--text-sm); +} + +.status-chip { + display: inline-flex; + align-items: center; + justify-content: center; + padding: 2px 8px; + border-radius: 999px; + border: 1px solid var(--color-border); + background: var(--color-bg-panel-2); + color: var(--color-text-secondary); + font-size: var(--text-xs); + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.4px; +} + +.status-chip.running { + color: var(--color-success); + border-color: rgba(34, 197, 94, 0.35); + background: rgba(34, 197, 94, 0.12); +} + +.status-chip.error { + color: var(--color-danger); + border-color: rgba(239, 68, 68, 0.35); + background: rgba(239, 68, 68, 0.12); +} + +.agents-config-row { + display: grid; + grid-template-columns: repeat(14, minmax(0, 1fr)); + gap: var(--space-2); + margin-bottom: var(--space-4); + align-items: center; +} + +.agents-config-row label { + font-size: var(--text-xs); + color: var(--color-text-secondary); + text-transform: uppercase; + letter-spacing: 0.35px; +} + +.agents-config-row .input { + min-width: 0; +} + +.agents-metrics-row { + display: grid; + grid-template-columns: repeat(5, minmax(0, 1fr)); + gap: var(--space-2); + margin-bottom: var(--space-4); +} + +.metric-card { + border: 1px solid var(--color-border); + background: var(--color-bg-panel); + border-radius: var(--radius-md); + padding: var(--space-2) var(--space-3); + display: flex; + flex-direction: column; + gap: 2px; +} + +.metric-label { + font-size: var(--text-xs); + color: var(--color-text-muted); +} + +.metric-value { + font-family: 
var(--font-mono); + font-size: var(--text-sm); + color: var(--color-text); +} + +.agents-main { + display: grid; + grid-template-columns: minmax(300px, 38%) minmax(0, 1fr); + gap: var(--space-3); + min-height: 480px; +} + +.agents-feed-panel, +.agents-detail-panel { + border: 1px solid var(--color-border); + background: var(--color-bg-panel); + border-radius: var(--radius-lg); + overflow: hidden; + display: flex; + flex-direction: column; +} + +.agents-feed-header, +.agents-detail-header { + padding: var(--space-3); + border-bottom: 1px solid var(--color-border-subtle); + display: flex; + justify-content: space-between; + align-items: center; + gap: var(--space-2); +} + +.agents-feed-header h3, +.agents-detail-header h3 { + font-size: var(--text-md); + font-weight: 600; +} + +.agents-feed-filters { + display: flex; + gap: var(--space-2); + flex-wrap: wrap; +} + +.agents-feed-filters .input { + min-width: 120px; +} + +.agents-event-list { + overflow-y: auto; + padding: var(--space-2); + display: flex; + flex-direction: column; + gap: var(--space-2); + flex: 1; +} + +.agents-empty { + color: var(--color-text-muted); + font-size: var(--text-sm); + padding: var(--space-4); + text-align: center; +} + +.agents-event-card { + border: 1px solid var(--color-border); + background: var(--color-bg-panel-2); + border-radius: var(--radius-md); + padding: var(--space-2) var(--space-3); + cursor: pointer; + transition: border-color var(--transition-fast), transform var(--transition-fast); +} + +.agents-event-card:hover { + border-color: var(--color-border-active); + transform: translateY(-1px); +} + +.agents-event-card.active { + border-color: var(--color-accent); + box-shadow: 0 0 0 1px rgba(34, 211, 238, 0.35) inset; +} + +.agents-event-line-top { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 4px; + gap: var(--space-2); +} + +.agents-severity { + font-size: var(--text-xs); + text-transform: uppercase; + letter-spacing: 0.4px; + 
padding: 2px 6px; + border-radius: 999px; + border: 1px solid transparent; +} + +.agents-severity.sev-critical { + color: #fecaca; + background: rgba(239, 68, 68, 0.2); + border-color: rgba(239, 68, 68, 0.4); +} + +.agents-severity.sev-high { + color: #fdba74; + background: rgba(249, 115, 22, 0.18); + border-color: rgba(249, 115, 22, 0.35); +} + +.agents-severity.sev-medium { + color: #fde68a; + background: rgba(245, 158, 11, 0.15); + border-color: rgba(245, 158, 11, 0.35); +} + +.agents-severity.sev-low { + color: #bfdbfe; + background: rgba(59, 130, 246, 0.15); + border-color: rgba(59, 130, 246, 0.35); +} + +.agents-event-time { + font-size: var(--text-xs); + color: var(--color-text-muted); + font-family: var(--font-mono); +} + +.agents-event-summary { + font-size: var(--text-sm); + color: var(--color-text); + margin-bottom: 4px; +} + +.agents-event-meta { + font-size: var(--text-xs); + color: var(--color-text-muted); +} + +.agents-detail-content { + padding: var(--space-3); + overflow-y: auto; + font-size: var(--text-sm); + display: flex; + flex-direction: column; + gap: var(--space-3); +} + +.agents-detail-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: var(--space-2) var(--space-3); +} + +.agents-detail-item { + display: flex; + flex-direction: column; + gap: 2px; +} + +.agents-detail-label { + font-size: var(--text-xs); + color: var(--color-text-muted); + text-transform: uppercase; + letter-spacing: 0.3px; +} + +.agents-detail-value { + font-family: var(--font-mono); + color: var(--color-text); +} + +.agents-list { + margin-left: var(--space-4); + color: var(--color-text-secondary); +} diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py new file mode 100644 index 0000000..70a0f4b --- /dev/null +++ b/scripts/anomaly_monitor.py @@ -0,0 +1,912 @@ +#!/usr/bin/env python3 +""" +Long-running anomaly monitor worker. 
+ +Modes: + - run: start continuous monitoring loop + - status: get run status + - list-events: list persisted anomaly events + - get-event: fetch one anomaly event + - ack-event: mark event as acknowledged + - cleanup: delete old events by retention policy + - replay-fixtures: run deterministic fixture validation +""" + +from __future__ import annotations + +import argparse +import json +import os +import signal +import sys +import time +import uuid +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional + +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - optional fallback for minimal environments + def load_dotenv(*_args, **_kwargs): + return False + +from anomaly_rules import ( + compute_deviation_scores, + dedup_key, + is_quality_good, + is_stale, + parse_timestamp, + safe_float, +) + + +load_dotenv() + + +def utc_now_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +def emit(prefix: str, payload: Dict[str, Any]) -> None: + """Emit machine-parseable messages for Electron main process.""" + print(f"[{prefix}] {json.dumps(payload, default=str)}", flush=True) + + +def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]: + raw = dict(config or {}) + thresholds = raw.get("thresholds", {}) if isinstance(raw.get("thresholds"), dict) else {} + defaults = { + "pollIntervalMs": 15000, + "historyWindowMinutes": 360, + "minHistoryPoints": 30, + "maxMonitoredTags": 200, + "maxCandidatesPerCycle": 25, + "maxLlmTriagesPerCycle": 5, + "dedupCooldownMinutes": 10, + "retentionDays": 14, + "cleanupEveryCycles": 40, + "runMode": "live", + "scope": { + "project": None, + "equipmentTags": [], + "tagRegex": None, + }, + "thresholds": { + "z": 3.0, + "mad": 3.5, + "rate": 0.0, + "stalenessSec": 120, + "flatline_std_epsilon": 1e-6, + "stuck_window_size": 20, + }, + } + cfg = defaults + cfg.update({k: v for k, v in raw.items() if k in defaults and k != 
"thresholds"}) + cfg["thresholds"].update({k: v for k, v in thresholds.items() if v is not None}) + if isinstance(raw.get("scope"), dict): + cfg["scope"].update(raw["scope"]) + return cfg + + +class AnomalyMonitor: + def __init__(self, config: Dict[str, Any], run_id: Optional[str] = None): + self.config = merge_defaults(config) + self.run_id = run_id or f"agent-run-{uuid.uuid4()}" + from ignition_api_client import IgnitionApiClient + from neo4j_ontology import get_ontology_graph + + self.graph = get_ontology_graph() + + self.api = IgnitionApiClient( + base_url=self.config.get("ignitionApiUrl") or os.getenv("IGNITION_API_URL"), + api_token=self.config.get("ignitionApiToken") or os.getenv("IGNITION_API_TOKEN"), + timeout=15.0, + ) + + self.llm = None + self._llm_enabled = bool(os.getenv("ANTHROPIC_API_KEY")) + if self._llm_enabled: + try: + from claude_client import ClaudeClient + + self.llm = ClaudeClient( + enable_tools=False, + ignition_api_url=self.config.get("ignitionApiUrl"), + ignition_api_token=self.config.get("ignitionApiToken"), + ) + except Exception as exc: + self._llm_enabled = False + emit("AGENT_ERROR", { + "runId": self.run_id, + "code": "llm_init_failed", + "message": str(exc), + "recoverable": True, + "timestamp": utc_now_iso(), + }) + + self._running = True + self._cycle_count = 0 + self._prev_values: Dict[str, float] = {} + + # ----------------------------- + # Schema / run lifecycle + # ----------------------------- + def init_schema(self) -> None: + self.graph.init_agent_monitoring_schema() + + def upsert_run(self, status: str, reason: Optional[str] = None) -> None: + with self.graph.session() as session: + session.run( + """ + MERGE (r:AgentRun {run_id: $run_id}) + SET r.status = $status, + r.updated_at = datetime(), + r.last_heartbeat_at = datetime(), + r.config_json = $config_json, + r.cycle_count = $cycle_count, + r.started_at = coalesce(r.started_at, datetime()), + r.stopped_at = CASE WHEN $status IN ['stopped', 'failed'] THEN datetime() 
ELSE r.stopped_at END, + r.stop_reason = CASE WHEN $reason IS NULL THEN r.stop_reason ELSE $reason END + """, + run_id=self.run_id, + status=status, + config_json=json.dumps(self.config, default=str), + cycle_count=self._cycle_count, + reason=reason, + ) + + def heartbeat(self, metrics: Dict[str, Any]) -> None: + with self.graph.session() as session: + session.run( + """ + MATCH (r:AgentRun {run_id: $run_id}) + SET r.last_heartbeat_at = datetime(), + r.cycle_count = $cycle_count, + r.last_cycle_ms = $cycle_ms, + r.last_candidates = $candidates, + r.last_triaged = $triaged, + r.last_emitted = $emitted + """, + run_id=self.run_id, + cycle_count=self._cycle_count, + cycle_ms=metrics.get("cycleMs", 0), + candidates=metrics.get("candidates", 0), + triaged=metrics.get("triaged", 0), + emitted=metrics.get("emitted", 0), + ) + + # ----------------------------- + # Tag and context collection + # ----------------------------- + def get_monitored_tags(self) -> List[Dict[str, str]]: + max_tags = int(self.config.get("maxMonitoredTags", 200)) + scope = self.config.get("scope", {}) + tag_regex = scope.get("tagRegex") + equipment_tags = set(scope.get("equipmentTags") or []) + + with self.graph.session() as session: + result = session.run( + """ + MATCH (t:ScadaTag) + WHERE coalesce(t.opc_item_path, t.name) IS NOT NULL + AND coalesce(t.opc_item_path, t.name) <> '' + RETURN DISTINCT coalesce(t.opc_item_path, t.name) AS tag_path, + coalesce(t.name, t.opc_item_path) AS tag_name + LIMIT $limit + """, + limit=max_tags * 3, + ) + tags = [{"path": r["tag_path"], "name": r["tag_name"]} for r in result if r["tag_path"]] + + if tag_regex: + import re + try: + pattern = re.compile(tag_regex, re.IGNORECASE) + tags = [t for t in tags if pattern.search(t["path"]) or pattern.search(t["name"])] + except re.error: + emit("AGENT_ERROR", { + "runId": self.run_id, + "code": "invalid_tag_regex", + "message": f"Invalid regex: {tag_regex}", + "recoverable": True, + "timestamp": utc_now_iso(), + }) + + if 
equipment_tags: + tags = [t for t in tags if t["name"] in equipment_tags or t["path"] in equipment_tags] + + return tags[:max_tags] + + def _extract_history_values(self, history_data: Any, tag_path: str) -> List[float]: + """Normalize multiple gateway response shapes to numeric values list.""" + values: List[float] = [] + if history_data is None: + return values + if isinstance(history_data, dict) and history_data.get("error"): + return values + + rows: List[Any] = [] + if isinstance(history_data, list): + rows = history_data + elif isinstance(history_data, dict): + for key in ("rows", "data", "results", "values", "history"): + chunk = history_data.get(key) + if isinstance(chunk, list): + rows = chunk + break + if not rows and "tagHistory" in history_data and isinstance(history_data["tagHistory"], list): + rows = history_data["tagHistory"] + + for row in rows: + if isinstance(row, (int, float, str)): + val = safe_float(row) + if val is not None: + values.append(val) + continue + if not isinstance(row, dict): + continue + candidate = None + if "value" in row: + candidate = row.get("value") + elif tag_path in row: + candidate = row.get(tag_path) + else: + # Wide format often has timestamp + one tag column. 
+ for k, v in row.items(): + if k.lower() in {"timestamp", "ts", "t", "time"}: + continue + candidate = v + break + val = safe_float(candidate) + if val is not None: + values.append(val) + return values + + def fetch_history_values(self, tag_path: str) -> List[float]: + minutes = int(self.config.get("historyWindowMinutes", 360)) + end_dt = datetime.now(timezone.utc) + start_dt = end_dt - timedelta(minutes=minutes) + data = self.api.query_tag_history( + [tag_path], + start_dt.isoformat(), + end_dt.isoformat(), + return_size=max(100, int(self.config.get("minHistoryPoints", 30)) * 4), + aggregation_mode="Average", + return_format="Wide", + ) + return self._extract_history_values(data, tag_path) + + def get_context(self, tag_path: str) -> Dict[str, Any]: + with self.graph.session() as session: + result = session.run( + """ + MATCH (t:ScadaTag) + WHERE t.name = $tag OR t.opc_item_path = $tag + OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t) + OPTIONAL MATCH (eq)-[:HAS_SYMPTOM]->(s:FaultSymptom) + OPTIONAL MATCH (s)-[:CAUSED_BY]->(c:FaultCause) + OPTIONAL MATCH (eq)-[:HAS_PATTERN]->(p:ControlPattern) + OPTIONAL MATCH (eq)-[:SAFETY_CRITICAL]->(se:SafetyElement) + RETURN t, + collect(DISTINCT eq.name) AS equipment, + collect(DISTINCT s.symptom) AS symptoms, + collect(DISTINCT c.cause) AS causes, + collect(DISTINCT p.pattern_name) AS patterns, + collect(DISTINCT se.name) AS safety + LIMIT 1 + """, + tag=tag_path, + ) + record = result.single() + if not record: + return { + "tag_path": tag_path, + "equipment": [], + "symptoms": [], + "causes": [], + "patterns": [], + "safety": [], + } + node = record["t"] + return { + "tag_path": tag_path, + "tag_name": node.get("name") if node else tag_path, + "equipment": [x for x in record["equipment"] if x], + "symptoms": [x for x in record["symptoms"] if x], + "causes": [x for x in record["causes"] if x], + "patterns": [x for x in record["patterns"] if x], + "safety": [x for x in record["safety"] if x], + } + + # ----------------------------- 
+    # Triage and persistence
+    # -----------------------------
+    def run_llm_triage(
+        self,
+        context: Dict[str, Any],
+        deterministic: Dict[str, Any],
+        live_sample: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Produce a structured triage verdict for one anomaly candidate.
+
+        Args:
+            context: Graph context for the tag (equipment, symptoms, causes,
+                patterns, safety elements) from get_context().
+            deterministic: Scores/flags produced by compute_deviation_scores.
+            live_sample: Latest live read (path/value/quality/timestamp).
+
+        Returns:
+            Dict with keys summary, category, severity, confidence,
+            probable_causes, verification_checks, safety_notes, rationale,
+            related_entities. When no LLM client is configured, the
+            deterministic fallback below is returned unchanged.
+        """
+        # Deterministic fallback verdict; also serves as the base dict that
+        # any LLM JSON output is merged onto, so every key is always present.
+        fallback = {
+            "summary": f"Deterministic anomaly on {context.get('tag_name', context['tag_path'])}",
+            "category": deterministic.get("category", "deviation"),
+            "severity": "medium",
+            "confidence": 0.55,
+            "probable_causes": ["Signal deviates from historical baseline."],
+            "verification_checks": [
+                f"Check live quality/timestamp for {context.get('tag_path')}",
+                "Inspect upstream interlocks and communication health.",
+            ],
+            "safety_notes": context.get("safety", []),
+            "rationale": "LLM triage unavailable; using deterministic fallback.",
+            "related_entities": [
+                {"label": "Equipment", "name": e} for e in context.get("equipment", [])[:3]
+            ],
+        }
+        if not self.llm:
+            return fallback
+
+        # Prompt constrains the model to a strict JSON schema so the caller
+        # can merge the response onto `fallback` without key checks.
+        system_prompt = (
+            "You are an industrial anomaly triage assistant. "
+            "Return ONLY valid JSON with keys: summary, category, severity, confidence, "
+            "probable_causes, verification_checks, safety_notes, rationale, related_entities. "
+            "Severity must be one of critical/high/medium/low. "
+            "Category must be one of spike/drift/stuck/state-conflict/quality-issue/deviation. "
+            "related_entities is a list of objects: {label,name}."
+ ) + user_prompt = json.dumps( + { + "context": context, + "deterministic": deterministic, + "live_sample": live_sample, + }, + default=str, + ) + try: + result = self.llm.query_json( + system_prompt=system_prompt, + user_prompt=user_prompt, + max_tokens=900, + use_tools=False, + ) + data = result.get("data") + if not isinstance(data, dict): + return fallback + merged = dict(fallback) + merged.update({k: v for k, v in data.items() if v is not None}) + return merged + except Exception as exc: + emit("AGENT_ERROR", { + "runId": self.run_id, + "code": "llm_triage_failed", + "message": str(exc), + "recoverable": True, + "timestamp": utc_now_iso(), + }) + return fallback + + def _severity_from_scores(self, deterministic: Dict[str, Any], llm_out: Dict[str, Any]) -> str: + sev = str(llm_out.get("severity", "")).lower() + if sev in {"critical", "high", "medium", "low"}: + return sev + z = abs(float(deterministic.get("z_score", 0.0))) + if z >= 8: + return "critical" + if z >= 5: + return "high" + if z >= 3: + return "medium" + return "low" + + def is_duplicate_recent(self, dedup_sig: str) -> bool: + cooldown = max(1, int(self.config.get("dedupCooldownMinutes", 10))) + with self.graph.session() as session: + result = session.run( + """ + MATCH (e:AnomalyEvent {dedup_key: $dedup_key}) + WHERE e.created_at IS NOT NULL + AND datetime(e.created_at) > datetime() - duration({minutes: $minutes}) + RETURN count(e) AS cnt + """, + dedup_key=dedup_sig, + minutes=cooldown, + ) + row = result.single() + return bool(row and row["cnt"] > 0) + + def persist_event( + self, + context: Dict[str, Any], + deterministic: Dict[str, Any], + live_sample: Dict[str, Any], + triage: Dict[str, Any], + ) -> Optional[Dict[str, Any]]: + category = triage.get("category") or deterministic.get("category", "deviation") + dedup_sig = dedup_key(context["tag_path"], category, int(self.config.get("dedupCooldownMinutes", 10))) + if self.is_duplicate_recent(dedup_sig): + return None + + event_id = 
f"ae-{uuid.uuid4()}" + severity = self._severity_from_scores(deterministic, triage) + confidence = float(max(0.0, min(1.0, triage.get("confidence", 0.5)))) + event_data = { + "event_id": event_id, + "run_id": self.run_id, + "event_schema_version": 1, + "state": "open", + "severity": severity, + "confidence": confidence, + "category": category, + "summary": triage.get("summary", f"Anomaly on {context['tag_path']}"), + "explanation": triage.get("rationale", ""), + "recommended_checks_json": json.dumps(triage.get("verification_checks", []), default=str), + "probable_causes_json": json.dumps(triage.get("probable_causes", []), default=str), + "safety_notes_json": json.dumps(triage.get("safety_notes", []), default=str), + "deterministic_reasons_json": json.dumps(deterministic.get("reasons", []), default=str), + "z_score": float(deterministic.get("z_score", 0.0)), + "mad_score": float(deterministic.get("mad_score", 0.0)), + "delta_rate": float(deterministic.get("delta_rate", 0.0)), + "window_volatility": float(deterministic.get("window_volatility", 0.0)), + "source_tag": context["tag_path"], + "tag_name": context.get("tag_name") or context["tag_path"], + "live_quality": live_sample.get("quality"), + "live_timestamp": live_sample.get("timestamp"), + "live_value": str(live_sample.get("value")), + "dedup_key": dedup_sig, + "created_at": utc_now_iso(), + "updated_at": utc_now_iso(), + } + + with self.graph.session() as session: + session.run( + """ + MATCH (r:AgentRun {run_id: $run_id}) + CREATE (e:AnomalyEvent $props) + MERGE (r)-[:EMITTED]->(e) + """, + run_id=self.run_id, + props=event_data, + ) + + session.run( + """ + MATCH (e:AnomalyEvent {event_id: $event_id}) + MATCH (t:ScadaTag) + WHERE t.name = $tag OR t.opc_item_path = $tag + MERGE (e)-[:OBSERVED_ON]->(t) + """, + event_id=event_id, + tag=context["tag_path"], + ) + + for equipment_name in context.get("equipment", [])[:5]: + session.run( + """ + MATCH (e:AnomalyEvent {event_id: $event_id}) + MATCH (eq:Equipment 
{name: $name}) + MERGE (e)-[:AFFECTS]->(eq) + """, + event_id=event_id, + name=equipment_name, + ) + + related_inputs: List[Dict[str, str]] = [] + for item in triage.get("related_entities", []) or []: + if isinstance(item, dict) and item.get("label") and item.get("name"): + related_inputs.append({"label": str(item["label"]), "name": str(item["name"])}) + for name in context.get("symptoms", [])[:3]: + related_inputs.append({"label": "FaultSymptom", "name": name}) + for name in context.get("causes", [])[:3]: + related_inputs.append({"label": "FaultCause", "name": name}) + + for rel in related_inputs[:8]: + label = rel["label"] + if label not in {"FaultSymptom", "FaultCause", "ControlPattern", "SafetyElement", "Equipment", "ScadaTag"}: + continue + session.run( + f""" + MATCH (e:AnomalyEvent {{event_id: $event_id}}) + MATCH (n:{label}) + WHERE n.name = $name OR n.symptom = $name OR n.cause = $name + MERGE (e)-[:RELATED_TO]->(n) + """, + event_id=event_id, + name=rel["name"], + ) + + return event_data + + # ----------------------------- + # Monitoring loop + # ----------------------------- + def run_cycle(self) -> Dict[str, Any]: + cycle_start = time.time() + metrics = {"candidates": 0, "triaged": 0, "emitted": 0, "cycleMs": 0} + thresholds = self.config.get("thresholds", {}) + min_history = int(self.config.get("minHistoryPoints", 30)) + + if not self.api.is_configured: + emit("AGENT_ERROR", { + "runId": self.run_id, + "code": "ignition_not_configured", + "message": "Ignition API URL/token not configured.", + "recoverable": True, + "timestamp": utc_now_iso(), + }) + metrics["cycleMs"] = int((time.time() - cycle_start) * 1000) + return metrics + + tags = self.get_monitored_tags() + if not tags: + emit("AGENT_ERROR", { + "runId": self.run_id, + "code": "no_tags_found", + "message": "No ScadaTag nodes with readable tag paths found.", + "recoverable": True, + "timestamp": utc_now_iso(), + }) + metrics["cycleMs"] = int((time.time() - cycle_start) * 1000) + return metrics + 
+ tag_paths = [t["path"] for t in tags] + live_values = self.api.read_tags(tag_paths) + candidates: List[Dict[str, Any]] = [] + now = datetime.now(timezone.utc) + + for tv in live_values: + if tv.error: + continue + if not is_quality_good(tv.quality): + # quality gate: only emit quality anomalies if this persists via triage. + continue + if is_stale(tv.timestamp, int(thresholds.get("stalenessSec", 120)), now=now): + continue + + history = self.fetch_history_values(tv.path) + if len(history) < min_history: + continue + + prev_val = self._prev_values.get(tv.path) + deterministic = compute_deviation_scores( + current_value=tv.value, + history_values=history, + prev_value=prev_val, + thresholds=thresholds, + ) + curr_num = safe_float(tv.value) + if curr_num is not None: + self._prev_values[tv.path] = curr_num + + if deterministic.get("candidate"): + context = self.get_context(tv.path) + candidates.append( + { + "context": context, + "deterministic": deterministic, + "live_sample": { + "path": tv.path, + "value": tv.value, + "quality": tv.quality, + "timestamp": tv.timestamp, + "data_type": tv.data_type, + }, + } + ) + + metrics["candidates"] = len(candidates) + max_candidates = int(self.config.get("maxCandidatesPerCycle", 25)) + max_triage = int(self.config.get("maxLlmTriagesPerCycle", 5)) + shortlisted = candidates[:max_candidates] + + for idx, candidate in enumerate(shortlisted): + use_llm = idx < max_triage + triage = ( + self.run_llm_triage( + candidate["context"], + candidate["deterministic"], + candidate["live_sample"], + ) + if use_llm + else { + "summary": f"Deviation on {candidate['context'].get('tag_name', candidate['context']['tag_path'])}", + "category": candidate["deterministic"].get("category", "deviation"), + "severity": "medium", + "confidence": 0.5, + "verification_checks": [], + "probable_causes": [], + "safety_notes": [], + "rationale": "Triaged in deterministic-only mode due per-cycle LLM cap.", + "related_entities": [], + } + ) + metrics["triaged"] 
+= 1 + persisted = self.persist_event( + candidate["context"], + candidate["deterministic"], + candidate["live_sample"], + triage, + ) + if persisted: + metrics["emitted"] += 1 + emit("AGENT_EVENT", { + "runId": self.run_id, + "eventId": persisted["event_id"], + "severity": persisted["severity"], + "summary": persisted["summary"], + "category": persisted.get("category"), + "entityRefs": { + "tag": persisted.get("tag_name") or persisted.get("source_tag"), + "sourceTag": persisted.get("source_tag"), + }, + "createdAt": persisted.get("created_at"), + }) + + metrics["cycleMs"] = int((time.time() - cycle_start) * 1000) + return metrics + + def cleanup_retention(self) -> int: + retention_days = int(self.config.get("retentionDays", 14)) + return self.graph.cleanup_anomaly_events(retention_days=retention_days) + + def run_forever(self) -> int: + self.init_schema() + self.upsert_run("running") + emit("AGENT_STATUS", { + "runId": self.run_id, + "state": "running", + "cycleMs": 0, + "candidates": 0, + "triaged": 0, + "emitted": 0, + "timestamp": utc_now_iso(), + }) + + poll_ms = int(self.config.get("pollIntervalMs", 15000)) + cleanup_every = max(1, int(self.config.get("cleanupEveryCycles", 40))) + exit_code = 0 + reason = "stopped" + + while self._running: + self._cycle_count += 1 + cycle_started = time.time() + try: + metrics = self.run_cycle() + self.heartbeat(metrics) + emit("AGENT_STATUS", { + "runId": self.run_id, + "state": "running", + "cycleMs": metrics["cycleMs"], + "candidates": metrics["candidates"], + "triaged": metrics["triaged"], + "emitted": metrics["emitted"], + "timestamp": utc_now_iso(), + }) + if self._cycle_count % cleanup_every == 0: + deleted = self.cleanup_retention() + if deleted > 0: + emit("AGENT_STATUS", { + "runId": self.run_id, + "state": "retention_cleanup", + "cycleMs": 0, + "candidates": 0, + "triaged": 0, + "emitted": deleted, + "timestamp": utc_now_iso(), + }) + except Exception as exc: + reason = "failed" + exit_code = 1 + 
emit("AGENT_ERROR", { + "runId": self.run_id, + "code": "cycle_error", + "message": str(exc), + "recoverable": True, + "timestamp": utc_now_iso(), + }) + + elapsed_ms = int((time.time() - cycle_started) * 1000) + remaining = max(0, poll_ms - elapsed_ms) / 1000.0 + if remaining > 0: + time.sleep(remaining) + + self.upsert_run("stopped" if reason != "failed" else "failed", reason=reason) + emit("AGENT_COMPLETE", { + "runId": self.run_id, + "success": exit_code == 0, + "reason": reason, + "stoppedAt": utc_now_iso(), + }) + return exit_code + + # ----------------------------- + # Single-operation helpers + # ----------------------------- + def list_events(self, limit: int, state: Optional[str], severity: Optional[str], run_id: Optional[str]) -> Dict[str, Any]: + events = self.graph.list_anomaly_events(limit=limit, state=state, severity=severity, run_id=run_id) + return {"success": True, "events": events} + + def get_event(self, event_id: str) -> Dict[str, Any]: + event = self.graph.get_anomaly_event(event_id) + if not event: + return {"success": False, "error": f"Event not found: {event_id}"} + return {"success": True, "event": event} + + def ack_event(self, event_id: str, note: Optional[str]) -> Dict[str, Any]: + with self.graph.session() as session: + result = session.run( + """ + MATCH (e:AnomalyEvent {event_id: $event_id}) + SET e.state = 'acknowledged', + e.acknowledged_at = datetime(), + e.ack_note = $note, + e.updated_at = datetime() + RETURN count(e) AS cnt + """, + event_id=event_id, + note=note or "", + ) + record = result.single() + if not record or record["cnt"] == 0: + return {"success": False, "error": f"Event not found: {event_id}"} + return {"success": True, "eventId": event_id} + + def get_status(self, run_id: str) -> Dict[str, Any]: + with self.graph.session() as session: + result = session.run( + """ + MATCH (r:AgentRun {run_id: $run_id}) + RETURN r + LIMIT 1 + """, + run_id=run_id, + ) + row = result.single() + if not row: + return {"success": 
False, "error": f"Run not found: {run_id}"} + props = dict(row["r"]) + return { + "success": True, + "status": props.get("status"), + "metrics": { + "cycleCount": props.get("cycle_count", 0), + "lastCycleMs": props.get("last_cycle_ms", 0), + "lastCandidates": props.get("last_candidates", 0), + "lastTriaged": props.get("last_triaged", 0), + "lastEmitted": props.get("last_emitted", 0), + }, + "lastHeartbeatAt": props.get("last_heartbeat_at"), + "run": props, + } + + +def _load_fixture_cases(path: Path) -> List[Dict[str, Any]]: + data = json.loads(path.read_text(encoding="utf-8")) + if isinstance(data, dict): + return data.get("cases", []) + if isinstance(data, list): + return data + return [] + + +def replay_fixtures(config_json: Optional[str], fixture_path: str) -> Dict[str, Any]: + config = merge_defaults(json.loads(config_json) if config_json else {}) + path = Path(fixture_path) + cases = _load_fixture_cases(path) + thresholds = config.get("thresholds", {}) + passed = 0 + failures: List[Dict[str, Any]] = [] + + for case in cases: + result = compute_deviation_scores( + current_value=case.get("current_value"), + history_values=case.get("history_values", []), + prev_value=case.get("prev_value"), + thresholds=thresholds, + ) + expected = bool(case.get("expected_candidate", False)) + if result.get("candidate") == expected: + passed += 1 + else: + failures.append( + { + "id": case.get("id"), + "expected_candidate": expected, + "actual_candidate": result.get("candidate"), + "category": result.get("category"), + "reasons": result.get("reasons", []), + } + ) + + return { + "success": len(failures) == 0, + "total": len(cases), + "passed": passed, + "failed": len(failures), + "failures": failures, + } + + +def main() -> int: + parser = argparse.ArgumentParser(description="Anomaly monitor worker") + sub = parser.add_subparsers(dest="command", required=True) + + p_run = sub.add_parser("run", help="Run continuous anomaly monitoring") + p_run.add_argument("--run-id", 
help="Optional run id") + p_run.add_argument("--config-json", default="{}", help="JSON config string") + + p_status = sub.add_parser("status", help="Get status for one run") + p_status.add_argument("--run-id", required=True) + + p_list = sub.add_parser("list-events", help="List anomaly events") + p_list.add_argument("--limit", type=int, default=100) + p_list.add_argument("--state") + p_list.add_argument("--severity") + p_list.add_argument("--run-id") + + p_get = sub.add_parser("get-event", help="Get one anomaly event") + p_get.add_argument("--event-id", required=True) + + p_ack = sub.add_parser("ack-event", help="Acknowledge one anomaly event") + p_ack.add_argument("--event-id", required=True) + p_ack.add_argument("--note") + + p_cleanup = sub.add_parser("cleanup", help="Delete old anomaly events") + p_cleanup.add_argument("--retention-days", type=int, default=14) + + p_replay = sub.add_parser("replay-fixtures", help="Validate deterministic scoring against fixtures") + p_replay.add_argument("--fixture-file", required=True) + p_replay.add_argument("--config-json", default="{}") + + args = parser.parse_args() + + if args.command == "replay-fixtures": + result = replay_fixtures(args.config_json, args.fixture_file) + print(json.dumps(result)) + return 0 if result["success"] else 1 + + try: + monitor = AnomalyMonitor( + config=json.loads(getattr(args, "config_json", "{}") or "{}"), + run_id=getattr(args, "run_id", None), + ) + except Exception as exc: + print(json.dumps({"success": False, "error": str(exc)})) + return 1 + + if args.command == "run": + def _signal_handler(_signum, _frame): + monitor._running = False + + signal.signal(signal.SIGTERM, _signal_handler) + if hasattr(signal, "SIGINT"): + signal.signal(signal.SIGINT, _signal_handler) + return monitor.run_forever() + + if args.command == "status": + print(json.dumps(monitor.get_status(args.run_id), default=str)) + return 0 + + if args.command == "list-events": + print(json.dumps(monitor.list_events(args.limit, 
args.state, args.severity, args.run_id), default=str)) + return 0 + + if args.command == "get-event": + print(json.dumps(monitor.get_event(args.event_id), default=str)) + return 0 + + if args.command == "ack-event": + print(json.dumps(monitor.ack_event(args.event_id, args.note), default=str)) + return 0 + + if args.command == "cleanup": + deleted = monitor.graph.cleanup_anomaly_events(args.retention_days) + print(json.dumps({"success": True, "deleted": deleted})) + return 0 + + return 1 + + +if __name__ == "__main__": + sys.exit(main()) + diff --git a/scripts/anomaly_rules.py b/scripts/anomaly_rules.py new file mode 100644 index 0000000..2aa274d --- /dev/null +++ b/scripts/anomaly_rules.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +""" +Deterministic anomaly scoring primitives for monitoring agents. + +This module intentionally avoids external dependencies so it can run in +packaged/offline environments. +""" + +from __future__ import annotations + +import hashlib +import math +from datetime import datetime, timezone +from statistics import mean, median, pstdev +from typing import Any, Dict, List, Optional + + +def safe_float(value: Any) -> Optional[float]: + """Best-effort conversion to float.""" + if value is None: + return None + if isinstance(value, bool): + return float(value) + if isinstance(value, (int, float)): + if math.isnan(value) or math.isinf(value): + return None + return float(value) + text = str(value).strip() + if not text: + return None + try: + result = float(text) + except ValueError: + return None + if math.isnan(result) or math.isinf(result): + return None + return result + + +def parse_timestamp(ts: Optional[str]) -> Optional[datetime]: + """Parse an ISO-like timestamp to UTC-aware datetime.""" + if not ts: + return None + text = str(ts).strip() + if not text: + return None + if text.endswith("Z"): + text = text[:-1] + "+00:00" + try: + dt = datetime.fromisoformat(text) + except ValueError: + return None + if dt.tzinfo is None: + dt = 
dt.replace(tzinfo=timezone.utc) + return dt.astimezone(timezone.utc) + + +def is_quality_good(quality: Optional[str]) -> bool: + """Conservative quality gate.""" + if quality is None: + return False + q = str(quality).strip().lower() + if not q: + return False + if "good" in q or "ok" in q or q in {"192"}: + return True + return False + + +def is_stale(timestamp: Optional[str], staleness_sec: int, now: Optional[datetime] = None) -> bool: + """Return True if sample timestamp is stale or invalid.""" + if staleness_sec <= 0: + return False + parsed = parse_timestamp(timestamp) + if parsed is None: + return True + baseline = now or datetime.now(timezone.utc) + age = (baseline - parsed).total_seconds() + return age > staleness_sec + + +def _mad(values: List[float]) -> float: + """Median absolute deviation.""" + if not values: + return 0.0 + med = median(values) + abs_dev = [abs(v - med) for v in values] + return median(abs_dev) if abs_dev else 0.0 + + +def _percentile_rank(values: List[float], current: float) -> float: + """Approximate percentile rank of current within values.""" + if not values: + return 0.0 + less_or_equal = sum(1 for v in values if v <= current) + return less_or_equal / len(values) + + +def compute_deviation_scores( + current_value: Any, + history_values: List[Any], + prev_value: Any = None, + thresholds: Optional[Dict[str, float]] = None, +) -> Dict[str, Any]: + """ + Compute deterministic anomaly scores and candidate flags. + + Threshold defaults are intentionally conservative and should be configured + per process during rollout. 
+ """ + cfg = { + "z": 3.0, + "mad": 3.5, + "rate": 0.0, + "flatline_std_epsilon": 1e-6, + "stuck_window_size": 20, + } + if thresholds: + cfg.update({k: v for k, v in thresholds.items() if v is not None}) + + current = safe_float(current_value) + hist = [v for v in (safe_float(x) for x in history_values) if v is not None] + previous = safe_float(prev_value) + + result: Dict[str, Any] = { + "candidate": False, + "reasons": [], + "category": "normal", + "z_score": 0.0, + "mad_score": 0.0, + "delta_rate": 0.0, + "window_volatility": 0.0, + "percentile_rank": 0.0, + "drift_score": 0.0, + "history_points": len(hist), + } + + if current is None: + result["category"] = "invalid_value" + result["reasons"].append("current_value_not_numeric") + return result + if not hist: + result["category"] = "insufficient_history" + result["reasons"].append("history_empty") + return result + + mu = mean(hist) + sigma = pstdev(hist) if len(hist) > 1 else 0.0 + sigma = max(sigma, 1e-9) + z_score = (current - mu) / sigma + result["z_score"] = z_score + result["window_volatility"] = sigma + result["percentile_rank"] = _percentile_rank(hist, current) + + mad = _mad(hist) + mad_denom = max(mad * 1.4826, 1e-9) + mad_score = abs(current - median(hist)) / mad_denom + result["mad_score"] = mad_score + + if previous is not None: + result["delta_rate"] = abs(current - previous) + + if abs(z_score) >= float(cfg["z"]): + result["candidate"] = True + result["reasons"].append("z_score_threshold") + if mad_score >= float(cfg["mad"]): + result["candidate"] = True + result["reasons"].append("mad_score_threshold") + if float(cfg["rate"]) > 0 and result["delta_rate"] >= float(cfg["rate"]): + result["candidate"] = True + result["reasons"].append("delta_rate_threshold") + + if len(hist) >= 20: + midpoint = len(hist) // 2 + first_half = hist[:midpoint] + second_half = hist[midpoint:] + trend_delta = abs(mean(second_half) - mean(first_half)) + trend_score = trend_delta / sigma + result["drift_score"] = 
trend_score + if trend_score >= 1.25 and (result["percentile_rank"] >= 0.85 or result["percentile_rank"] <= 0.15): + result["candidate"] = True + result["reasons"].append("drift_trend") + + recent = hist[-int(max(3, cfg["stuck_window_size"])) :] + recent_std = pstdev(recent) if len(recent) > 1 else 0.0 + if recent_std <= float(cfg["flatline_std_epsilon"]): + if previous is not None and abs(current - previous) <= float(cfg["flatline_std_epsilon"]): + result["candidate"] = True + result["reasons"].append("flatline_detected") + result["category"] = "stuck" + + if result["category"] == "normal" and result["candidate"]: + if "flatline_detected" in result["reasons"]: + result["category"] = "stuck" + elif result["delta_rate"] > 0 and "delta_rate_threshold" in result["reasons"]: + result["category"] = "spike" + elif "drift_trend" in result["reasons"]: + result["category"] = "drift" + elif abs(z_score) > 0 and len(hist) >= 20: + # Drift-like heuristic for sustained tail position with moderate rate + if result["percentile_rank"] >= 0.95 or result["percentile_rank"] <= 0.05: + result["category"] = "drift" + else: + result["category"] = "spike" + else: + result["category"] = "deviation" + + return result + + +def dedup_key(tag_path: str, category: str, bucket_minutes: int = 10) -> str: + """Create a deterministic dedup signature for event cooldown windows.""" + now = datetime.now(timezone.utc) + bucket = int(now.timestamp() // max(1, bucket_minutes * 60)) + raw = f"{tag_path}|{category}|{bucket}" + return hashlib.sha1(raw.encode("utf-8")).hexdigest() + diff --git a/scripts/fixtures/anomaly_replay_cases.json b/scripts/fixtures/anomaly_replay_cases.json new file mode 100644 index 0000000..544cd3f --- /dev/null +++ b/scripts/fixtures/anomaly_replay_cases.json @@ -0,0 +1,32 @@ +{ + "cases": [ + { + "id": "normal-baseline", + "current_value": 50.3, + "prev_value": 50.1, + "history_values": [49.9, 50.1, 50.0, 50.2, 50.1, 49.8, 50.3, 50.0, 49.9, 50.2, 50.1, 50.0, 49.9, 50.2, 50.1, 
50.0, 50.2, 49.8, 50.0, 50.1, 50.0, 49.9, 50.1, 50.2, 50.0, 50.1, 49.9, 50.0, 50.1, 50.0], + "expected_candidate": false + }, + { + "id": "sudden-spike", + "current_value": 91.0, + "prev_value": 49.8, + "history_values": [49.9, 50.1, 50.0, 50.2, 50.1, 49.8, 50.3, 50.0, 49.9, 50.2, 50.1, 50.0, 49.9, 50.2, 50.1, 50.0, 50.2, 49.8, 50.0, 50.1, 50.0, 49.9, 50.1, 50.2, 50.0, 50.1, 49.9, 50.0, 50.1, 50.0], + "expected_candidate": true + }, + { + "id": "slow-drift-tail", + "current_value": 61.5, + "prev_value": 61.0, + "history_values": [50.0, 50.2, 50.3, 50.5, 50.7, 50.9, 51.1, 51.4, 51.8, 52.1, 52.6, 53.0, 53.5, 54.0, 54.5, 55.1, 55.6, 56.0, 56.6, 57.0, 57.5, 58.0, 58.4, 58.9, 59.4, 59.9, 60.2, 60.6, 60.9, 61.2], + "expected_candidate": true + }, + { + "id": "flatline-stuck", + "current_value": 72.0, + "prev_value": 72.0, + "history_values": [72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0], + "expected_candidate": true + } + ] +} diff --git a/scripts/graph_api.py b/scripts/graph_api.py index 8e36e7c..e3bff45 100644 --- a/scripts/graph_api.py +++ b/scripts/graph_api.py @@ -77,6 +77,8 @@ class GraphAPI: "processdeviation": "mes", "functionallocation": "mes", "vendor": "mes", + "agentrun": "anomaly", + "anomalyevent": "anomaly", } # Color palette for node types @@ -91,6 +93,7 @@ class GraphAPI: "flows": "#E91E63", "overview": "#607D8B", "mes": "#00897B", + "anomaly": "#F44336", "other": "#9E9E9E", } @@ -252,9 +255,11 @@ def get_neighbors( WHERE center.name = $node_id OR center.name ENDS WITH $node_id OR center.name CONTAINS $node_id + OR center.event_id = $node_id + OR center.run_id = $node_id RETURN elementId(center) as id, labels(center)[0] as type, - center.name as label, + coalesce(center.name, center.event_id, center.run_id, center.symptom, center.phrase, 'unknown') as label, properties(center) as props LIMIT 1 """ @@ -264,9 +269,11 @@ def 
get_neighbors( WHERE center.name = $node_id OR center.name ENDS WITH $node_id OR center.name CONTAINS $node_id + OR center.event_id = $node_id + OR center.run_id = $node_id RETURN elementId(center) as id, labels(center)[0] as type, - center.name as label, + coalesce(center.name, center.event_id, center.run_id, center.symptom, center.phrase, 'unknown') as label, properties(center) as props LIMIT 1 """ diff --git a/scripts/ignition_api_client.py b/scripts/ignition_api_client.py index d0d7e41..e8fbccf 100644 --- a/scripts/ignition_api_client.py +++ b/scripts/ignition_api_client.py @@ -22,7 +22,11 @@ from urllib.parse import urljoin, quote import requests -from dotenv import load_dotenv +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - optional fallback for minimal envs + def load_dotenv(*_args, **_kwargs): + return False load_dotenv() @@ -243,60 +247,55 @@ def read_tags(self, paths: List[str]) -> List[TagValue]: @staticmethod def _local_iso_to_utc(dt_str: str) -> str: - """Convert a bare ISO datetime string (assumed local) to UTC. + """ + Convert a bare ISO datetime string (assumed local time) to UTC. - If the string already has a timezone indicator (Z, +, -) - or looks like epoch milliseconds, it is returned unchanged. + If the input already contains timezone info or appears to be epoch + milliseconds, it is returned unchanged. """ from datetime import datetime, timezone - s = str(dt_str).strip() + text = str(dt_str).strip() + if not text: + return text - # Epoch ms – pass through - if s.isdigit(): - return s + # Epoch millis (or seconds) should pass through unchanged. + if text.isdigit(): + return text - # Already has TZ info – pass through - if s.endswith("Z") or "+" in s[10:] or s[10:].count("-") > 0: - return s + # Already timezone-aware. 
+ if text.endswith("Z") or "+" in text[10:] or text[10:].count("-") > 0: + return text try: - naive = datetime.fromisoformat(s) - local_dt = naive.astimezone() # attach local TZ + naive = datetime.fromisoformat(text) + local_dt = naive.astimezone() utc_dt = local_dt.astimezone(timezone.utc) return utc_dt.strftime("%Y-%m-%dT%H:%M:%S") except (ValueError, TypeError): - return s + return text def query_tag_history( self, tag_paths: List[str], start_date: str, end_date: str, - return_size: int = 100, + return_size: int = 200, aggregation_mode: str = "Average", return_format: str = "Wide", interval_minutes: Optional[int] = None, include_bounding_values: bool = False, ) -> Optional[Any]: - """Query historical tag values via the WebDev queryTagHistory endpoint. - - Bare ISO datetime strings (no timezone suffix) are assumed to be in - the server's local timezone and are converted to UTC before sending - to the gateway (which interprets all times as UTC). - - Args: - tag_paths: Tag paths with provider prefix, e.g. ['[default]Folder/Tag']. - start_date: ISO datetime string (local) or epoch ms. - end_date: ISO datetime string (local) or epoch ms. - return_size: Max rows to return (default 100). - aggregation_mode: Average, MinMax, LastValue, Sum, Minimum, Maximum. - return_format: Wide or Tall. - interval_minutes: Aggregation interval in minutes. - include_bounding_values: Include values at boundaries. """ - normalised = [self._ensure_provider_prefix(p) for p in tag_paths] + Query historical tag values from the WebDev queryTagHistory endpoint. + + Dates may be passed as local ISO strings; they are converted to UTC + to match Ignition endpoint expectations. 
+ """ + if not tag_paths: + return {"error": "No tag paths provided", "tagPaths": []} + normalised = [self._ensure_provider_prefix(p) for p in tag_paths] utc_start = self._local_iso_to_utc(start_date) utc_end = self._local_iso_to_utc(end_date) @@ -304,19 +303,17 @@ def query_tag_history( "tagPaths": ",".join(normalised), "startDate": utc_start, "endDate": utc_end, - "returnSize": return_size, + "returnSize": int(return_size), "aggregationMode": aggregation_mode, "returnFormat": return_format, - "includeBoundingValues": str(include_bounding_values).lower(), + "includeBoundingValues": str(bool(include_bounding_values)).lower(), } if interval_minutes is not None: - params["intervalMinutes"] = interval_minutes + params["intervalMinutes"] = int(interval_minutes) data = self._get("system/webdev/Axilon/queryTagHistory", params=params) - if data is None: return {"error": "API request failed or not configured", "tagPaths": normalised} - return data # --------------------------------------------------------------------- # diff --git a/scripts/neo4j_ontology.py b/scripts/neo4j_ontology.py index 110719f..380e3cb 100644 --- a/scripts/neo4j_ontology.py +++ b/scripts/neo4j_ontology.py @@ -9,7 +9,11 @@ from typing import Dict, List, Optional, Any, Union from dataclasses import dataclass, field from contextlib import contextmanager -from dotenv import load_dotenv +try: + from dotenv import load_dotenv +except ImportError: # pragma: no cover - optional fallback for minimal envs + def load_dotenv(*_args, **_kwargs): + return False from neo4j import GraphDatabase, Driver, Session @@ -147,6 +151,8 @@ def create_indexes(self) -> None: "CREATE CONSTRAINT project_name IF NOT EXISTS FOR (p:Project) REQUIRE p.name IS UNIQUE", "CREATE CONSTRAINT script_name IF NOT EXISTS FOR (s:Script) REQUIRE s.name IS UNIQUE", "CREATE CONSTRAINT namedquery_name IF NOT EXISTS FOR (q:NamedQuery) REQUIRE q.name IS UNIQUE", + "CREATE CONSTRAINT agentrun_id IF NOT EXISTS FOR (r:AgentRun) REQUIRE r.run_id IS 
UNIQUE", + "CREATE CONSTRAINT anomalyevent_id IF NOT EXISTS FOR (e:AnomalyEvent) REQUIRE e.event_id IS UNIQUE", ] # Regular indexes @@ -186,6 +192,11 @@ def create_indexes(self) -> None: "CREATE INDEX hmitextlist_name IF NOT EXISTS FOR (htl:HMITextList) ON (htl.name)", "CREATE INDEX plctagtable_name IF NOT EXISTS FOR (pt:PLCTagTable) ON (pt.name)", "CREATE INDEX plctag_name IF NOT EXISTS FOR (ptg:PLCTag) ON (ptg.name)", + # Agent monitoring indexes + "CREATE INDEX anomalyevent_created IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.created_at)", + "CREATE INDEX anomalyevent_state IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.state)", + "CREATE INDEX anomalyevent_severity IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.severity)", + "CREATE INDEX anomalyevent_dedup_key IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.dedup_key)", ] for constraint in constraints: @@ -202,6 +213,95 @@ def create_indexes(self) -> None: if "already exists" not in str(e).lower(): print(f"[WARNING] Index error: {e}") + def init_agent_monitoring_schema(self) -> None: + """Ensure agent monitoring labels and indexes exist.""" + self.create_indexes() + + def list_anomaly_events( + self, + limit: int = 100, + state: Optional[str] = None, + severity: Optional[str] = None, + run_id: Optional[str] = None, + ) -> List[Dict[str, Any]]: + """List persisted anomaly events for UI feeds.""" + with self.session() as session: + clauses = [] + params: Dict[str, Any] = {"limit": max(1, min(limit, 500))} + if state: + clauses.append("e.state = $state") + params["state"] = state + if severity: + clauses.append("e.severity = $severity") + params["severity"] = severity + if run_id: + clauses.append("e.run_id = $run_id") + params["run_id"] = run_id + where = f"WHERE {' AND '.join(clauses)}" if clauses else "" + query = f""" + MATCH (e:AnomalyEvent) + {where} + OPTIONAL MATCH (e)-[:OBSERVED_ON]->(t:ScadaTag) + OPTIONAL MATCH (e)-[:AFFECTS]->(eq:Equipment) + RETURN e, collect(DISTINCT t.name) AS tags, collect(DISTINCT eq.name) AS equipment 
+ ORDER BY e.created_at DESC + LIMIT $limit + """ + result = session.run(query, **params) + events: List[Dict[str, Any]] = [] + for record in result: + node = record["e"] + props = dict(node) + props["tags"] = [x for x in record["tags"] if x] + props["equipment"] = [x for x in record["equipment"] if x] + events.append(props) + return events + + def get_anomaly_event(self, event_id: str) -> Optional[Dict[str, Any]]: + """Get one anomaly event with linked context labels.""" + with self.session() as session: + result = session.run( + """ + MATCH (e:AnomalyEvent {event_id: $event_id}) + OPTIONAL MATCH (e)-[:OBSERVED_ON]->(t:ScadaTag) + OPTIONAL MATCH (e)-[:AFFECTS]->(eq:Equipment) + OPTIONAL MATCH (e)-[r:RELATED_TO]->(n) + RETURN e, + collect(DISTINCT t.name) AS tags, + collect(DISTINCT eq.name) AS equipment, + collect(DISTINCT {type: type(r), label: labels(n)[0], name: coalesce(n.name, n.symptom, n.phrase)}) AS related + LIMIT 1 + """, + event_id=event_id, + ) + record = result.single() + if not record: + return None + data = dict(record["e"]) + data["tags"] = [x for x in record["tags"] if x] + data["equipment"] = [x for x in record["equipment"] if x] + data["related"] = [ + x for x in record["related"] if x and x.get("name") + ] + return data + + def cleanup_anomaly_events(self, retention_days: int = 14) -> int: + """Delete old anomaly events outside retention window.""" + with self.session() as session: + result = session.run( + """ + MATCH (e:AnomalyEvent) + WHERE e.created_at IS NOT NULL + AND datetime(e.created_at) < datetime() - duration({days: $days}) + WITH collect(e) AS old_events + FOREACH (n IN old_events | DETACH DELETE n) + RETURN size(old_events) AS deleted + """, + days=max(1, retention_days), + ) + record = result.single() + return int(record["deleted"]) if record else 0 + def clear_all(self) -> None: """Clear all nodes and relationships. 
USE WITH CAUTION.""" with self.session() as session: @@ -4192,12 +4292,22 @@ def main(): "tia-projects", "tia-project-resources", "db-connections", + "init-agent-schema", + "list-anomaly-events", + "get-anomaly-event", + "cleanup-anomaly-events", ], help="Command to execute", ) parser.add_argument("--file", "-f", help="JSON file for import/export") parser.add_argument("--query", "-q", help="Query string for search") parser.add_argument("--project", "-p", help="Project name for project-resources") + parser.add_argument("--event-id", help="Event ID for get-anomaly-event") + parser.add_argument("--state", help="Filter anomaly events by state") + parser.add_argument("--severity", help="Filter anomaly events by severity") + parser.add_argument("--run-id", help="Filter anomaly events by run_id") + parser.add_argument("--limit", type=int, default=100, help="Limit results for list commands") + parser.add_argument("--retention-days", type=int, default=14, help="Retention window in days") parser.add_argument("--json", action="store_true", help="Output in JSON format") parser.add_argument( "--enrichment-status", @@ -4437,7 +4547,43 @@ def main(): f" {c['name']} ({c['database_type']}) " f"- {c['url']} [{enabled}]" ) + elif args.command == "init-agent-schema": + graph.init_agent_monitoring_schema() + print("[OK] Initialized agent monitoring schema") + + elif args.command == "list-anomaly-events": + events = graph.list_anomaly_events( + limit=args.limit, + state=args.state, + severity=args.severity, + run_id=args.run_id, + ) + if args.json: + print(json_module.dumps(events)) + else: + print(f"Anomaly events: {len(events)}") + for event in events: + print( + f"- {event.get('event_id')} {event.get('severity')} " + f"{event.get('summary', '')[:80]}" + ) + + elif args.command == "get-anomaly-event": + if not args.event_id: + print("[ERROR] --event-id required for get-anomaly-event") + return + event = graph.get_anomaly_event(args.event_id) + if args.json: + 
print(json_module.dumps(event or {})) + else: + if not event: + print(f"[ERROR] Event not found: {args.event_id}") + return + print(json_module.dumps(event, indent=2)) + elif args.command == "cleanup-anomaly-events": + deleted = graph.cleanup_anomaly_events(args.retention_days) + print(f"[OK] Deleted {deleted} anomaly events older than {args.retention_days} days") if __name__ == "__main__": main() From 1f1f6b4dc1f7005d8d144d21a10ff2dd77cce070 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 18:06:40 +0000 Subject: [PATCH 02/18] Emit provider failures as anomaly feed events Co-authored-by: leor --- scripts/anomaly_monitor.py | 183 ++++++++++++++++++++++++++++++++----- 1 file changed, 161 insertions(+), 22 deletions(-) diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py index 70a0f4b..9049108 100644 --- a/scripts/anomaly_monitor.py +++ b/scripts/anomaly_monitor.py @@ -263,7 +263,7 @@ def _extract_history_values(self, history_data: Any, tag_path: str) -> List[floa values.append(val) return values - def fetch_history_values(self, tag_path: str) -> List[float]: + def fetch_history_values(self, tag_path: str) -> tuple[List[float], Optional[str]]: minutes = int(self.config.get("historyWindowMinutes", 360)) end_dt = datetime.now(timezone.utc) start_dt = end_dt - timedelta(minutes=minutes) @@ -275,7 +275,9 @@ def fetch_history_values(self, tag_path: str) -> List[float]: aggregation_mode="Average", return_format="Wide", ) - return self._extract_history_values(data, tag_path) + if isinstance(data, dict) and data.get("error"): + return [], str(data.get("error")) + return self._extract_history_values(data, tag_path), None def get_context(self, tag_path: str) -> Dict[str, Any]: with self.graph.session() as session: @@ -517,6 +519,97 @@ def persist_event( return event_data + def _emit_persisted_event(self, persisted: Dict[str, Any]) -> None: + """Emit normalized AGENT_EVENT payload for UI stream.""" + emit("AGENT_EVENT", { + "runId": self.run_id, 
+ "eventId": persisted["event_id"], + "severity": persisted["severity"], + "summary": persisted["summary"], + "category": persisted.get("category"), + "entityRefs": { + "tag": persisted.get("tag_name") or persisted.get("source_tag"), + "sourceTag": persisted.get("source_tag"), + }, + "createdAt": persisted.get("created_at"), + }) + + def emit_provider_failure_event( + self, + code: str, + message: str, + *, + severity: str = "high", + category: str = "quality-issue", + source_tag: Optional[str] = None, + details: Optional[Dict[str, Any]] = None, + ) -> bool: + """ + Persist and stream provider-health anomalies so failures appear in feed. + + Returns: + True if a new event was persisted (false if deduped). + """ + emit("AGENT_ERROR", { + "runId": self.run_id, + "code": code, + "message": message, + "recoverable": True, + "timestamp": utc_now_iso(), + }) + + tag = source_tag or f"provider://{code}" + detail_blob = json.dumps(details or {}, default=str) + context = { + "tag_path": tag, + "tag_name": source_tag or "ProviderHealth", + "equipment": [], + "symptoms": [], + "causes": [], + "patterns": [], + "safety": [], + } + deterministic = { + "candidate": True, + "reasons": [code], + "category": category, + "z_score": 0.0, + "mad_score": 0.0, + "delta_rate": 0.0, + "window_volatility": 0.0, + "history_points": 0, + } + triage = { + "summary": message, + "category": category, + "severity": severity, + "confidence": 0.9, + "probable_causes": [message], + "verification_checks": [ + "Check Ignition gateway connectivity and credentials.", + "Validate tag provider availability and endpoint health.", + ], + "safety_notes": [], + "rationale": f"Provider health event ({code}). 
Details: {detail_blob}", + "related_entities": [], + } + persisted = self.persist_event( + context=context, + deterministic=deterministic, + live_sample={ + "path": tag, + "value": "", + "quality": "Bad", + "timestamp": utc_now_iso(), + "data_type": "provider_health", + }, + triage=triage, + ) + if persisted: + self._emit_persisted_event(persisted) + return True + return False + # ----------------------------- # Monitoring loop # ----------------------------- @@ -527,13 +620,14 @@ def run_cycle(self) -> Dict[str, Any]: min_history = int(self.config.get("minHistoryPoints", 30)) if not self.api.is_configured: - emit("AGENT_ERROR", { - "runId": self.run_id, - "code": "ignition_not_configured", - "message": "Ignition API URL/token not configured.", - "recoverable": True, - "timestamp": utc_now_iso(), - }) + emitted = self.emit_provider_failure_event( + "ignition_not_configured", + "Ignition API URL/token not configured.", + severity="critical", + category="state-conflict", + ) + if emitted: + metrics["emitted"] += 1 metrics["cycleMs"] = int((time.time() - cycle_start) * 1000) return metrics @@ -553,17 +647,31 @@ def run_cycle(self) -> Dict[str, Any]: live_values = self.api.read_tags(tag_paths) candidates: List[Dict[str, Any]] = [] now = datetime.now(timezone.utc) + live_error_count = 0 + live_error_samples: List[str] = [] + history_error_count = 0 + history_error_samples: List[str] = [] + valid_live_count = 0 for tv in live_values: if tv.error: + live_error_count += 1 + if len(live_error_samples) < 5: + live_error_samples.append(f"{tv.path}: {tv.error}") continue + valid_live_count += 1 if not is_quality_good(tv.quality): # quality gate: only emit quality anomalies if this persists via triage. 
continue if is_stale(tv.timestamp, int(thresholds.get("stalenessSec", 120)), now=now): continue - history = self.fetch_history_values(tv.path) + history, history_error = self.fetch_history_values(tv.path) + if history_error: + history_error_count += 1 + if len(history_error_samples) < 5: + history_error_samples.append(f"{tv.path}: {history_error}") + continue if len(history) < min_history: continue @@ -594,6 +702,48 @@ def run_cycle(self) -> Dict[str, Any]: } ) + if live_values and live_error_count == len(live_values): + emitted = self.emit_provider_failure_event( + "live_tag_provider_failed", + f"Live tag provider failed for all reads ({live_error_count}/{len(live_values)}).", + severity="high", + category="quality-issue", + details={"samples": live_error_samples}, + ) + if emitted: + metrics["emitted"] += 1 + elif live_error_count > 0: + emitted = self.emit_provider_failure_event( + "live_tag_provider_partial_failure", + f"Live tag provider partially failed ({live_error_count}/{len(live_values)} reads).", + severity="medium", + category="quality-issue", + details={"samples": live_error_samples}, + ) + if emitted: + metrics["emitted"] += 1 + + if valid_live_count > 0 and history_error_count >= max(1, int(valid_live_count * 0.8)): + emitted = self.emit_provider_failure_event( + "history_provider_failed", + f"History provider failed for most queries ({history_error_count}/{valid_live_count}).", + severity="high", + category="quality-issue", + details={"samples": history_error_samples}, + ) + if emitted: + metrics["emitted"] += 1 + elif history_error_count > 0: + emitted = self.emit_provider_failure_event( + "history_provider_partial_failure", + f"History provider partially failed ({history_error_count}/{valid_live_count}).", + severity="medium", + category="quality-issue", + details={"samples": history_error_samples}, + ) + if emitted: + metrics["emitted"] += 1 + metrics["candidates"] = len(candidates) max_candidates = int(self.config.get("maxCandidatesPerCycle", 
25)) max_triage = int(self.config.get("maxLlmTriagesPerCycle", 5)) @@ -629,18 +779,7 @@ def run_cycle(self) -> Dict[str, Any]: ) if persisted: metrics["emitted"] += 1 - emit("AGENT_EVENT", { - "runId": self.run_id, - "eventId": persisted["event_id"], - "severity": persisted["severity"], - "summary": persisted["summary"], - "category": persisted.get("category"), - "entityRefs": { - "tag": persisted.get("tag_name") or persisted.get("source_tag"), - "sourceTag": persisted.get("source_tag"), - }, - "createdAt": persisted.get("created_at"), - }) + self._emit_persisted_event(persisted) metrics["cycleMs"] = int((time.time() - cycle_start) * 1000) return metrics From 1a17e651432ab28c7109ef46bc5fc05987e4be53 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 18:19:17 +0000 Subject: [PATCH 03/18] Migrate tests to pytest with ingest coverage Co-authored-by: leor --- .gitignore | 3 +- pytest.ini | 4 + requirements-dev.txt | 1 + tests/README.md | 46 +++++ tests/conftest.py | 35 ++++ .../integration/simulated_ignition_server.py | 170 ++++++++++++++++++ .../integration/test_live_value_sim_server.py | 75 ++++++++ tests/unit/test_anomaly_rules.py | 64 +++++++ tests/unit/test_ingest_siemens_parser.py | 72 ++++++++ tests/unit/test_ingest_workbench_parser.py | 119 ++++++++++++ 10 files changed, 587 insertions(+), 2 deletions(-) create mode 100644 pytest.ini create mode 100644 requirements-dev.txt create mode 100644 tests/README.md create mode 100644 tests/conftest.py create mode 100644 tests/integration/simulated_ignition_server.py create mode 100644 tests/integration/test_live_value_sim_server.py create mode 100644 tests/unit/test_anomaly_rules.py create mode 100644 tests/unit/test_ingest_siemens_parser.py create mode 100644 tests/unit/test_ingest_workbench_parser.py diff --git a/.gitignore b/.gitignore index 085a6d7..28f5878 100644 --- a/.gitignore +++ b/.gitignore @@ -9,8 +9,7 @@ venv/ ENV/ .venv -# Test files and outputs -tests/ +# Test outputs *_updated*.xml 
*_applied*.xml *_diffs/ diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..3b2c446 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +testpaths = tests +python_files = test_*.py +addopts = -q diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..e079f8a --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1 @@ +pytest diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..350a8d4 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,46 @@ +# Test Flow: Agents Monitoring + Ingest + +This repository now includes a lightweight test scaffold using `pytest`. + +## Layout + +- `tests/unit/` + - `test_anomaly_rules.py` + Unit tests for deterministic anomaly scoring and quality/staleness gates. + - `test_ingest_workbench_parser.py` + Unit tests for workbench ingest parsing. + - `test_ingest_siemens_parser.py` + Unit tests for Siemens `.st` ingest parsing. + +- `tests/integration/` + - `simulated_ignition_server.py` + Local simulated live/history webserver implementing: + - `/system/webdev/Axilon/getTags` + - `/system/webdev/Axilon/queryTagHistory` + - `test_live_value_sim_server.py` + Integration tests for `IgnitionApiClient` + anomaly scoring with simulated live values. + +## Run all tests + +```bash +python3 -m pytest +``` + +## Run only unit tests + +```bash +python3 -m pytest tests/unit +``` + +## Run only integration tests + +```bash +python3 -m pytest tests/integration +``` + +## Notes + +- Integration tests are fully local and do **not** require a real Ignition gateway. +- LLM services are not required for these tests. +- Neo4j is not required for this test suite. 
+ diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5b51088 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[1] +SCRIPTS_DIR = REPO_ROOT / "scripts" +INTEGRATION_DIR = REPO_ROOT / "tests" / "integration" + +for path in (SCRIPTS_DIR, INTEGRATION_DIR): + path_str = str(path) + if path_str not in sys.path: + sys.path.insert(0, path_str) + + +@pytest.fixture +def sim_ignition(): + from simulated_ignition_server import ( + start_simulated_ignition_server, + stop_simulated_ignition_server, + ) + + server, thread, state, base_url = start_simulated_ignition_server() + try: + yield { + "server": server, + "thread": thread, + "state": state, + "base_url": base_url, + } + finally: + stop_simulated_ignition_server(server, thread) diff --git a/tests/integration/simulated_ignition_server.py b/tests/integration/simulated_ignition_server.py new file mode 100644 index 0000000..607f316 --- /dev/null +++ b/tests/integration/simulated_ignition_server.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" +Simulated Ignition WebDev endpoints for local integration tests. 
+""" + +from __future__ import annotations + +import json +import threading +from dataclasses import dataclass, field +from datetime import datetime, timedelta, timezone +from http.server import BaseHTTPRequestHandler, HTTPServer +from typing import Dict, List, Tuple +from urllib.parse import parse_qs, urlparse + + +def _utc_iso(offset_minutes: int = 0) -> str: + return (datetime.now(timezone.utc) + timedelta(minutes=offset_minutes)).isoformat() + + +@dataclass +class SimulatedIgnitionState: + fail_live_reads: bool = False + fail_history_reads: bool = False + live_tags: Dict[str, Dict] = field(default_factory=dict) + tag_history: Dict[str, List[Tuple[str, float]]] = field(default_factory=dict) + + def __post_init__(self) -> None: + if not self.live_tags: + self.live_tags = { + "[default]Line/Throughput": { + "value": 95.0, + "quality": "Good", + "timestamp": _utc_iso(), + "dataType": "Float8", + }, + "[default]Line/Temperature": { + "value": 42.0, + "quality": "Good", + "timestamp": _utc_iso(), + "dataType": "Float8", + }, + } + if not self.tag_history: + base = [49.9, 50.1, 50.0, 50.2, 50.1, 49.8, 50.3, 50.0, 49.9, 50.2] + self.tag_history = { + "[default]Line/Throughput": [ + (_utc_iso(offset_minutes=-(len(base) - i)), value) + for i, value in enumerate(base) + ], + "[default]Line/Temperature": [ + (_utc_iso(offset_minutes=-(len(base) - i)), 41.5 + (i * 0.1)) + for i in range(len(base)) + ], + } + + +class _IgnitionHandler(BaseHTTPRequestHandler): + state: SimulatedIgnitionState + + def _send_json(self, payload, status: int = 200) -> None: + body = json.dumps(payload).encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self): # noqa: N802 - BaseHTTPRequestHandler naming + parsed = urlparse(self.path) + path = parsed.path + query = parse_qs(parsed.query) + + if path == "/system/webdev/Axilon/getTags": + 
if self.state.fail_live_reads: + self._send_json({"error": "simulated live provider failure"}, status=503) + return + + raw = query.get("tagPaths", [""])[0] + tag_paths = [p.strip() for p in raw.split(",") if p.strip()] + tags = [] + for tag_path in tag_paths: + data = self.state.live_tags.get(tag_path) + if not data: + tags.append( + { + "tagPath": tag_path, + "value": None, + "quality": "Bad", + "isGood": False, + "timestamp": _utc_iso(), + "dataType": "Unknown", + } + ) + continue + tags.append( + { + "tagPath": tag_path, + "value": data.get("value"), + "quality": data.get("quality", "Good"), + "isGood": str(data.get("quality", "Good")).lower() == "good", + "timestamp": data.get("timestamp", _utc_iso()), + "dataType": data.get("dataType", "Unknown"), + } + ) + self._send_json({"success": True, "count": len(tags), "tags": tags}) + return + + if path == "/system/webdev/Axilon/queryTagHistory": + if self.state.fail_history_reads: + self._send_json({"error": "simulated history provider failure"}, status=503) + return + + raw = query.get("tagPaths", [""])[0] + tag_paths = [p.strip() for p in raw.split(",") if p.strip()] + rows = [] + + primary_path = tag_paths[0] if tag_paths else "[default]Line/Throughput" + primary_hist = self.state.tag_history.get(primary_path, []) + for ts, _ in primary_hist: + row = {"timestamp": ts} + for tag_path in tag_paths: + values = self.state.tag_history.get(tag_path, []) + match_val = None + for hist_ts, hist_val in values: + if hist_ts == ts: + match_val = hist_val + break + if match_val is None and values: + match_val = values[-1][1] + row[tag_path] = match_val + rows.append(row) + + self._send_json( + { + "success": True, + "rows": rows, + "tagPaths": tag_paths, + "returnFormat": "Wide", + } + ) + return + + self._send_json({"error": f"unsupported endpoint: {path}"}, status=404) + + def log_message(self, format, *args): # noqa: A003 - stdlib signature + # Silence default HTTP request logs during tests. 
+ return + + +def start_simulated_ignition_server() -> tuple[HTTPServer, threading.Thread, SimulatedIgnitionState, str]: + state = SimulatedIgnitionState() + handler_cls = type( + "IgnitionHandlerWithState", + (_IgnitionHandler,), + {"state": state}, + ) + server = HTTPServer(("127.0.0.1", 0), handler_cls) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + host, port = server.server_address + base_url = f"http://{host}:{port}" + return server, thread, state, base_url + + +def stop_simulated_ignition_server(server: HTTPServer, thread: threading.Thread) -> None: + server.shutdown() + server.server_close() + thread.join(timeout=3) + diff --git a/tests/integration/test_live_value_sim_server.py b/tests/integration/test_live_value_sim_server.py new file mode 100644 index 0000000..d6feeea --- /dev/null +++ b/tests/integration/test_live_value_sim_server.py @@ -0,0 +1,75 @@ +from datetime import datetime, timedelta, timezone + +from anomaly_rules import compute_deviation_scores +from ignition_api_client import IgnitionApiClient + +def test_read_tags_history_and_detect_spike(sim_ignition): + state = sim_ignition["state"] + state.fail_live_reads = False + state.fail_history_reads = False + + client = IgnitionApiClient(base_url=sim_ignition["base_url"], api_token="token") + try: + tag_path = "[default]Line/Throughput" + tv = client.read_tag(tag_path) + assert tv.error is None + assert tv.quality == "Good" + assert float(tv.value) == 95.0 + + start = (datetime.now(timezone.utc) - timedelta(hours=1)).replace(microsecond=0).isoformat() + end = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + history = client.query_tag_history([tag_path], start, end, return_size=100) + assert isinstance(history, dict) + assert "rows" in history + + history_values = [ + row[tag_path] + for row in history["rows"] + if isinstance(row, dict) and tag_path in row and row[tag_path] is not None + ] + assert len(history_values) > 5 + + score = 
compute_deviation_scores( + current_value=tv.value, + history_values=history_values, + prev_value=55.0, + thresholds={"z": 3.0, "mad": 3.5, "rate": 10.0}, + ) + assert score["candidate"] + assert score["category"] in {"spike", "deviation", "drift"} + finally: + client.close() + + +def test_live_provider_failure_surfaces_as_read_error(sim_ignition): + state = sim_ignition["state"] + state.fail_live_reads = True + + client = IgnitionApiClient(base_url=sim_ignition["base_url"], api_token="token") + try: + tv = client.read_tag("[default]Line/Throughput") + assert tv.error is not None + assert "failed" in tv.error.lower() + finally: + client.close() + + +def test_history_provider_failure_surfaces_error_payload(sim_ignition): + state = sim_ignition["state"] + state.fail_history_reads = True + + client = IgnitionApiClient(base_url=sim_ignition["base_url"], api_token="token") + try: + start = (datetime.now(timezone.utc) - timedelta(hours=1)).replace(microsecond=0).isoformat() + end = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + history = client.query_tag_history( + ["[default]Line/Throughput"], + start, + end, + return_size=100, + ) + assert isinstance(history, dict) + assert "error" in history + finally: + client.close() + diff --git a/tests/unit/test_anomaly_rules.py b/tests/unit/test_anomaly_rules.py new file mode 100644 index 0000000..e5f2af1 --- /dev/null +++ b/tests/unit/test_anomaly_rules.py @@ -0,0 +1,64 @@ +from datetime import datetime, timedelta, timezone + +import pytest + +from anomaly_rules import compute_deviation_scores, is_quality_good, is_stale + + +def test_detects_sharp_rise_and_sharp_drop(): + baseline = [50.0, 49.9, 50.1, 50.2, 49.8, 50.0, 50.1, 49.9, 50.0, 50.2] * 3 + + rise = compute_deviation_scores( + current_value=95.0, + history_values=baseline, + prev_value=52.0, + thresholds={"z": 3.0, "mad": 3.5, "rate": 10.0}, + ) + drop = compute_deviation_scores( + current_value=12.0, + history_values=baseline, + prev_value=49.0, + 
thresholds={"z": 3.0, "mad": 3.5, "rate": 10.0}, + ) + + assert rise["candidate"] + assert drop["candidate"] + + +def test_detects_flatline_stuck_pattern(): + flat = [72.0] * 30 + result = compute_deviation_scores( + current_value=72.0, + history_values=flat, + prev_value=72.0, + thresholds={"z": 3.0, "mad": 3.5, "rate": 1.0, "stuck_window_size": 20}, + ) + assert result["candidate"] + assert "flatline_detected" in result["reasons"] + assert result["category"] == "stuck" + + +@pytest.mark.parametrize( + "quality,expected", + [("Good", True), ("OK", True), ("Bad", False), (None, False)], +) +def test_quality_helper(quality, expected): + assert is_quality_good(quality) is expected + + +def test_staleness_helper(): + recent_ts = datetime.now(timezone.utc).isoformat() + old_ts = (datetime.now(timezone.utc) - timedelta(minutes=15)).isoformat() + assert not is_stale(recent_ts, staleness_sec=300) + assert is_stale(old_ts, staleness_sec=300) + + +def test_non_numeric_current_value_is_rejected(): + result = compute_deviation_scores( + current_value="not-a-number", + history_values=[1, 2, 3, 4, 5], + prev_value=3, + ) + assert not result["candidate"] + assert result["category"] == "invalid_value" + diff --git a/tests/unit/test_ingest_siemens_parser.py b/tests/unit/test_ingest_siemens_parser.py new file mode 100644 index 0000000..935bf71 --- /dev/null +++ b/tests/unit/test_ingest_siemens_parser.py @@ -0,0 +1,72 @@ +from pathlib import Path + +from siemens_parser import SiemensSTParser + + +SAMPLE_ST = """ +NAMESPACE Plant.Process + +TYPE MotorData : STRUCT + Speed : REAL; +END_STRUCT +END_TYPE + +CLASS MotorFB +VAR_INPUT + StartCmd : BOOL; // start command +END_VAR +VAR_OUTPUT + Running : BOOL; +END_VAR +METHOD PUBLIC Execute : BOOL +VAR + tempVar : INT := 1; +END_VAR +Running := StartCmd; +END_METHOD +END_CLASS + +PROGRAM MainProgram +VAR + Counter : INT := 0; +END_VAR +Counter := Counter + 1; +END_PROGRAM + +CONFIGURATION Config1 +TASK MainTask(INTERVAL := T#100MS, PRIORITY 
:= 1); +PROGRAM PLC_PRG WITH MainTask: MainProgram; +END_CONFIGURATION + +END_NAMESPACE +""" + + +def test_parse_structured_text_blocks(tmp_path): + st_path = Path(tmp_path) / "sample.st" + st_path.write_text(SAMPLE_ST, encoding="utf-8") + + parser = SiemensSTParser() + blocks = parser.parse_file(str(st_path)) + assert len(blocks) >= 4 + + by_name = {b.name: b for b in blocks} + assert "MotorData" in by_name + assert by_name["MotorData"].type == "UDT" + assert by_name["MotorData"].local_tags[0].name == "Speed" + + assert "MotorFB" in by_name + fb = by_name["MotorFB"] + assert fb.type == "FB" + assert any(t.name == "StartCmd" and t.direction == "INPUT" for t in fb.input_tags) + assert any(t.name == "Running" and t.direction == "OUTPUT" for t in fb.output_tags) + assert any(r["name"] == "Execute" for r in fb.routines) + + assert "MainProgram" in by_name + assert by_name["MainProgram"].type == "PROGRAM" + assert "Counter := Counter + 1" in by_name["MainProgram"].raw_implementation + + assert "Config1" in by_name + assert by_name["Config1"].type == "CONFIGURATION" + assert "MainTask" in by_name["Config1"].description + diff --git a/tests/unit/test_ingest_workbench_parser.py b/tests/unit/test_ingest_workbench_parser.py new file mode 100644 index 0000000..7609490 --- /dev/null +++ b/tests/unit/test_ingest_workbench_parser.py @@ -0,0 +1,119 @@ +import json +from pathlib import Path + +from workbench_parser import WorkbenchParser + + +def test_parse_workbench_project_json_with_inline_resources(tmp_path): + root = Path(tmp_path) + + # Script file expected by WorkbenchParser._read_script_file + script_file = root / "scripts" / "PlantA" / "utility" / "tags" / "code.py" + script_file.parent.mkdir(parents=True, exist_ok=True) + script_file.write_text("def read_tag():\n return 42\n", encoding="utf-8") + + data = { + "__typeName": "WorkbenchState", + "version": "1.2.3", + "root": { + "windows": [ + { + "projectName": "PlantA", + "title": "MainView", + "path": "main/view", + 
"windowType": "perspective", + "rootContainer": { + "meta": {"name": "Root"}, + "type": "ia.container", + "propConfig": { + "props.value": { + "binding": { + "type": "tag", + "config": { + "tagPath": "[default]Line/Speed", + "bidirectional": True, + }, + } + } + }, + "children": [], + }, + } + ], + "namedQueries": [ + { + "projectName": "PlantA", + "queryName": "GetBatches", + "folderPath": "Prod\\Ops", + "query": "SELECT * FROM batches", + } + ], + "scripts": [ + { + "projectName": "PlantA", + "path": ["utility", "tags"], + "scope": "A", + } + ], + "tags": [ + { + "name": "LineSpeed", + "type": "Opc", + "dataType": "Float8", + "opcItemPath": "[default]Line/Speed", + }, + { + "name": "BatchCount", + "type": "Memory", + "dataType": "Int4", + "value": 7, + }, + ], + "udtDefinitions": [ + { + "name": "MotorUDT", + "id": "MotorUDT", + "parameters": { + "area": {"dataType": "String", "value": "A1"} + }, + "members": [ + { + "name": "Run", + "type": "opc", + "dataType": "Boolean", + "opcItemPath": "[default]Motor/Run", + "serverName": {"binding": "default"}, + } + ], + } + ], + }, + } + + project_json = root / "project.json" + project_json.write_text(json.dumps(data), encoding="utf-8") + + parser = WorkbenchParser() + backup = parser.parse_file(str(project_json)) + + assert "PlantA" in backup.projects + assert len(backup.windows) == 1 + assert backup.windows[0].name == "MainView" + assert backup.windows[0].components[0].bindings[0].target == "[default]Line/Speed" + + assert len(backup.named_queries) == 1 + assert backup.named_queries[0].id == "Prod/Ops/GetBatches" + assert "SELECT" in backup.named_queries[0].query_text + + assert len(backup.scripts) == 1 + assert "return 42" in backup.scripts[0].script_text + + tag_types = {t.name: t.tag_type for t in backup.tags} + assert tag_types["LineSpeed"] == "opc" + assert tag_types["BatchCount"] == "memory" + + assert len(backup.udt_definitions) == 1 + udt = backup.udt_definitions[0] + assert "area" in udt.parameters + assert 
udt.members[0].server_name == "default" + From e9ca37d4bbd907fd258af0ebca7e3122dcaa1d1c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 22:16:19 +0000 Subject: [PATCH 04/18] Improve anomaly visibility and clear acknowledged events Co-authored-by: leor --- electron-ui/index.html | 1 + electron-ui/main.js | 11 ++++ electron-ui/preload.js | 1 + electron-ui/renderer.js | 27 ++++++++-- scripts/anomaly_monitor.py | 101 ++++++++++++++++++++++++++++++++++++- 5 files changed, 136 insertions(+), 5 deletions(-) diff --git a/electron-ui/index.html b/electron-ui/index.html index 7e5e8a7..99ba9a1 100644 --- a/electron-ui/index.html +++ b/electron-ui/index.html @@ -593,6 +593,7 @@

Anomaly Feed

+ + diff --git a/electron-ui/main.js b/electron-ui/main.js index 43eb3fb..5b6a081 100644 --- a/electron-ui/main.js +++ b/electron-ui/main.js @@ -209,7 +209,7 @@ function normalizeAgentConfig(config = {}) { const thresholds = (config && typeof config.thresholds === 'object' && config.thresholds) || {}; const scope = (config && typeof config.scope === 'object' && config.scope) || {}; return { - pollIntervalMs: Math.max(5000, Number(config.pollIntervalMs || 15000)), + pollIntervalMs: Math.max(1000, Number(config.pollIntervalMs || 1000)), historyWindowMinutes: Math.max(10, Number(config.historyWindowMinutes || 360)), minHistoryPoints: Math.max(10, Number(config.minHistoryPoints || 30)), maxMonitoredTags: Math.max(10, Number(config.maxMonitoredTags || 200)), diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js index 9b5da03..8479580 100644 --- a/electron-ui/renderer.js +++ b/electron-ui/renderer.js @@ -3580,7 +3580,7 @@ function getAgentsElements() { function getAgentsConfigFromUI() { const el = getAgentsElements(); return { - pollIntervalMs: Number(el.cfgPoll?.value || 15000), + pollIntervalMs: Number(el.cfgPoll?.value || 1000), historyWindowMinutes: Number(el.cfgHist?.value || 360), minHistoryPoints: Number(el.cfgPoints?.value || 30), maxCandidatesPerSubsystem: 8, diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py index 2fa7f73..ff033dc 100644 --- a/scripts/anomaly_monitor.py +++ b/scripts/anomaly_monitor.py @@ -238,7 +238,7 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]: raw = dict(config or {}) thresholds = raw.get("thresholds", {}) if isinstance(raw.get("thresholds"), dict) else {} defaults = { - "pollIntervalMs": 15000, + "pollIntervalMs": 1000, "historyWindowMinutes": 360, "minHistoryPoints": 30, "maxMonitoredTags": 200, @@ -1607,7 +1607,7 @@ def run_forever(self) -> int: "timestamp": utc_now_iso(), }) - poll_ms = int(self.config.get("pollIntervalMs", 15000)) + poll_ms = int(self.config.get("pollIntervalMs", 
1000)) cleanup_every = max(1, int(self.config.get("cleanupEveryCycles", 40))) exit_code = 0 reason = "stopped" From 6dcfd15f81912b3ce743e6e5eab1507c2f6f093e Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 2 Mar 2026 23:18:37 +0000 Subject: [PATCH 15/18] Emit in-cycle agent status progress updates Co-authored-by: leor --- scripts/anomaly_monitor.py | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py index ff033dc..54065c8 100644 --- a/scripts/anomaly_monitor.py +++ b/scripts/anomaly_monitor.py @@ -1024,6 +1024,8 @@ def run_cycle(self) -> Dict[str, Any]: cycle_start = time.time() thresholds = self.config.get("thresholds", {}) stale_threshold_sec = int(thresholds.get("stalenessSec", 120)) + progress_emit_interval_tags = max(5, int(self.config.get("progressEveryTags", 10))) + progress_emit_interval_sec = max(1, int(self.config.get("progressEverySec", 2))) metrics = { "candidates": 0, "triaged": 0, @@ -1148,6 +1150,42 @@ def run_cycle(self) -> Dict[str, Any]: near_shift_unlinked = 0 stale_samples: List[Dict[str, Any]] = [] subsystem_shift_signals: Dict[str, Dict[str, Any]] = {} + processed_live_count = 0 + total_live_count = len(live_values) + last_progress_emit = 0.0 + + def emit_cycle_progress(reason: str, current_tag: str = "") -> None: + nonlocal last_progress_emit + diag = make_default_diagnostics( + staleness_threshold_sec=stale_threshold_sec, + phase="cycle_in_progress", + reason=reason, + ) + diag.update({ + "processedLiveCount": processed_live_count, + "totalLiveCount": total_live_count, + "currentTag": current_tag, + "candidatesSoFar": len(candidates), + "liveErrorCount": live_error_count, + "qualityFilteredCount": quality_filtered_count, + "staleFilteredCount": stale_filtered_count, + "historyErrorCount": history_error_count, + "linkedTags": linked_tag_count, + "unlinkedTags": unlinked_tag_count, + }) + emit("AGENT_STATUS", { + "runId": self.run_id, + "state": 
"running", + "cycleMs": int((time.time() - cycle_start) * 1000), + "candidates": len(candidates), + "triaged": 0, + "emitted": metrics.get("emitted", 0), + "diagnostics": diag, + "timestamp": utc_now_iso(), + }) + last_progress_emit = time.time() + + emit_cycle_progress("cycle_started") def _update_subsystem_signal( subsystem_ref: Dict[str, str], deterministic: Dict[str, Any], tag_path: str @@ -1180,10 +1218,19 @@ def _update_subsystem_signal( bucket["sampleTag"] = tag_path for tv in live_values: + processed_live_count += 1 tag_meta = tag_lookup.get(tv.path, {"path": tv.path, "name": tv.path}) subsystem = tag_meta.get("primary_subsystem") or _subsystem_ref("global", "all") is_linked = bool(tag_meta.get("views") or tag_meta.get("equipment")) + now_progress = time.time() + if ( + processed_live_count == 1 + or processed_live_count % progress_emit_interval_tags == 0 + or (now_progress - last_progress_emit) >= progress_emit_interval_sec + ): + emit_cycle_progress("processing_live_tags", current_tag=tv.path) + if tv.error: live_error_count += 1 if is_linked: From 5157857bb4ff28e2d4b42f16027b5c7de52f3115 Mon Sep 17 00:00:00 2001 From: Leor Barak Fishman Date: Mon, 2 Mar 2026 17:27:50 -0800 Subject: [PATCH 16/18] more agentics fixing --- electron-ui/index.html | 20 +- electron-ui/main.js | 13 ++ electron-ui/preload.js | 1 + electron-ui/renderer.js | 353 +++++++++++++++++++++++++++++++--- electron-ui/styles.css | 385 ++++++++++++++++++++++++++++++++++++- scripts/anomaly_monitor.py | 316 ++++++++++++++++++++++++++---- tests/quick_import_test.py | 76 ++++++++ 7 files changed, 1100 insertions(+), 64 deletions(-) create mode 100644 tests/quick_import_test.py diff --git a/electron-ui/index.html b/electron-ui/index.html index a4a7a9b..08adc5d 100644 --- a/electron-ui/index.html +++ b/electron-ui/index.html @@ -566,8 +566,11 @@

Long-Running Agents

- - + + + @@ -584,6 +587,18 @@

Long-Running Agents

Last heartbeatn/a
+
+
+

Subsystem Health

+
+ +
+
+
+
Start monitoring to see subsystem health.
+
+
+
@@ -1537,8 +1504,6 @@

Graph: Node

- - diff --git a/electron-ui/main.js b/electron-ui/main.js index f3034f1..ae5c557 100644 --- a/electron-ui/main.js +++ b/electron-ui/main.js @@ -215,8 +215,8 @@ function normalizeAgentConfig(config = {}) { maxMonitoredTags: Math.max(10, Number(config.maxMonitoredTags || 200)), maxCandidatesPerCycle: Math.max(1, Number(config.maxCandidatesPerCycle || 25)), maxCandidatesPerSubsystem: Math.max(1, Number(config.maxCandidatesPerSubsystem || 8)), - maxLlmTriagesPerCycle: Math.max(0, Number(config.maxLlmTriagesPerCycle || 5)), - maxLlmTriagesPerSubsystem: Math.max(0, Number(config.maxLlmTriagesPerSubsystem || 2)), + maxLlmTriagesPerCycle: Math.max(0, Number(config.maxLlmTriagesPerCycle ?? 5)), + maxLlmTriagesPerSubsystem: Math.max(0, Number(config.maxLlmTriagesPerSubsystem ?? 2)), dedupCooldownMinutes: Math.max(1, Number(config.dedupCooldownMinutes || 10)), retentionDays: Math.max(1, Number(config.retentionDays || 14)), cleanupEveryCycles: Math.max(1, Number(config.cleanupEveryCycles || 40)), @@ -1749,17 +1749,18 @@ ipcMain.handle('agents:clear-event', async (event, eventId, note = '') => { } }); -ipcMain.handle('agents:deep-analyze', async (event, eventId) => { - try { - const output = await runPythonScript('anomaly_monitor.py', [ - 'deep-analyze', - '--event-id', - String(eventId), - ]); - return JSON.parse(output || '{}'); - } catch (error) { - return { success: false, error: error.message }; +ipcMain.handle('agents:deep-analyze', async (event, eventId, eventData) => { + if (!activeAgentRun || !activeAgentRun.process || activeAgentRun.process.killed) { + return { success: false, error: 'No active agent run — deep analyze requires a running agent' }; + } + if (!eventData || !eventData.event_id) { + return { success: false, error: 'Missing event data' }; + } + const sent = sendAgentCommand({ cmd: 'deep-analyze', event: eventData }); + if (!sent) { + return { success: false, error: 'Failed to send command to agent process' }; } + return { success: true, pending: true, 
eventId: eventData.event_id }; }); ipcMain.handle('agents:cleanup', async (event, retentionDays = 14) => { @@ -1773,4 +1774,24 @@ ipcMain.handle('agents:cleanup', async (event, retentionDays = 14) => { } catch (error) { return { success: false, error: error.message }; } +}); + +function sendAgentCommand(cmd) { + if (activeAgentRun && activeAgentRun.process && activeAgentRun.process.stdin && activeAgentRun.process.stdin.writable) { + activeAgentRun.process.stdin.write(JSON.stringify(cmd) + '\n'); + return true; + } + return false; +} + +ipcMain.handle('agents:start-subsystem', async (event, subsystemId) => { + if (!activeAgentRun) return { success: false, error: 'No active agent run' }; + const sent = sendAgentCommand({ cmd: 'start-agent', subsystemId }); + return { success: sent, subsystemId }; +}); + +ipcMain.handle('agents:stop-subsystem', async (event, subsystemId) => { + if (!activeAgentRun) return { success: false, error: 'No active agent run' }; + const sent = sendAgentCommand({ cmd: 'stop-agent', subsystemId }); + return { success: sent, subsystemId }; }); \ No newline at end of file diff --git a/electron-ui/preload.js b/electron-ui/preload.js index cf1d75c..7615063 100644 --- a/electron-ui/preload.js +++ b/electron-ui/preload.js @@ -79,8 +79,10 @@ contextBridge.exposeInMainWorld('api', { agentsGetEvent: (eventId) => ipcRenderer.invoke('agents:get-event', eventId), agentsAckEvent: (eventId, note) => ipcRenderer.invoke('agents:ack-event', eventId, note), agentsClearEvent: (eventId, note) => ipcRenderer.invoke('agents:clear-event', eventId, note), - agentsDeepAnalyze: (eventId) => ipcRenderer.invoke('agents:deep-analyze', eventId), + agentsDeepAnalyze: (eventId, eventData) => ipcRenderer.invoke('agents:deep-analyze', eventId, eventData), agentsCleanup: (retentionDays) => ipcRenderer.invoke('agents:cleanup', retentionDays), + agentsStartSubsystem: (subId) => ipcRenderer.invoke('agents:start-subsystem', subId), + agentsStopSubsystem: (subId) => 
ipcRenderer.invoke('agents:stop-subsystem', subId), // Database connections getDbConnections: () => ipcRenderer.invoke('get-db-connections'), diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js index bba8767..93bf9b4 100644 --- a/electron-ui/renderer.js +++ b/electron-ui/renderer.js @@ -3536,7 +3536,7 @@ btnSaveDbCreds?.addEventListener('click', async () => { btnSaveDbCreds.disabled = false; } }); -// Agents Tab - Long-running monitoring +// Agents Tab — Per-subsystem agent monitoring // ============================================ const HEALTH_TREND_MAX_CYCLES = 20; @@ -3550,6 +3550,8 @@ const agentsState = { listenersReady: false, subsystemHealth: {}, subsystemHistory: {}, + agentStates: {}, + pendingDeepAnalyze: new Set(), }; function getAgentsElements() { @@ -3558,21 +3560,11 @@ function getAgentsElements() { btnStop: document.getElementById('btn-agents-stop'), btnRefresh: document.getElementById('btn-agents-refresh'), btnCleanup: document.getElementById('btn-agents-cleanup'), - btnDeepAnalyze: document.getElementById('btn-agents-deep-analyze'), - btnOpenGraph: document.getElementById('btn-agents-open-graph'), - btnAck: document.getElementById('btn-agents-ack'), statusChip: document.getElementById('agents-status-chip'), statusText: document.getElementById('agents-status-text'), - list: document.getElementById('agents-event-list'), - detail: document.getElementById('agents-event-detail'), filterState: document.getElementById('agents-filter-state'), filterSeverity: document.getElementById('agents-filter-severity'), filterSearch: document.getElementById('agents-filter-search'), - metricCycle: document.getElementById('agents-metric-cycle'), - metricCandidates: document.getElementById('agents-metric-candidates'), - metricTriaged: document.getElementById('agents-metric-triaged'), - metricEmitted: document.getElementById('agents-metric-emitted'), - metricHeartbeat: document.getElementById('agents-metric-heartbeat'), cfgPoll: 
document.getElementById('agents-config-poll-ms'), cfgHist: document.getElementById('agents-config-history-min'), cfgPoints: document.getElementById('agents-config-min-points'), @@ -3610,16 +3602,14 @@ function formatAgentTime(ts) { if (!ts) return 'n/a'; const d = new Date(ts); if (Number.isNaN(d.getTime())) return String(ts); - return d.toLocaleString(); + return d.toLocaleTimeString(); } function computeHealthLevel(signal) { - const avgAbsZ = parseFloat(signal.avgAbsZ || 0); - const candidateRatio = parseFloat(signal.candidateRatio || 0); const maxAbsZ = parseFloat(signal.maxAbsZ || 0); - if (candidateRatio >= 0.25 || maxAbsZ >= 5) return 'critical'; - if (candidateRatio >= 0.10 || avgAbsZ >= 2.5) return 'warning'; - if (signal.shiftRatio > 0.1 || avgAbsZ >= 1.5) return 'elevated'; + if (maxAbsZ >= 5) return 'critical'; + if (maxAbsZ >= 3) return 'warning'; + if (maxAbsZ >= 1.5) return 'elevated'; return 'healthy'; } @@ -3627,68 +3617,153 @@ function healthLevelToScore(level) { return { healthy: 0.1, elevated: 0.4, warning: 0.7, critical: 1.0 }[level] || 0.1; } -function updateSubsystemHealthFromDiagnostics(diagnostics) { - const tagMap = diagnostics?.subsystemTagMap; - if (tagMap && typeof tagMap === 'object') { - for (const [subId, info] of Object.entries(tagMap)) { - if (!agentsState.subsystemHealth[subId]) { - agentsState.subsystemHealth[subId] = { - subsystemId: subId, +function getSubsystemIdForEvent(event) { + return event.subsystem_id + || `${(event.subsystem_type || 'global')}:${(event.subsystem_name || 'all').toLowerCase()}`; +} + +function getFilteredEventsForSubsystem(subId) { + const el = getAgentsElements(); + const stateFilter = (el.filterState?.value || '').toLowerCase(); + const sevFilter = (el.filterSeverity?.value || '').toLowerCase(); + const search = (el.filterSearch?.value || '').trim().toLowerCase(); + return agentsState.events.filter((event) => { + if (getSubsystemIdForEvent(event) !== subId) return false; + if (stateFilter && 
String(event.state || '').toLowerCase() !== stateFilter) return false; + if (sevFilter && String(event.severity || '').toLowerCase() !== sevFilter) return false; + if (search) { + const haystack = [event.summary, event.source_tag, event.tag_name] + .filter(Boolean).join(' ').toLowerCase(); + if (!haystack.includes(search)) return false; + } + return true; + }); +} + +function updateSubsystemHealthFromStatus(payload) { + const diagnostics = payload.diagnostics || {}; + const phase = diagnostics.phase || ''; + const subId = payload.subsystemId || diagnostics.subsystemId; + + if (diagnostics.subsystemTagMap && typeof diagnostics.subsystemTagMap === 'object') { + for (const [sid, info] of Object.entries(diagnostics.subsystemTagMap)) { + if (!agentsState.subsystemHealth[sid]) { + agentsState.subsystemHealth[sid] = { + subsystemId: sid, subsystemType: info.type || 'global', - subsystemName: info.name || subId, + subsystemName: info.name || sid, evaluated: (info.tags || []).length, - candidate: 0, - nearShift: 0, - maxAbsZ: 0, - avgAbsZ: 0, + candidate: 0, nearShift: 0, maxAbsZ: 0, avgAbsZ: 0, healthLevel: 'healthy', tagSignals: (info.tags || []).map((t) => ({ - path: t.path, - name: t.name || t.path, - z: 0, - mad: 0, - value: null, + path: t.path, name: t.name || t.path, z: 0, mad: 0, value: null, })), }; + agentsState.agentStates[sid] = { + state: 'running', cycleCount: 0, avgCycleMs: 0, totalCandidates: 0, totalTriaged: 0, + }; } } } - const signals = diagnostics?.subsystemShiftSignals; - if (Array.isArray(signals) && signals.length) { - for (const sig of signals) { - const subId = sig.subsystemId || sig.subsystemName || 'global:all'; - const healthLevel = computeHealthLevel(sig); - agentsState.subsystemHealth[subId] = { ...sig, healthLevel }; - - if (!agentsState.subsystemHistory[subId]) { - agentsState.subsystemHistory[subId] = []; - } - const history = agentsState.subsystemHistory[subId]; - history.push({ - healthLevel, - avgAbsZ: parseFloat(sig.avgAbsZ || 0), - 
candidateRatio: parseFloat(sig.candidateRatio || 0), - candidates: parseInt(sig.candidate || 0, 10), - evaluated: parseInt(sig.evaluated || 0, 10), - ts: Date.now(), - }); - if (history.length > HEALTH_TREND_MAX_CYCLES) { - history.splice(0, history.length - HEALTH_TREND_MAX_CYCLES); + if (subId && phase === 'cycle_complete') { + const signals = diagnostics.subsystemShiftSignals; + if (Array.isArray(signals) && signals.length) { + for (const sig of signals) { + const sid = sig.subsystemId || subId; + const healthLevel = computeHealthLevel(sig); + agentsState.subsystemHealth[sid] = { ...sig, healthLevel }; + if (!agentsState.subsystemHistory[sid]) agentsState.subsystemHistory[sid] = []; + const history = agentsState.subsystemHistory[sid]; + history.push({ + healthLevel, + avgAbsZ: parseFloat(sig.avgAbsZ || 0), + candidateRatio: parseFloat(sig.candidateRatio || 0), + candidates: parseInt(sig.candidate || 0, 10), + evaluated: parseInt(sig.evaluated || 0, 10), + ts: Date.now(), + }); + if (history.length > HEALTH_TREND_MAX_CYCLES) history.splice(0, history.length - HEALTH_TREND_MAX_CYCLES); } } + agentsState.agentStates[subId] = { + state: payload.state || 'running', + cycleCount: diagnostics.cycleCount || 0, + avgCycleMs: diagnostics.avgCycleMs || 0, + totalCandidates: diagnostics.totalCandidates || 0, + totalTriaged: diagnostics.totalTriaged || 0, + }; + + // Replace events for this subsystem with current live events + const liveEvents = payload.liveEvents || []; + agentsState.events = agentsState.events.filter((e) => e.subsystem_id !== subId); + for (const evt of liveEvents) { + agentsState.events.unshift(evt); + } + } + + if (subId && phase === 'cycle_progress') { + if (!agentsState.agentStates[subId]) { + agentsState.agentStates[subId] = { state: 'running', cycleCount: 0, avgCycleMs: 0, totalCandidates: 0, totalTriaged: 0 }; + } + const step = diagnostics.step || ''; + const stepLabels = { + reading_tags: 'Reading tags', + fetching_history: 'Fetching history', + 
scoring: 'Scoring', + triaging: 'Triaging', + waiting: 'Idle', + }; + agentsState.agentStates[subId].currentStep = step; + agentsState.agentStates[subId].stepLabel = stepLabels[step] || step; + agentsState.agentStates[subId].stepDetail = diagnostics.detail || ''; + agentsState.agentStates[subId].lastStepAt = Date.now(); + updateAgentCardPhase(subId); + return; + } + + if (subId && (phase === 'agent_paused' || phase === 'agent_stopped')) { + if (agentsState.agentStates[subId]) { + agentsState.agentStates[subId].state = 'paused'; + agentsState.agentStates[subId].currentStep = 'paused'; + agentsState.agentStates[subId].stepLabel = 'Paused'; + } + } + if (subId && (phase === 'agent_resumed' || phase === 'agent_started')) { + if (agentsState.agentStates[subId]) { + agentsState.agentStates[subId].state = 'running'; + agentsState.agentStates[subId].currentStep = ''; + agentsState.agentStates[subId].stepLabel = ''; + } } renderSubsystemHealthGrid(); } +function updateAgentCardPhase(subId) { + const card = document.querySelector(`.agents-health-card[data-subsystem-id="${CSS.escape(subId)}"]`); + if (!card) return; + const phaseEl = card.querySelector('.health-agent-phase'); + if (!phaseEl) return; + const agState = agentsState.agentStates[subId] || {}; + const step = agState.currentStep || ''; + const isActive = step && step !== 'waiting' && step !== 'paused'; + phaseEl.textContent = agState.stepLabel || ''; + phaseEl.className = 'health-agent-phase' + (isActive ? ' phase-active' : ''); + if (isActive) { + card.classList.add('agent-cycling'); + } else { + card.classList.remove('agent-cycling'); + } +} + function renderSubsystemHealthGrid() { const container = document.getElementById('agents-health-grid'); if (!container) return; const entries = Object.entries(agentsState.subsystemHealth); if (!entries.length) { - container.innerHTML = '
Start monitoring to see subsystem health.
'; + container.innerHTML = '
Start monitoring to see subsystem agents.
'; return; } @@ -3713,11 +3788,34 @@ function renderSubsystemHealthGrid() { const anomalyClass = candidates > 0 ? (level === 'critical' ? ' has-critical' : ' has-anomalies') : ''; const history = agentsState.subsystemHistory[subId] || []; + const agState = agentsState.agentStates[subId] || {}; + const isPaused = agState.state === 'paused'; + const agentIcon = isPaused ? '▶' : '▮▮'; + const agentTitle = isPaused ? 'Resume agent' : 'Pause agent'; + + const currentStep = agState.currentStep || ''; + const isActive = currentStep && currentStep !== 'waiting' && currentStep !== 'paused'; + const phaseLabel = agState.stepLabel || ''; + const cyclingClass = isActive ? ' agent-cycling' : ''; + + const metricsRow = ` +
+ ${escapeHtml(phaseLabel)} + #${agState.cycleCount || 0} + ${agState.avgCycleMs || 0}ms + cand: ${agState.totalCandidates || 0} + tri: ${agState.totalTriaged || 0} +
+ `; + let expandedBody = ''; if (isExpanded) { const bigTrend = renderTrendBars(history, 48); const tagRows = renderTagSignalRows(sig.tagSignals || []); const tagCount = (sig.tagSignals || []).length; + const subEvents = getFilteredEventsForSubsystem(subId); + const eventRows = renderSubsystemEventRows(subEvents); + const eventCount = subEvents.length; expandedBody = `
${bigTrend}
@@ -3729,6 +3827,11 @@ function renderSubsystemHealthGrid() { NameTrendz-scoreAvgCurrent
${tagRows}
+
+

Events

+ ${eventCount} events +
+
${eventRows}
`; } else { @@ -3736,14 +3839,18 @@ function renderSubsystemHealthGrid() { } return ` -
+
${escapeHtml(name)}
- ${escapeHtml(type)} +
+ + ${escapeHtml(type)} +
+ ${metricsRow}
Tags @@ -3765,13 +3872,124 @@ function renderSubsystemHealthGrid() { }) .join(''); + container.querySelectorAll('.btn-agent-toggle').forEach((btn) => { + btn.addEventListener('click', (e) => { + e.stopPropagation(); + const subId = btn.getAttribute('data-subsystem-id'); + if (!subId) return; + const agState = agentsState.agentStates[subId] || {}; + if (agState.state === 'paused') window.api.agentsStartSubsystem(subId); + else window.api.agentsStopSubsystem(subId); + }); + }); + container.querySelectorAll('.agents-health-card').forEach((card) => { card.addEventListener('click', (e) => { - if (e.target.closest('.health-tag-list')) return; + if (e.target.closest('.health-tag-list') || e.target.closest('.health-event-list') || e.target.closest('.btn-agent-toggle')) return; const subId = card.getAttribute('data-subsystem-id'); selectSubsystem(subId); }); }); + + container.querySelectorAll('.health-event-row').forEach((row) => { + row.addEventListener('click', (e) => { + e.stopPropagation(); + const eventId = row.getAttribute('data-event-id'); + if (eventId) selectAgentEvent(eventId); + }); + }); + + container.querySelectorAll('.health-event-detail-actions .btn-deep-analyze').forEach((btn) => { + btn.addEventListener('click', (e) => { + e.stopPropagation(); + const eventId = btn.getAttribute('data-event-id'); + if (eventId) deepAnalyzeEvent(eventId, btn); + }); + }); + + container.querySelectorAll('.health-event-detail-actions .btn-ack-event').forEach((btn) => { + btn.addEventListener('click', (e) => { + e.stopPropagation(); + const eventId = btn.getAttribute('data-event-id'); + if (eventId) acknowledgeEvent(eventId); + }); + }); + + container.querySelectorAll('.health-event-detail-actions .btn-open-graph').forEach((btn) => { + btn.addEventListener('click', (e) => { + e.stopPropagation(); + const eventId = btn.getAttribute('data-event-id'); + if (!eventId) return; + const event = agentsState.events.find((ev) => ev.event_id === eventId); + if (!event) return; + const 
target = resolveAgentGraphTarget(event); + if (target) openGraphModal(target.name, target.type, event.summary || target.name); + }); + }); +} + +function renderSubsystemEventRows(events) { + if (!events.length) return '
No events for this subsystem.
'; + return events.slice(0, 50).map((event) => { + const sev = String(event.severity || 'low').toLowerCase(); + const isSelected = event.event_id === agentsState.selectedEventId; + const selectedClass = isSelected ? ' selected' : ''; + const tagLabel = event.tag_name || event.source_tag || ''; + const timeLabel = formatAgentTime(event.created_at); + + let detailHtml = ''; + if (isSelected) { + detailHtml = renderInlineEventDetail(event); + } + + return ` +
+
+ ${escapeHtml(sev)} + ${escapeHtml(event.summary || 'Anomaly')} + ${escapeHtml(tagLabel)} + ${escapeHtml(timeLabel)} +
+ ${detailHtml} +
+ `; + }).join(''); +} + +function renderInlineEventDetail(event) { + let checks = [], causes = [], safety = []; + try { checks = JSON.parse(event.recommended_checks_json || '[]'); } catch (e) {} + try { causes = JSON.parse(event.probable_causes_json || '[]'); } catch (e) {} + try { safety = JSON.parse(event.safety_notes_json || '[]'); } catch (e) {} + + const st = String(event.state || '').toLowerCase(); + const ackLabel = st === 'acknowledged' ? 'Clear' : (st === 'cleared' ? 'Cleared' : 'Acknowledge'); + const ackDisabled = st === 'cleared' ? ' disabled' : ''; + const isPending = agentsState.pendingDeepAnalyze.has(event.event_id); + const analyzeLabel = isPending ? 'Analyzing…' : (event.deep_analyzed ? 'Re-Analyze' : 'Deep Analyze'); + const analyzeDisabled = isPending ? ' disabled' : ''; + + return ` +
+
+ State${escapeHtml(event.state || 'open')} + z-score${escapeHtml(String(event.z_score ?? '0'))} + MAD${escapeHtml(String(event.mad_score ?? '0'))} + Confidence${escapeHtml(String(event.confidence ?? ''))} + Category${escapeHtml(event.category || '')} + Source Tag${escapeHtml(event.source_tag || '')} +
+ ${event.explanation ? `
Explanation
${escapeHtml(event.explanation)}
` : ''} + ${causes.length ? `
Probable Causes
    ${causes.map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('')}
` : ''} + ${checks.length ? `
Checks
    ${checks.map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('')}
` : ''} + ${safety.length ? `
Safety
    ${safety.map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('')}
` : ''} +
+ + + +
+
+ `; } function renderTrendBars(history, maxHeight) { @@ -3820,7 +4038,6 @@ function renderTagSignalRows(tagSignals) { if (!tagSignals || !tagSignals.length) { return '
No tag data available yet.
'; } - return tagSignals .map((tag) => { const absZ = Math.abs(tag.z || 0); @@ -3848,13 +4065,23 @@ function selectSubsystem(subId) { const clearBtn = document.getElementById('btn-agents-clear-subsystem'); if (agentsState.selectedSubsystemId === subId) { agentsState.selectedSubsystemId = null; + agentsState.selectedEventId = null; if (clearBtn) clearBtn.style.display = 'none'; } else { agentsState.selectedSubsystemId = subId; + agentsState.selectedEventId = null; if (clearBtn) clearBtn.style.display = ''; } renderSubsystemHealthGrid(); - renderAgentEventList(); +} + +function selectAgentEvent(eventId) { + if (agentsState.selectedEventId === eventId) { + agentsState.selectedEventId = null; + } else { + agentsState.selectedEventId = eventId; + } + renderSubsystemHealthGrid(); } function updateAgentStatusUi(status, text) { @@ -3870,95 +4097,6 @@ function updateAgentStatusUi(status, text) { if (el.btnStop) el.btnStop.disabled = !(normalized === 'running' || normalized === 'starting' || normalized === 'stopping'); } -function updateAgentMetrics(metrics = {}, heartbeatTs = null) { - const el = getAgentsElements(); - if (el.metricCycle) el.metricCycle.textContent = String(metrics.cycleMs ?? metrics.lastCycleMs ?? 0); - if (el.metricCandidates) el.metricCandidates.textContent = String(metrics.candidates ?? metrics.lastCandidates ?? 0); - if (el.metricTriaged) el.metricTriaged.textContent = String(metrics.triaged ?? metrics.lastTriaged ?? 0); - if (el.metricEmitted) el.metricEmitted.textContent = String(metrics.emitted ?? metrics.lastEmitted ?? 
0); - if (el.metricHeartbeat) el.metricHeartbeat.textContent = formatAgentTime(heartbeatTs || metrics.timestamp); -} - -function getFilteredAgentEvents() { - const el = getAgentsElements(); - const state = (el.filterState?.value || '').toLowerCase(); - const severity = (el.filterSeverity?.value || '').toLowerCase(); - const search = (el.filterSearch?.value || '').trim().toLowerCase(); - const subFilter = agentsState.selectedSubsystemId || ''; - return agentsState.events.filter((event) => { - if (state && String(event.state || '').toLowerCase() !== state) return false; - if (severity && String(event.severity || '').toLowerCase() !== severity) return false; - if (subFilter) { - const eventSubId = event.subsystem_id - || `${(event.subsystem_type || 'global')}:${(event.subsystem_name || 'all').toLowerCase()}`; - if (eventSubId !== subFilter) return false; - } - if (search) { - const haystack = [ - event.summary, - event.source_tag, - event.tag_name, - event.subsystem_name, - event.subsystem_type, - ...(event.equipment || []), - ...(event.tags || []), - ] - .filter(Boolean) - .join(' ') - .toLowerCase(); - if (!haystack.includes(search)) return false; - } - return true; - }); -} - -function renderAgentEventList() { - const el = getAgentsElements(); - if (!el.list) return; - const events = getFilteredAgentEvents(); - if (!events.length) { - const subName = agentsState.selectedSubsystemId - ? (agentsState.subsystemHealth[agentsState.selectedSubsystemId]?.subsystemName || agentsState.selectedSubsystemId) - : ''; - const msg = subName - ? `No anomaly events for "${subName}".` - : 'No anomaly events match the current filters.'; - el.list.innerHTML = `
${escapeHtml(msg)}
`; - return; - } - el.list.innerHTML = events - .map((event) => { - const active = event.event_id === agentsState.selectedEventId ? ' active' : ''; - const sev = String(event.severity || 'low').toLowerCase(); - const equipment = (event.equipment || []).slice(0, 2).join(', '); - const subsystemLabel = event.subsystem_name - ? `${event.subsystem_type || 'subsystem'}: ${event.subsystem_name}` - : ''; - const baseMeta = [event.tag_name || event.source_tag || '', equipment, subsystemLabel] - .filter(Boolean) - .join(' • '); - return ` -
-
- ${escapeHtml(sev)} - ${escapeHtml(formatAgentTime(event.created_at))} -
-
${escapeHtml(event.summary || 'Untitled anomaly')}
-
${escapeHtml(baseMeta)}
-
- `; - }) - .join(''); - - el.list.querySelectorAll('.agents-event-card').forEach((card) => { - card.addEventListener('click', () => { - const eventId = card.getAttribute('data-event-id'); - if (!eventId) return; - selectAgentEvent(eventId); - }); - }); -} - function resolveAgentGraphTarget(event) { if (String(event.subsystem_type || '').toLowerCase() === 'view' && event.subsystem_name) { return { name: event.subsystem_name, type: 'View' }; @@ -3973,102 +4111,6 @@ function resolveAgentGraphTarget(event) { return null; } -function renderAgentEventDetails(event) { - const el = getAgentsElements(); - if (!el.detail) return; - if (!event) { - el.detail.innerHTML = '

Select an anomaly event from the feed.

'; - if (el.btnDeepAnalyze) el.btnDeepAnalyze.disabled = true; - if (el.btnOpenGraph) el.btnOpenGraph.disabled = true; - if (el.btnAck) el.btnAck.disabled = true; - return; - } - - let checks = []; - let causes = []; - let safety = []; - try { checks = JSON.parse(event.recommended_checks_json || '[]'); } catch (e) {} - try { causes = JSON.parse(event.probable_causes_json || '[]'); } catch (e) {} - try { safety = JSON.parse(event.safety_notes_json || '[]'); } catch (e) {} - - el.detail.innerHTML = ` -
-
Event ID${escapeHtml(event.event_id || '')}
-
State${escapeHtml(event.state || '')}
-
Severity${escapeHtml(event.severity || '')}
-
Confidence${escapeHtml(String(event.confidence ?? ''))}
-
Category${escapeHtml(event.category || '')}
-
Timestamp${escapeHtml(formatAgentTime(event.created_at))}
-
Subsystem Type${escapeHtml(event.subsystem_type || 'global')}
-
Subsystem${escapeHtml(event.subsystem_name || 'all')}
-
Source Tag${escapeHtml(event.source_tag || '')}
-
Tag Name${escapeHtml(event.tag_name || '')}
-
z-score${escapeHtml(String(event.z_score ?? '0'))}
-
MAD score${escapeHtml(String(event.mad_score ?? '0'))}
-
-
-
Summary
-
${escapeHtml(event.summary || '')}
-
-
-
Explanation
-
${escapeHtml(event.explanation || '')}
-
-
-
Probable Causes
-
    ${(causes || []).map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('') || '
  • n/a
  • '}
-
-
-
Verification Checks
-
    ${(checks || []).map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('') || '
  • n/a
  • '}
-
-
-
Safety Notes
-
    ${(safety || []).map((x) => `
  • ${escapeHtml(String(x))}
  • `).join('') || '
  • n/a
  • '}
-
- `; - - if (el.btnDeepAnalyze) { - el.btnDeepAnalyze.disabled = false; - el.btnDeepAnalyze.textContent = event.llm_triaged ? 'Re-Analyze' : 'Deep Analyze'; - } - if (el.btnOpenGraph) el.btnOpenGraph.disabled = !resolveAgentGraphTarget(event); - if (el.btnAck) { - const state = String(event.state || '').toLowerCase(); - if (state === 'acknowledged') { - el.btnAck.textContent = 'Clear'; - el.btnAck.disabled = false; - } else if (state === 'cleared') { - el.btnAck.textContent = 'Cleared'; - el.btnAck.disabled = true; - } else { - el.btnAck.textContent = 'Acknowledge'; - el.btnAck.disabled = false; - } - } -} - -async function selectAgentEvent(eventId) { - agentsState.selectedEventId = eventId; - const existing = agentsState.events.find((e) => e.event_id === eventId); - if (existing && existing.explanation && existing.recommended_checks_json) { - renderAgentEventList(); - renderAgentEventDetails(existing); - return; - } - const detailResult = await window.api.agentsGetEvent(eventId); - if (detailResult.success && detailResult.event) { - const idx = agentsState.events.findIndex((e) => e.event_id === eventId); - if (idx >= 0) { - agentsState.events[idx] = { ...agentsState.events[idx], ...detailResult.event }; - } else { - agentsState.events.unshift(detailResult.event); - } - renderAgentEventList(); - renderAgentEventDetails(detailResult.event); - } -} - async function loadAgentEvents() { const el = getAgentsElements(); const result = await window.api.agentsListEvents({ @@ -4079,12 +4121,7 @@ async function loadAgentEvents() { }); if (!result.success) return; agentsState.events = Array.isArray(result.events) ? 
result.events : []; - renderAgentEventList(); - - if (agentsState.selectedEventId) { - const selected = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId); - renderAgentEventDetails(selected || null); - } + renderSubsystemHealthGrid(); } async function refreshAgentStatus() { @@ -4097,7 +4134,6 @@ async function refreshAgentStatus() { agentsState.runId = status.runId || agentsState.runId; agentsState.status = status.status || 'running'; updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId}`); - updateAgentMetrics(status.metrics || {}, status.lastHeartbeatAt); } else { agentsState.status = 'idle'; updateAgentStatusUi('idle', 'No active run'); @@ -4108,7 +4144,10 @@ async function startAgentsMonitoring() { const config = getAgentsConfigFromUI(); agentsState.subsystemHealth = {}; agentsState.subsystemHistory = {}; + agentsState.agentStates = {}; agentsState.selectedSubsystemId = null; + agentsState.selectedEventId = null; + agentsState.events = []; renderSubsystemHealthGrid(); const clearSubBtn = document.getElementById('btn-agents-clear-subsystem'); if (clearSubBtn) clearSubBtn.style.display = 'none'; @@ -4135,70 +4174,59 @@ async function stopAgentsMonitoring() { updateAgentStatusUi('stopped', 'Monitoring stopped'); } -async function deepAnalyzeSelectedEvent() { - if (!agentsState.selectedEventId) return; - const el = getAgentsElements(); - if (el.btnDeepAnalyze) { - el.btnDeepAnalyze.disabled = true; - el.btnDeepAnalyze.textContent = 'Analyzing…'; +async function deepAnalyzeEvent(eventId, btnEl) { + const event = agentsState.events.find((e) => e.event_id === eventId); + if (!event) { + console.error('[Agents] deep-analyze: event not found in local state', eventId); + if (btnEl) { btnEl.textContent = 'Not Found'; btnEl.disabled = false; } + return; } + agentsState.pendingDeepAnalyze.add(eventId); + if (btnEl) { btnEl.disabled = true; btnEl.textContent = 'Analyzing…'; } try { - const result = await 
window.api.agentsDeepAnalyze(agentsState.selectedEventId); - if (result.success && result.event) { - const idx = agentsState.events.findIndex((e) => e.event_id === agentsState.selectedEventId); - if (idx >= 0) agentsState.events[idx] = { ...agentsState.events[idx], ...result.event }; - renderAgentEventList(); - renderAgentEventDetails(result.event); - } else { + const result = await window.api.agentsDeepAnalyze(eventId, event); + if (!result.success) { console.error('[Agents] deep-analyze failed:', result.error); - if (el.btnDeepAnalyze) { - el.btnDeepAnalyze.textContent = 'Failed — Retry'; - el.btnDeepAnalyze.disabled = false; - } + agentsState.pendingDeepAnalyze.delete(eventId); + if (btnEl) { btnEl.textContent = 'Failed — Retry'; btnEl.disabled = false; } } + // Button stays disabled — result arrives async via AGENT_EVENT with deepAnalyze=true } catch (err) { console.error('[Agents] deep-analyze error:', err); - if (el.btnDeepAnalyze) { - el.btnDeepAnalyze.textContent = 'Failed — Retry'; - el.btnDeepAnalyze.disabled = false; - } + agentsState.pendingDeepAnalyze.delete(eventId); + if (btnEl) { btnEl.textContent = 'Failed — Retry'; btnEl.disabled = false; } } } -async function acknowledgeSelectedAgentEvent() { - if (!agentsState.selectedEventId) return; - const selected = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId); - const state = String(selected?.state || '').toLowerCase(); - const result = state === 'acknowledged' - ? await window.api.agentsClearEvent(agentsState.selectedEventId, '') - : await window.api.agentsAckEvent(agentsState.selectedEventId, ''); +async function acknowledgeEvent(eventId) { + const event = agentsState.events.find((e) => e.event_id === eventId); + const st = String(event?.state || '').toLowerCase(); + const result = st === 'acknowledged' + ? 
await window.api.agentsClearEvent(eventId, '') + : await window.api.agentsAckEvent(eventId, ''); if (!result.success) return; - await loadAgentEvents(); - const refreshed = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId); - renderAgentEventDetails(refreshed || null); + const idx = agentsState.events.findIndex((e) => e.event_id === eventId); + if (idx >= 0) { + agentsState.events[idx].state = st === 'acknowledged' ? 'cleared' : 'acknowledged'; + } + renderSubsystemHealthGrid(); } function upsertRealtimeAgentEvent(payload) { - if (!payload || !payload.eventId) return; - const idx = agentsState.events.findIndex((e) => e.event_id === payload.eventId); - const next = { - event_id: payload.eventId, - severity: payload.severity || 'medium', - summary: payload.summary || 'Anomaly detected', - category: payload.category || 'deviation', - created_at: payload.createdAt || new Date().toISOString(), - source_tag: payload.entityRefs?.sourceTag || payload.entityRefs?.tag || '', - tag_name: payload.entityRefs?.tag || '', - subsystem_type: payload.entityRefs?.subsystemType || '', - subsystem_name: payload.entityRefs?.subsystemName || '', - state: 'open', - }; - if (idx >= 0) { - agentsState.events[idx] = { ...agentsState.events[idx], ...next }; - } else { - agentsState.events.unshift(next); + const evt = payload?.event; + if (!evt || !evt.event_id) return; + if (payload.deepAnalyze) { + agentsState.pendingDeepAnalyze.delete(evt.event_id); + if (evt.deep_analyze_error) { + console.error('[Agents] Deep analyze failed:', evt.deep_analyze_error); + } else { + console.log('[Agents] Deep analyze complete for', evt.event_id); + } } - renderAgentEventList(); + const idx = agentsState.events.findIndex((e) => e.event_id === evt.event_id); + if (idx >= 0) agentsState.events[idx] = { ...agentsState.events[idx], ...evt }; + else agentsState.events.unshift(evt); + renderSubsystemHealthGrid(); } function ensureAgentListeners() { @@ -4210,29 +4238,28 @@ function 
ensureAgentListeners() { if (payload.runId) agentsState.runId = payload.runId; agentsState.status = payload.state || agentsState.status; updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`); - updateAgentMetrics(payload, payload.timestamp); + const diagnostics = payload.diagnostics || {}; const phase = diagnostics.phase || '?'; - console.log(`[Agents] phase=${phase} tags=${diagnostics.monitoredTags ?? '?'}`); - - if (phase === 'cycle_complete') { - const signals = diagnostics.subsystemShiftSignals; - const subCount = Array.isArray(signals) ? signals.length : 0; - const evaluated = (diagnostics.evaluatedLinked || 0) + (diagnostics.evaluatedUnlinked || 0); - console.log(`[Agents] cycle_complete: ${subCount} subsystems, ${evaluated} evaluated, ${diagnostics.candidateLinked || 0} candidates`); + const subId = payload.subsystemId || diagnostics.subsystemId || ''; + if (phase === 'triage_slow_candidate') { + console.warn(`[Agent ${subId}] SLOW TRIAGE: ${diagnostics.tag} use_llm=${diagnostics.use_llm} llm=${diagnostics.llm_ms}ms persist=${diagnostics.persist_ms}ms total=${diagnostics.total_ms}ms`); + } else if (phase === 'cycle_complete' && subId) { + const t = diagnostics.timingMs || {}; + console.log(`[Agent ${subId}] cycle #${diagnostics.cycleCount || '?'} ${payload.cycleMs || diagnostics.avgCycleMs || 0}ms (read=${t.read || '?'}ms hist=${t.history || '?'}ms score=${t.score || '?'}ms triage=${t.triage || '?'}ms) ${payload.candidates || 0} cand`); + } else if (phase === 'agents_started' || phase === 'rediscovery_complete') { + console.log(`[Agents] ${phase}: ${diagnostics.agentCount || 0} agents`); } - updateSubsystemHealthFromDiagnostics(diagnostics); + updateSubsystemHealthFromStatus(payload); }); - window.api.onAgentEvent((payload) => { - upsertRealtimeAgentEvent(payload); - }); + window.api.onAgentEvent((payload) => upsertRealtimeAgentEvent(payload)); window.api.onAgentError((payload) => { if (!payload) return; - console.error('[Agents 
error]', payload); - updateAgentStatusUi('error', payload.message || 'Agent runtime error'); + console.error('[Agents error]', payload.code, payload.message); + if (!payload.recoverable) updateAgentStatusUi('error', payload.message || 'Agent runtime error'); }); window.api.onAgentComplete((payload) => { @@ -4240,7 +4267,6 @@ function ensureAgentListeners() { console.log('[Agents] run complete, success=' + payload.success); agentsState.status = payload.success ? 'stopped' : 'failed'; updateAgentStatusUi(agentsState.status, payload.reason || 'Run complete'); - refreshAgentStatus(); }); } @@ -4257,25 +4283,15 @@ function initAgentsTab() { await window.api.agentsCleanup(14); await loadAgentEvents(); }); - el.btnDeepAnalyze?.addEventListener('click', deepAnalyzeSelectedEvent); - el.btnAck?.addEventListener('click', acknowledgeSelectedAgentEvent); - el.btnOpenGraph?.addEventListener('click', () => { - const event = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId); - if (!event) return; - const target = resolveAgentGraphTarget(event); - if (!target) return; - openGraphModal(target.name, target.type, event.summary || target.name); - }); - el.filterState?.addEventListener('change', loadAgentEvents); - el.filterSeverity?.addEventListener('change', loadAgentEvents); - el.filterSearch?.addEventListener('input', renderAgentEventList); - + el.filterState?.addEventListener('change', () => renderSubsystemHealthGrid()); + el.filterSeverity?.addEventListener('change', () => renderSubsystemHealthGrid()); + el.filterSearch?.addEventListener('input', () => renderSubsystemHealthGrid()); const clearSubBtn = document.getElementById('btn-agents-clear-subsystem'); clearSubBtn?.addEventListener('click', () => { agentsState.selectedSubsystemId = null; + agentsState.selectedEventId = null; clearSubBtn.style.display = 'none'; renderSubsystemHealthGrid(); - renderAgentEventList(); }); } refreshAgentStatus(); diff --git a/electron-ui/styles.css b/electron-ui/styles.css 
index c967b08..35e7ffc 100644 --- a/electron-ui/styles.css +++ b/electron-ui/styles.css @@ -3227,6 +3227,39 @@ select.input, text-overflow: ellipsis; } +.health-card-controls { + display: flex; + align-items: center; + gap: var(--space-1); + flex-shrink: 0; +} + +.btn-agent-toggle { + background: transparent; + border: 1px solid var(--color-border); + border-radius: var(--radius-sm); + color: var(--color-text-muted); + cursor: pointer; + font-size: 10px; + padding: 2px 6px; + line-height: 1; + transition: color var(--transition-fast), border-color var(--transition-fast); +} + +.btn-agent-toggle:hover { + color: var(--color-text); + border-color: var(--color-border-active); +} + +.agents-health-card.agent-paused { + opacity: 0.6; +} + +.agents-health-card.agent-paused .health-indicator { + opacity: 0.4; + animation: none; +} + .health-card-type { font-size: 10px; text-transform: uppercase; @@ -3240,6 +3273,49 @@ select.input, white-space: nowrap; } +.health-agent-metrics { + display: flex; + gap: var(--space-2); + margin-bottom: var(--space-2); + font-size: 10px; + font-family: var(--font-mono); + color: var(--color-text-muted); + padding: 2px var(--space-1); + border-bottom: 1px solid var(--color-border-subtle); + align-items: center; +} + +.health-agent-metrics span { + white-space: nowrap; +} + +.health-agent-phase { + font-family: var(--font-sans); + font-weight: 500; + color: var(--color-text-muted); + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; +} + +.health-agent-phase.phase-active { + color: var(--color-accent); +} + +.agents-health-card.agent-cycling { + border-color: rgba(34, 211, 238, 0.3); + box-shadow: 0 0 0 1px rgba(34, 211, 238, 0.08) inset; +} + +@keyframes agent-cycle-pulse { + 0%, 100% { opacity: 1; } + 50% { opacity: 0.5; } +} + +.agents-health-card.agent-cycling .health-indicator { + animation: agent-cycle-pulse 1s ease-in-out infinite; +} + .health-card-stats { display: grid; grid-template-columns: 1fr 1fr 1fr; @@ -3464,38 
+3540,7 @@ select.input, text-align: center; } -.agents-main { - display: grid; - grid-template-columns: minmax(300px, 38%) minmax(0, 1fr); - gap: var(--space-3); - min-height: 480px; -} - -.agents-feed-panel, -.agents-detail-panel { - border: 1px solid var(--color-border); - background: var(--color-bg-panel); - border-radius: var(--radius-lg); - overflow: hidden; - display: flex; - flex-direction: column; -} - -.agents-feed-header, -.agents-detail-header { - padding: var(--space-3); - border-bottom: 1px solid var(--color-border-subtle); - display: flex; - justify-content: space-between; - align-items: center; - gap: var(--space-2); -} - -.agents-feed-header h3, -.agents-detail-header h3 { - font-size: var(--text-md); - font-weight: 600; -} +/* ---- Inline event list/detail within subsystem cards ---- */ .agents-feed-filters { display: flex; @@ -3504,50 +3549,7 @@ select.input, } .agents-feed-filters .input { - min-width: 120px; -} - -.agents-event-list { - overflow-y: auto; - padding: var(--space-2); - display: flex; - flex-direction: column; - gap: var(--space-2); - flex: 1; -} - -.agents-empty { - color: var(--color-text-muted); - font-size: var(--text-sm); - padding: var(--space-4); - text-align: center; -} - -.agents-event-card { - border: 1px solid var(--color-border); - background: var(--color-bg-panel-2); - border-radius: var(--radius-md); - padding: var(--space-2) var(--space-3); - cursor: pointer; - transition: border-color var(--transition-fast), transform var(--transition-fast); -} - -.agents-event-card:hover { - border-color: var(--color-border-active); - transform: translateY(-1px); -} - -.agents-event-card.active { - border-color: var(--color-accent); - box-shadow: 0 0 0 1px rgba(34, 211, 238, 0.35) inset; -} - -.agents-event-line-top { - display: flex; - justify-content: space-between; - align-items: center; - margin-bottom: 4px; - gap: var(--space-2); + min-width: 100px; } .agents-severity { @@ -3557,6 +3559,7 @@ select.input, padding: 2px 6px; 
border-radius: 999px; border: 1px solid transparent; + flex-shrink: 0; } .agents-severity.sev-critical { @@ -3583,57 +3586,100 @@ select.input, border-color: rgba(59, 130, 246, 0.35); } -.agents-event-time { +.health-event-list { + display: flex; + flex-direction: column; + gap: var(--space-1); + max-height: 400px; + overflow-y: auto; +} + +.health-event-row { + border: 1px solid var(--color-border-subtle); + border-radius: var(--radius-sm); + background: var(--color-bg-panel); + cursor: pointer; + transition: border-color var(--transition-fast); +} + +.health-event-row:hover { + border-color: var(--color-border-active); +} + +.health-event-row.selected { + border-color: var(--color-accent); + background: var(--color-bg-elevated); +} + +.health-event-row-header { + display: grid; + grid-template-columns: auto 1fr auto auto; + gap: var(--space-2); + align-items: center; + padding: 5px var(--space-2); font-size: var(--text-xs); - color: var(--color-text-muted); - font-family: var(--font-mono); } -.agents-event-summary { - font-size: var(--text-sm); +.health-event-summary { color: var(--color-text); - margin-bottom: 4px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; } -.agents-event-meta { - font-size: var(--text-xs); +.health-event-tag { + font-family: var(--font-mono); color: var(--color-text-muted); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 140px; } -.agents-detail-content { - padding: var(--space-3); - overflow-y: auto; - font-size: var(--text-sm); +.health-event-time { + font-family: var(--font-mono); + color: var(--color-text-muted); + white-space: nowrap; +} + +.health-event-detail { + padding: var(--space-2) var(--space-3); + border-top: 1px solid var(--color-border-subtle); + font-size: var(--text-xs); display: flex; flex-direction: column; - gap: var(--space-3); + gap: var(--space-2); } -.agents-detail-grid { +.health-event-detail-grid { display: grid; - grid-template-columns: 1fr 1fr; - gap: 
var(--space-2) var(--space-3); -} - -.agents-detail-item { - display: flex; - flex-direction: column; - gap: 2px; + grid-template-columns: auto 1fr auto 1fr auto 1fr; + gap: 3px var(--space-2); + align-items: baseline; } -.agents-detail-label { - font-size: var(--text-xs); +.health-event-detail .detail-label { + font-size: 10px; color: var(--color-text-muted); text-transform: uppercase; letter-spacing: 0.3px; } -.agents-detail-value { - font-family: var(--font-mono); - color: var(--color-text); +.health-event-detail .detail-section { + display: flex; + flex-direction: column; + gap: 2px; +} + +.health-event-detail-actions { + display: flex; + gap: var(--space-2); + padding-top: var(--space-1); } .agents-list { - margin-left: var(--space-4); + margin: 0; + padding-left: var(--space-4); color: var(--color-text-secondary); + font-size: var(--text-xs); } diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py index 0d0d91a..db9612c 100644 --- a/scripts/anomaly_monitor.py +++ b/scripts/anomaly_monitor.py @@ -1,15 +1,29 @@ #!/usr/bin/env python3 """ -Long-running anomaly monitor worker. - -Modes: - - run: start continuous monitoring loop - - status: get run status - - list-events: list persisted anomaly events - - get-event: fetch one anomaly event - - ack-event: mark event as acknowledged - - cleanup: delete old events by retention policy - - replay-fixtures: run deterministic fixture validation +Per-subsystem anomaly monitoring with coordinator + worker threads. 
+ +Architecture: + AgentCoordinator (main thread) + - Discovers subsystems from Neo4j ontology + - Spawns/manages SubsystemAgent threads + - Reads stdin for commands (start/stop individual agents) + - Shared: Neo4j driver, IgnitionApiClient, thread-safe emit() + + SubsystemAgent (one thread per subsystem) + - Own cycle loop, history cache, prev_values, ClaudeClient + - Monitors only its assigned tags + - Emits per-subsystem status/events via thread-safe emit() + +CLI modes: + run Start coordinator with per-subsystem agents + list-events List persisted anomaly events + get-event Fetch one anomaly event + ack-event Acknowledge an event + clear-event Clear an acknowledged event + deep-analyze Run LLM triage on an existing event + cleanup Delete old events + status Get run status + replay-fixtures Validate scoring against fixtures """ from __future__ import annotations @@ -17,17 +31,19 @@ import argparse import json import os +import queue import signal import sys +import threading import time import uuid from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Callable, Dict, List, Optional, Set, Tuple try: from dotenv import load_dotenv -except ImportError: # pragma: no cover - optional fallback for minimal environments +except ImportError: def load_dotenv(*_args, **_kwargs): return False @@ -40,82 +56,44 @@ def load_dotenv(*_args, **_kwargs): safe_float, ) - load_dotenv() +_api_semaphore = threading.Semaphore(2) # max 2 concurrent Ignition API calls + +_emit_queue: queue.Queue = queue.Queue() + + +def _emit_writer() -> None: + """Dedicated thread that drains the emit queue to stdout.""" + while True: + line = _emit_queue.get() + if line is None: + break + try: + sys.stdout.write(line) + sys.stdout.flush() + except Exception: + pass + + +_emit_thread = threading.Thread(target=_emit_writer, daemon=True, name="emit-writer") +_emit_thread.start() + def utc_now_iso() -> 
str: return datetime.now(timezone.utc).isoformat() def emit(prefix: str, payload: Dict[str, Any]) -> None: - """Emit machine-parseable messages for Electron main process.""" - print(f"[{prefix}] {json.dumps(payload, default=str)}", flush=True) + _emit_queue.put(f"[{prefix}] {json.dumps(payload, default=str)}\n") DEFAULT_SUBSYSTEM_PRIORITY = ["view", "equipment", "group", "global"] -def _preview_value(value: Any, max_len: int = 120) -> Any: - if value is None or isinstance(value, (bool, int, float)): - return value - text = str(value) - if len(text) <= max_len: - return text - return text[: max_len - 3] + "..." - - -def make_default_diagnostics( - *, - staleness_threshold_sec: int = 120, - phase: str = "initializing", - reason: str = "", -) -> Dict[str, Any]: - return { - "phase": phase, - "reason": reason, - "monitoredTags": 0, - "linkedTags": 0, - "unlinkedTags": 0, - "validLiveCount": 0, - "missingTimestampCount": 0, - "inferredTimestampCount": 0, - "liveErrorCount": 0, - "liveErrorLinked": 0, - "liveErrorUnlinked": 0, - "qualityFilteredCount": 0, - "qualityFilteredLinked": 0, - "qualityFilteredUnlinked": 0, - "staleFilteredCount": 0, - "staleFilteredLinked": 0, - "staleFilteredUnlinked": 0, - "historyErrorCount": 0, - "historyErrorLinked": 0, - "historyErrorUnlinked": 0, - "insufficientHistoryCount": 0, - "lowHistoryCandidateCount": 0, - "evaluatedLinked": 0, - "evaluatedUnlinked": 0, - "candidateLinked": 0, - "candidateUnlinked": 0, - "nearShiftCount": 0, - "nearShiftLinked": 0, - "nearShiftUnlinked": 0, - "stalenessThresholdSec": staleness_threshold_sec, - "staleSamples": [], - "timestampParseNote": "Naive timestamps are treated as local time by parse_timestamp().", - "detectedSubsystemCount": 0, - "detectedSubsystems": [], - "candidateSubsystemCount": 0, - "candidateBySubsystem": {}, - "subsystemShiftSignals": [], - "maxCandidatesPerSubsystem": 0, - "maxLlmTriagesPerSubsystem": 0, - "llmTriagedCount": 0, - "dedupSuppressedCount": 0, - "toolCalls": [], - } - 
+# --------------------------------------------------------------------------- +# Subsystem helpers +# --------------------------------------------------------------------------- def _canonical_subsystem_type(kind: Any) -> str: value = str(kind or "").strip().lower() @@ -149,7 +127,6 @@ def infer_tag_group(tag_path: Optional[str], folder_name: Optional[str] = None) head = folder.split("/", 1)[0].strip() if head: return head - raw = str(tag_path or "").strip() if not raw: return None @@ -159,30 +136,26 @@ def infer_tag_group(tag_path: Optional[str], folder_name: Optional[str] = None) if not raw: return None parts = [p.strip() for p in raw.split("/") if p.strip()] - # Ignore flat tags and only infer a group when there is at least one folder segment. if len(parts) < 2: return None return parts[0] -def _last_segment_from_tag_path(tag_path: Optional[str]) -> str: +def _last_segment(tag_path: Optional[str]) -> str: raw = str(tag_path or "").strip() if not raw: return "" if raw.startswith("[") and "]" in raw: raw = raw.split("]", 1)[1] raw = raw.strip("/") - if not raw: - return "" parts = [p.strip() for p in raw.split("/") if p.strip()] return parts[-1] if parts else raw -def _looks_like_live_tag_path(value: Optional[str]) -> bool: +def _looks_like_tag_path(value: Optional[str]) -> bool: path = str(value or "").strip() if not path: return False - # Typical Ignition path shape: [provider]Folder/Tag or Folder/Tag if path.startswith("[") and "]" in path: return True if "/" in path and not any(ch in path for ch in "{}()"): @@ -197,35 +170,32 @@ def derive_subsystems_for_tag( ) -> Tuple[List[Dict[str, str]], Dict[str, str]]: mode = str(subsystem_mode or "auto").strip().lower() if mode in {"global", "off", "disabled"}: - global_ref = _subsystem_ref("global", "all") - return [global_ref], global_ref + ref = _subsystem_ref("global", "all") + return [ref], ref refs: List[Dict[str, str]] = [] seen: Set[str] = set() - def add_ref(kind: str, name: Optional[str]) -> None: + def 
add(kind: str, name: Optional[str]) -> None: if not name: return ref = _subsystem_ref(kind, name) - if ref["id"] in seen: - return - seen.add(ref["id"]) - refs.append(ref) + if ref["id"] not in seen: + seen.add(ref["id"]) + refs.append(ref) - for view_name in tag_meta.get("views") or []: - add_ref("view", str(view_name)) - for equipment_name in tag_meta.get("equipment") or []: - add_ref("equipment", str(equipment_name)) - add_ref("group", infer_tag_group(tag_meta.get("path"), tag_meta.get("folder_name"))) + for v in tag_meta.get("views") or []: + add("view", str(v)) + for e in tag_meta.get("equipment") or []: + add("equipment", str(e)) + add("group", infer_tag_group(tag_meta.get("path"), tag_meta.get("folder_name"))) if not refs: refs = [_subsystem_ref("global", "all")] - ordered_priority = [ - _canonical_subsystem_type(x) for x in (priority or DEFAULT_SUBSYSTEM_PRIORITY) - ] + ordered = [_canonical_subsystem_type(x) for x in (priority or DEFAULT_SUBSYSTEM_PRIORITY)] primary = refs[0] - for kind in ordered_priority: + for kind in ordered: found = next((s for s in refs if s.get("type") == kind), None) if found: primary = found @@ -234,11 +204,22 @@ def add_ref(kind: str, name: Optional[str]) -> None: return refs, primary +def _preview_value(value: Any, max_len: int = 120) -> Any: + if value is None or isinstance(value, (bool, int, float)): + return value + text = str(value) + return text if len(text) <= max_len else text[:max_len - 3] + "..." 
+ + +# --------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- + def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]: raw = dict(config or {}) thresholds = raw.get("thresholds", {}) if isinstance(raw.get("thresholds"), dict) else {} defaults = { - "pollIntervalMs": 1000, + "pollIntervalMs": 5000, "historyWindowMinutes": 360, "minHistoryPoints": 30, "maxMonitoredTags": 200, @@ -249,17 +230,16 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]: "dedupCooldownMinutes": 10, "retentionDays": 14, "cleanupEveryCycles": 40, - "historyCacheTtlSec": 30, + "historyCacheTtlSec": 60, "tagCacheTtlSec": 60, - "runMode": "live", + "rediscoveryIntervalSec": 60, "scope": { - "project": None, - "equipmentTags": [], - "tagRegex": None, "subsystemMode": "auto", "subsystemPriority": list(DEFAULT_SUBSYSTEM_PRIORITY), "subsystemInclude": [], "includeUnlinkedTags": False, + "tagRegex": None, + "equipmentTags": [], }, "thresholds": { "z": 3.0, @@ -277,320 +257,205 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]: cfg["thresholds"].update({k: v for k, v in thresholds.items() if v is not None}) if isinstance(raw.get("scope"), dict): cfg["scope"].update(raw["scope"]) - scope_cfg = cfg["scope"] - mode = str(scope_cfg.get("subsystemMode") or "auto").strip().lower() - if mode not in {"auto", "global", "off", "disabled"}: - mode = "auto" - scope_cfg["subsystemMode"] = mode - if not isinstance(scope_cfg.get("subsystemPriority"), list) or not scope_cfg.get("subsystemPriority"): - scope_cfg["subsystemPriority"] = list(DEFAULT_SUBSYSTEM_PRIORITY) - scope_cfg["subsystemPriority"] = [ - str(x).strip() - for x in scope_cfg.get("subsystemPriority", []) - if str(x).strip() + scope = cfg["scope"] + mode = str(scope.get("subsystemMode") or "auto").strip().lower() + scope["subsystemMode"] = mode if mode in {"auto", "global", "off", 
"disabled"} else "auto" + if not isinstance(scope.get("subsystemPriority"), list) or not scope["subsystemPriority"]: + scope["subsystemPriority"] = list(DEFAULT_SUBSYSTEM_PRIORITY) + scope["subsystemPriority"] = [ + str(x).strip() for x in scope["subsystemPriority"] if str(x).strip() ] or list(DEFAULT_SUBSYSTEM_PRIORITY) - if not isinstance(scope_cfg.get("subsystemInclude"), list): - scope_cfg["subsystemInclude"] = [] - scope_cfg["subsystemInclude"] = [ - str(x).strip().lower() - for x in scope_cfg.get("subsystemInclude", []) - if str(x).strip() - ] - scope_cfg["includeUnlinkedTags"] = bool(scope_cfg.get("includeUnlinkedTags", False)) + if not isinstance(scope.get("subsystemInclude"), list): + scope["subsystemInclude"] = [] + scope["subsystemInclude"] = [str(x).strip().lower() for x in scope["subsystemInclude"] if str(x).strip()] + scope["includeUnlinkedTags"] = bool(scope.get("includeUnlinkedTags", False)) return cfg -class AnomalyMonitor: - def __init__(self, config: Dict[str, Any], run_id: Optional[str] = None): - self.config = merge_defaults(config) - self.run_id = run_id or f"agent-run-{uuid.uuid4()}" - from ignition_api_client import IgnitionApiClient - from neo4j_ontology import get_ontology_graph - - self.graph = get_ontology_graph() - - self.api = IgnitionApiClient( - base_url=self.config.get("ignitionApiUrl") or os.getenv("IGNITION_API_URL"), - api_token=self.config.get("ignitionApiToken") or os.getenv("IGNITION_API_TOKEN"), - timeout=15.0, - ) +# ═══════════════════════════════════════════════════════════════════════════ +# SubsystemAgent — one per subsystem, runs in its own thread +# ═══════════════════════════════════════════════════════════════════════════ - self.llm = None - self._llm_enabled = bool(os.getenv("ANTHROPIC_API_KEY")) - if self._llm_enabled: - try: - from claude_client import ClaudeClient +class SubsystemAgent(threading.Thread): + """Monitors a single subsystem's tags in its own thread.""" - self.llm = ClaudeClient( - enable_tools=False, 
- ignition_api_url=self.config.get("ignitionApiUrl"), - ignition_api_token=self.config.get("ignitionApiToken"), - ) - except Exception as exc: - self._llm_enabled = False - emit("AGENT_ERROR", { - "runId": self.run_id, - "code": "llm_init_failed", - "message": str(exc), - "recoverable": True, - "timestamp": utc_now_iso(), - }) + def __init__( + self, + *, + subsystem_id: str, + subsystem_type: str, + subsystem_name: str, + tag_metas: List[Dict[str, Any]], + graph: Any, + api: Any, + config: Dict[str, Any], + run_id: str, + stagger_delay: float = 0.0, + ): + super().__init__(daemon=True, name=f"agent-{subsystem_id}") + self.subsystem_id = subsystem_id + self.subsystem_type = subsystem_type + self.subsystem_name = subsystem_name + self.tag_metas = list(tag_metas) + self.graph = graph + self.api = api + self.config = config + self.run_id = run_id + self._stagger_delay = stagger_delay self._running = True + self._paused = False self._cycle_count = 0 + self._total_candidates = 0 + self._total_triaged = 0 + self._total_emitted = 0 + self._cycle_times: List[int] = [] self._prev_values: Dict[str, float] = {} self._history_cache: Dict[str, Dict[str, Any]] = {} - self._tag_cache: Optional[Dict[str, Any]] = None - self._tag_cache_at: float = 0.0 + self._context_cache: Dict[str, Dict[str, Any]] = {} + self._context_cache_ts: Dict[str, float] = {} - # ----------------------------- - # Schema / run lifecycle - # ----------------------------- - def init_schema(self) -> None: - self.graph.init_agent_monitoring_schema() - - def upsert_run(self, status: str, reason: Optional[str] = None) -> None: - with self.graph.session() as session: - session.run( - """ - MERGE (r:AgentRun {run_id: $run_id}) - SET r.status = $status, - r.updated_at = datetime(), - r.last_heartbeat_at = datetime(), - r.config_json = $config_json, - r.cycle_count = $cycle_count, - r.started_at = coalesce(r.started_at, datetime()), - r.stopped_at = CASE WHEN $status IN ['stopped', 'failed'] THEN datetime() ELSE 
r.stopped_at END, - r.stop_reason = CASE WHEN $reason IS NULL THEN r.stop_reason ELSE $reason END - """, - run_id=self.run_id, - status=status, - config_json=json.dumps(self.config, default=str), - cycle_count=self._cycle_count, - reason=reason, - ) - - def heartbeat(self, metrics: Dict[str, Any]) -> None: - with self.graph.session() as session: - session.run( - """ - MATCH (r:AgentRun {run_id: $run_id}) - SET r.last_heartbeat_at = datetime(), - r.cycle_count = $cycle_count, - r.last_cycle_ms = $cycle_ms, - r.last_candidates = $candidates, - r.last_triaged = $triaged, - r.last_emitted = $emitted - """, - run_id=self.run_id, - cycle_count=self._cycle_count, - cycle_ms=metrics.get("cycleMs", 0), - candidates=metrics.get("candidates", 0), - triaged=metrics.get("triaged", 0), - emitted=metrics.get("emitted", 0), - ) - - # ----------------------------- - # Tag and context collection - # ----------------------------- - def get_monitored_tags(self) -> List[Dict[str, Any]]: - ttl = float(self.config.get("tagCacheTtlSec", 60)) - now = time.time() - if self._tag_cache is not None and ttl > 0 and (now - self._tag_cache_at) < ttl: - return self._tag_cache - - result = self._fetch_monitored_tags() - self._tag_cache = result - self._tag_cache_at = time.time() - return result - - def _fetch_monitored_tags(self) -> List[Dict[str, Any]]: - max_tags = int(self.config.get("maxMonitoredTags", 200)) - scope = self.config.get("scope", {}) - tag_regex = scope.get("tagRegex") - equipment_tags = { - str(x).strip().lower() - for x in (scope.get("equipmentTags") or []) - if str(x).strip() - } - subsystem_mode = str(scope.get("subsystemMode") or "auto").strip().lower() - subsystem_priority = scope.get("subsystemPriority") or list(DEFAULT_SUBSYSTEM_PRIORITY) - subsystem_include = { - str(x).strip().lower() - for x in (scope.get("subsystemInclude") or []) - if str(x).strip() - } - include_unlinked = bool(scope.get("includeUnlinkedTags", False)) - tag_map: Dict[str, Dict[str, Any]] = {} - - def 
upsert_tag( - *, - tag_path: str, - tag_name: str, - folder_name: str = "", - views: Optional[List[str]] = None, - equipment: Optional[List[str]] = None, - source: str = "unknown", - ) -> None: - path = str(tag_path or "").strip() - if not path: - return - entry = tag_map.setdefault( - path, - { - "path": path, - "name": str(tag_name or _last_segment_from_tag_path(path) or path), - "folder_name": str(folder_name or ""), - "views": [], - "equipment": [], - "source": source, - "bound_to_view": False, - }, - ) - if source == "view_binding": - entry["bound_to_view"] = True - entry["source"] = source - if folder_name and not entry.get("folder_name"): - entry["folder_name"] = str(folder_name) - if tag_name and ( - not entry.get("name") - or entry.get("name") == entry.get("path") - or entry.get("name") == _last_segment_from_tag_path(entry.get("path")) - ): - entry["name"] = str(tag_name) - for view_name in views or []: - v = str(view_name or "").strip() - if v and v not in entry["views"]: - entry["views"].append(v) - for eq_name in equipment or []: - eq = str(eq_name or "").strip() - if eq and eq not in entry["equipment"]: - entry["equipment"].append(eq) - - with self.graph.session() as session: - bound_result = session.run( - """ - MATCH (v:View)-[:HAS_COMPONENT]->(c:ViewComponent)-[r:BINDS_TO]->(n) - WHERE r.tag_path IS NOT NULL - AND trim(r.tag_path) <> '' - AND toLower(coalesce(r.binding_type, 'tag')) = 'tag' - OPTIONAL MATCH (eq:Equipment)-[*1..2]-(n) - RETURN DISTINCT trim(r.tag_path) AS tag_path, - coalesce(n.name, '') AS tag_name, - collect(DISTINCT v.name) AS views, - collect(DISTINCT eq.name) AS equipment - LIMIT $limit - """, - limit=max_tags * 4, - ) - for r in bound_result: - path = str(r["tag_path"] or "").strip() - if not _looks_like_live_tag_path(path): - continue - upsert_tag( - tag_path=path, - tag_name=str(r["tag_name"] or _last_segment_from_tag_path(path)), - folder_name=infer_tag_group(path) or "", - views=[x for x in (r["views"] or []) if x], - 
equipment=[x for x in (r["equipment"] or []) if x], - source="view_binding", - ) - - scada_result = session.run( - """ - MATCH (t:ScadaTag) - WHERE t.opc_item_path IS NOT NULL - AND trim(t.opc_item_path) <> '' - OPTIONAL MATCH (c:ViewComponent)-[:BINDS_TO]->(t) - OPTIONAL MATCH (v:View)-[:HAS_COMPONENT]->(c) - OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t) - RETURN DISTINCT trim(t.opc_item_path) AS tag_path, - coalesce(t.name, t.opc_item_path) AS tag_name, - coalesce(t.folder_name, '') AS folder_name, - collect(DISTINCT v.name) AS views, - collect(DISTINCT eq.name) AS equipment - LIMIT $limit - """, - limit=max_tags * 6, - ) - for r in scada_result: - path = str(r["tag_path"] or "").strip() - if not _looks_like_live_tag_path(path): - continue - upsert_tag( - tag_path=path, - tag_name=str(r["tag_name"] or _last_segment_from_tag_path(path)), - folder_name=str(r["folder_name"] or ""), - views=[x for x in (r["views"] or []) if x], - equipment=[x for x in (r["equipment"] or []) if x], - source="scada_tag", + self.llm = None + if bool(os.getenv("ANTHROPIC_API_KEY")): + try: + from claude_client import ClaudeClient + self.llm = ClaudeClient( + enable_tools=False, + ignition_api_url=config.get("ignitionApiUrl"), + ignition_api_token=config.get("ignitionApiToken"), ) + except Exception: + pass + + @property + def agent_state(self) -> str: + if not self._running: + return "stopped" + if self._paused: + return "paused" + return "running" + + @property + def avg_cycle_ms(self) -> int: + if not self._cycle_times: + return 0 + return int(sum(self._cycle_times) / len(self._cycle_times)) + + def update_tags(self, tag_metas: List[Dict[str, Any]]) -> None: + self.tag_metas = list(tag_metas) + + def pause(self) -> None: + self._paused = True + + def resume(self) -> None: + self._paused = False + + def stop(self) -> None: + self._running = False + + # ------------------------------------------------------------------- + # Thread entry point + # 
------------------------------------------------------------------- + def run(self) -> None: + poll_ms = int(self.config.get("pollIntervalMs", 1000)) + self._emit_status("agent_started", "cycle_start") - tags = list(tag_map.values()) + if self._stagger_delay > 0: + self._emit_progress("staggering", f"{int(self._stagger_delay * 1000)}ms") + time.sleep(self._stagger_delay) - if not include_unlinked: - linked = [t for t in tags if (t.get("views") or t.get("equipment") or t.get("bound_to_view"))] - if linked: - tags = linked + while self._running: + if self._paused: + time.sleep(0.5) + continue - if tag_regex: - import re + self._cycle_count += 1 + t0 = time.time() try: - pattern = re.compile(tag_regex, re.IGNORECASE) - tags = [t for t in tags if pattern.search(t["path"]) or pattern.search(t["name"])] - except re.error: + metrics = self._run_cycle() + cycle_ms = int((time.time() - t0) * 1000) + self._cycle_times.append(cycle_ms) + if len(self._cycle_times) > 20: + self._cycle_times = self._cycle_times[-20:] + + self._emit_status( + "cycle_complete", + "ok", + cycle_ms=cycle_ms, + diagnostics=metrics.get("diagnostics", {}), + candidates=metrics.get("candidates", 0), + triaged=metrics.get("triaged", 0), + emitted=metrics.get("emitted", 0), + live_events=metrics.get("liveEvents", []), + ) + except Exception as exc: + cycle_ms = int((time.time() - t0) * 1000) emit("AGENT_ERROR", { "runId": self.run_id, - "code": "invalid_tag_regex", - "message": f"Invalid regex: {tag_regex}", + "subsystemId": self.subsystem_id, + "code": "cycle_error", + "message": str(exc), "recoverable": True, "timestamp": utc_now_iso(), }) + self._emit_status("cycle_error", str(exc), cycle_ms=cycle_ms) - if equipment_tags: - tags = [ - t for t in tags - if t["name"].lower() in equipment_tags - or t["path"].lower() in equipment_tags - or any(str(eq).strip().lower() in equipment_tags for eq in t.get("equipment", [])) - ] - - tags.sort( - key=lambda t: ( - 0 if t.get("bound_to_view") else 1, - 0 if 
(t.get("views") or t.get("equipment")) else 1, - str(t.get("path", "")), - ) - ) - - for tag in tags: - subsystems, primary = derive_subsystems_for_tag( - tag_meta=tag, - subsystem_mode=subsystem_mode, - priority=subsystem_priority, - ) - tag["subsystems"] = subsystems - tag["primary_subsystem"] = primary + elapsed = time.time() - t0 + remaining = max(0, poll_ms / 1000.0 - elapsed) + if remaining > 0 and self._running: + self._emit_progress("waiting", f"{int(remaining * 1000)}ms") + time.sleep(remaining) - if subsystem_include: - tags = [ - t - for t in tags - if any( - s.get("id", "").lower() in subsystem_include - or s.get("name", "").lower() in subsystem_include - for s in (t.get("subsystems") or []) - ) - ] + self._emit_status("agent_stopped", "stopped") - return tags[:max_tags] + # ------------------------------------------------------------------- + # Status emission + # ------------------------------------------------------------------- + def _emit_status( + self, + phase: str, + reason: str, + cycle_ms: int = 0, + diagnostics: Optional[Dict[str, Any]] = None, + candidates: int = 0, + triaged: int = 0, + emitted: int = 0, + live_events: Optional[List[Dict[str, Any]]] = None, + ) -> None: + payload: Dict[str, Any] = { + "runId": self.run_id, + "subsystemId": self.subsystem_id, + "state": self.agent_state, + "cycleMs": cycle_ms, + "candidates": candidates, + "triaged": triaged, + "emitted": emitted, + "diagnostics": { + "phase": phase, + "reason": reason, + "subsystemId": self.subsystem_id, + "subsystemType": self.subsystem_type, + "subsystemName": self.subsystem_name, + "cycleCount": self._cycle_count, + "avgCycleMs": self.avg_cycle_ms, + "totalCandidates": self._total_candidates, + "totalTriaged": self._total_triaged, + "totalEmitted": self._total_emitted, + "tagCount": len(self.tag_metas), + **(diagnostics or {}), + }, + "timestamp": utc_now_iso(), + } + if live_events is not None: + payload["liveEvents"] = live_events + emit("AGENT_STATUS", payload) + # 
------------------------------------------------------------------- + # History fetching (per-agent cache) + # ------------------------------------------------------------------- def _extract_history_values(self, history_data: Any, tag_path: str) -> List[float]: - """Normalize multiple gateway response shapes to numeric values list.""" values: List[float] = [] - if history_data is None: - return values - if isinstance(history_data, dict) and history_data.get("error"): + if history_data is None or (isinstance(history_data, dict) and history_data.get("error")): return values rows: List[Any] = [] @@ -602,10 +467,10 @@ def _extract_history_values(self, history_data: Any, tag_path: str) -> List[floa if isinstance(chunk, list): rows = chunk break - if not rows and "tagHistory" in history_data and isinstance(history_data["tagHistory"], list): + if not rows and isinstance(history_data.get("tagHistory"), list): rows = history_data["tagHistory"] - prefixed = self.api._ensure_provider_prefix(tag_path) if hasattr(self, "api") else tag_path + prefixed = self.api._ensure_provider_prefix(tag_path) if hasattr(self.api, "_ensure_provider_prefix") else tag_path stripped = tag_path if stripped.startswith("[") and "]" in stripped: stripped = stripped[stripped.index("]") + 1:] @@ -621,11 +486,11 @@ def _extract_history_values(self, history_data: Any, tag_path: str) -> List[floa continue candidate = None if "value" in row: - candidate = row.get("value") + candidate = row["value"] else: - matched_key = next((k for k in path_variants if k in row), None) - if matched_key: - candidate = row.get(matched_key) + matched = next((k for k in path_variants if k in row), None) + if matched: + candidate = row[matched] elif len(row) <= 2: for k, v in row.items(): if k.lower() in {"timestamp", "ts", "t", "time"}: @@ -637,34 +502,7 @@ def _extract_history_values(self, history_data: Any, tag_path: str) -> List[floa values.append(val) return values - def fetch_history_values(self, tag_path: str) -> 
tuple[List[float], Optional[str]]: - ttl = float(self.config.get("historyCacheTtlSec", 30)) - now = time.time() - cached = self._history_cache.get(tag_path) - if cached and ttl > 0 and (now - cached["fetched_at"]) < ttl: - return list(cached["values"]), cached.get("error") - - minutes = int(self.config.get("historyWindowMinutes", 360)) - end_dt = datetime.now(timezone.utc) - start_dt = end_dt - timedelta(minutes=minutes) - data = self.api.query_tag_history( - [tag_path], - start_dt.isoformat(), - end_dt.isoformat(), - return_size=max(100, int(self.config.get("minHistoryPoints", 30)) * 4), - aggregation_mode="Average", - return_format="Wide", - ) - if isinstance(data, dict) and data.get("error"): - err = str(data.get("error")) - self._history_cache[tag_path] = {"values": [], "error": err, "fetched_at": now} - return [], err - values = self._extract_history_values(data, tag_path) - self._history_cache[tag_path] = {"values": values, "error": None, "fetched_at": now} - return values, None - - def fetch_history_batch(self, tag_paths: List[str]) -> Dict[str, Tuple[List[float], Optional[str]]]: - """Fetch history for many tags, using cache and batched API calls.""" + def _fetch_history_batch(self, tag_paths: List[str]) -> Dict[str, Tuple[List[float], Optional[str]]]: ttl = float(self.config.get("historyCacheTtlSec", 30)) now = time.time() results: Dict[str, Tuple[List[float], Optional[str]]] = {} @@ -684,35 +522,46 @@ def fetch_history_batch(self, tag_paths: List[str]) -> Dict[str, Tuple[List[floa end_dt = datetime.now(timezone.utc) start_dt = end_dt - timedelta(minutes=minutes) return_size = max(100, int(self.config.get("minHistoryPoints", 30)) * 4) - batch_size = 20 - - for i in range(0, len(uncached), batch_size): - batch = uncached[i : i + batch_size] - data = self.api.query_tag_history( - batch, - start_dt.isoformat(), - end_dt.isoformat(), - return_size=return_size, - aggregation_mode="Average", - return_format="Wide", - ) - fetch_ts = time.time() + + for i in 
range(0, len(uncached), 20): + batch = uncached[i:i + 20] + with _api_semaphore: + data = self.api.query_tag_history( + batch, start_dt.isoformat(), end_dt.isoformat(), + return_size=return_size, aggregation_mode="Average", return_format="Wide", + ) + ts = time.time() if isinstance(data, dict) and data.get("error"): - err = str(data.get("error")) - for path in batch: - results[path] = ([], err) - self._history_cache[path] = {"values": [], "error": err, "fetched_at": fetch_ts} + err = str(data["error"]) + for p in batch: + results[p] = ([], err) + self._history_cache[p] = {"values": [], "error": err, "fetched_at": ts} continue - for path in batch: - values = self._extract_history_values(data, path) - results[path] = (values, None) - self._history_cache[path] = {"values": values, "error": None, "fetched_at": fetch_ts} + for p in batch: + vals = self._extract_history_values(data, p) + results[p] = (vals, None) + self._history_cache[p] = {"values": vals, "error": None, "fetched_at": ts} return results - def get_context(self, tag_path: str) -> Dict[str, Any]: + # ------------------------------------------------------------------- + # Context & triage + # ------------------------------------------------------------------- + def _get_context(self, tag_path: str) -> Dict[str, Any]: + ttl = 120.0 + now = time.time() + cached_ts = self._context_cache_ts.get(tag_path, 0) + if tag_path in self._context_cache and (now - cached_ts) < ttl: + return dict(self._context_cache[tag_path]) + + ctx = self._fetch_context_from_graph(tag_path) + self._context_cache[tag_path] = ctx + self._context_cache_ts[tag_path] = now + return dict(ctx) + + def _fetch_context_from_graph(self, tag_path: str) -> Dict[str, Any]: with self.graph.session() as session: result = session.run( """ @@ -723,1252 +572,746 @@ def get_context(self, tag_path: str) -> Dict[str, Any]: OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t) OPTIONAL MATCH (eq)-[:HAS_SYMPTOM]->(s:FaultSymptom) OPTIONAL MATCH 
(s)-[:CAUSED_BY]->(fc:FaultCause) - OPTIONAL MATCH (eq)-[:HAS_PATTERN]->(p:ControlPattern) - OPTIONAL MATCH (eq)-[:SAFETY_CRITICAL]->(se:SafetyElement) RETURN t, collect(DISTINCT v.name) AS views, collect(DISTINCT eq.name) AS equipment, collect(DISTINCT s.symptom) AS symptoms, - collect(DISTINCT fc.cause) AS causes, - collect(DISTINCT p.name) AS patterns, - collect(DISTINCT se.name) AS safety + collect(DISTINCT fc.cause) AS causes LIMIT 1 """, tag=tag_path, ) record = result.single() - fallback_views: List[str] = [] - fallback_equipment: List[str] = [] - fallback_result = session.run( + fallback = session.run( """ MATCH (v:View)-[:HAS_COMPONENT]->(vc:ViewComponent)-[r:BINDS_TO]->(n) WHERE r.tag_path = $tag OPTIONAL MATCH (eq:Equipment)-[*1..2]-(n) - RETURN collect(DISTINCT v.name) AS views, - collect(DISTINCT eq.name) AS equipment + RETURN collect(DISTINCT v.name) AS views, collect(DISTINCT eq.name) AS equipment LIMIT 1 """, tag=tag_path, ).single() - if fallback_result: - fallback_views = [x for x in (fallback_result["views"] or []) if x] - fallback_equipment = [x for x in (fallback_result["equipment"] or []) if x] + fb_views = [x for x in (fallback["views"] or []) if x] if fallback else [] + fb_equip = [x for x in (fallback["equipment"] or []) if x] if fallback else [] if not record: return { "tag_path": tag_path, - "tag_name": _last_segment_from_tag_path(tag_path) or tag_path, - "views": fallback_views, - "equipment": fallback_equipment, + "tag_name": _last_segment(tag_path) or tag_path, + "views": fb_views, "equipment": fb_equip, "group": infer_tag_group(tag_path), - "symptoms": [], - "causes": [], - "patterns": [], - "safety": [], + "symptoms": [], "causes": [], } node = record["t"] return { "tag_path": tag_path, - "tag_name": node.get("name") if node else (_last_segment_from_tag_path(tag_path) or tag_path), - "views": sorted(set([x for x in record["views"] if x] + fallback_views)), - "equipment": sorted(set([x for x in record["equipment"] if x] + 
fallback_equipment)), + "tag_name": node.get("name") if node else (_last_segment(tag_path) or tag_path), + "views": sorted(set([x for x in record["views"] if x] + fb_views)), + "equipment": sorted(set([x for x in record["equipment"] if x] + fb_equip)), "group": infer_tag_group(tag_path, node.get("folder_name") if node else None), "symptoms": [x for x in record["symptoms"] if x], "causes": [x for x in record["causes"] if x], - "patterns": [x for x in record["patterns"] if x], - "safety": [x for x in record["safety"] if x], } - # ----------------------------- - # Triage and persistence - # ----------------------------- - def run_llm_triage( - self, - context: Dict[str, Any], - deterministic: Dict[str, Any], - live_sample: Dict[str, Any], - ) -> Dict[str, Any]: + def _run_llm_triage(self, context: Dict, deterministic: Dict, live_sample: Dict) -> Dict[str, Any]: fallback = { - "summary": f"Deterministic anomaly on {context.get('tag_name', context['tag_path'])}", + "summary": f"Deviation on {context.get('tag_name', context['tag_path'])} in {self.subsystem_name}", "category": deterministic.get("category", "deviation"), "severity": "medium", - "confidence": 0.55, + "confidence": 0.5, "probable_causes": ["Signal deviates from historical baseline."], - "verification_checks": [ - f"Check live quality/timestamp for {context.get('tag_path')}", - "Inspect upstream interlocks and communication health.", - ], - "safety_notes": context.get("safety", []), - "rationale": "LLM triage unavailable; using deterministic fallback.", + "verification_checks": [f"Check {context.get('tag_path')}"], + "safety_notes": [], + "rationale": "Deterministic-only triage.", "related_entities": [ {"label": "Equipment", "name": e} for e in context.get("equipment", [])[:3] - ] + [{"label": "View", "name": v} for v in context.get("views", [])[:2]], + ], } if not self.llm: return fallback - - system_prompt = ( - "You are an industrial anomaly triage assistant. 
" - "Return ONLY valid JSON with keys: summary, category, severity, confidence, " - "probable_causes, verification_checks, safety_notes, rationale, related_entities. " - "Severity must be one of critical/high/medium/low. " - "Category must be one of spike/drift/stuck/state-conflict/quality-issue/deviation. " - "related_entities is a list of objects: {label,name}." - ) - user_prompt = json.dumps( - { - "context": context, - "deterministic": deterministic, - "live_sample": live_sample, - }, - default=str, - ) try: result = self.llm.query_json( - system_prompt=system_prompt, - user_prompt=user_prompt, + system_prompt=( + "You are an industrial anomaly triage assistant. " + "Return ONLY valid JSON with keys: summary, category, severity, confidence, " + "probable_causes, verification_checks, safety_notes, rationale, related_entities." + ), + user_prompt=json.dumps({"context": context, "deterministic": deterministic, "live_sample": live_sample}, default=str), max_tokens=900, use_tools=False, ) data = result.get("data") - if not isinstance(data, dict): - return fallback - merged = dict(fallback) - merged.update({k: v for k, v in data.items() if v is not None}) - return merged - except Exception as exc: - emit("AGENT_ERROR", { - "runId": self.run_id, - "code": "llm_triage_failed", - "message": str(exc), - "recoverable": True, - "timestamp": utc_now_iso(), - }) - return fallback + if isinstance(data, dict): + merged = dict(fallback) + merged.update({k: v for k, v in data.items() if v is not None}) + return merged + except Exception: + pass + return fallback + + # ------------------------------------------------------------------- + # Main cycle + # ------------------------------------------------------------------- + def _emit_progress(self, step: str, detail: str = "") -> None: + emit("AGENT_STATUS", { + "runId": self.run_id, + "subsystemId": self.subsystem_id, + "state": self.agent_state, + "diagnostics": { + "phase": "cycle_progress", + "step": step, + "detail": detail, 
+ "subsystemId": self.subsystem_id, + "cycleCount": self._cycle_count, + }, + "timestamp": utc_now_iso(), + }) - def _severity_from_scores(self, deterministic: Dict[str, Any], llm_out: Dict[str, Any]) -> str: - sev = str(llm_out.get("severity", "")).lower() - if sev in {"critical", "high", "medium", "low"}: - return sev - z = abs(float(deterministic.get("z_score", 0.0))) - if z >= 8: - return "critical" - if z >= 5: - return "high" - if z >= 3: - return "medium" - return "low" - - def is_duplicate_recent(self, dedup_sig: str) -> bool: - cooldown = max(1, int(self.config.get("dedupCooldownMinutes", 10))) - with self.graph.session() as session: - result = session.run( - """ - MATCH (e:AnomalyEvent {dedup_key: $dedup_key}) - WHERE e.created_at IS NOT NULL - AND datetime(e.created_at) > datetime() - duration({minutes: $minutes}) - RETURN count(e) AS cnt - """, - dedup_key=dedup_sig, - minutes=cooldown, - ) - row = result.single() - return bool(row and row["cnt"] > 0) + def _run_cycle(self) -> Dict[str, Any]: + thresholds = self.config.get("thresholds", {}) + stale_sec = int(thresholds.get("stalenessSec", 120)) + min_history = int(self.config.get("minHistoryPoints", 30)) + max_candidates = int(self.config.get("maxCandidatesPerSubsystem", 8)) + max_llm = int(self.config.get("maxLlmTriagesPerSubsystem", 0)) + + tag_paths = [t["path"] for t in self.tag_metas] + if not tag_paths: + return {"candidates": 0, "triaged": 0, "emitted": 0, "diagnostics": {"phase": "cycle_complete", "reason": "no_tags"}} + + self._emit_progress("reading_tags", f"{len(tag_paths)} tags") + t_read = time.time() + with _api_semaphore: + live_values = self.api.read_tags(tag_paths) + read_ms = int((time.time() - t_read) * 1000) + now = datetime.now(timezone.utc) - def persist_event( - self, - context: Dict[str, Any], - deterministic: Dict[str, Any], - live_sample: Dict[str, Any], - triage: Dict[str, Any], - subsystem: Optional[Dict[str, str]] = None, - ) -> Optional[Dict[str, Any]]: - category = 
triage.get("category") or deterministic.get("category", "deviation") - subsystem_ref = subsystem or _subsystem_ref("global", "all") - dedup_source = f"{context['tag_path']}::{subsystem_ref.get('id', 'global:all')}" - dedup_sig = dedup_key(dedup_source, category, int(self.config.get("dedupCooldownMinutes", 10))) - if self.is_duplicate_recent(dedup_sig): - return None - - event_id = f"ae-{uuid.uuid4()}" - severity = self._severity_from_scores(deterministic, triage) - confidence = float(max(0.0, min(1.0, triage.get("confidence", 0.5)))) - event_data = { - "event_id": event_id, - "run_id": self.run_id, - "event_schema_version": 1, - "state": "open", - "severity": severity, - "confidence": confidence, - "category": category, - "summary": triage.get("summary", f"Anomaly on {context['tag_path']}"), - "explanation": triage.get("rationale", ""), - "recommended_checks_json": json.dumps(triage.get("verification_checks", []), default=str), - "probable_causes_json": json.dumps(triage.get("probable_causes", []), default=str), - "safety_notes_json": json.dumps(triage.get("safety_notes", []), default=str), - "deterministic_reasons_json": json.dumps(deterministic.get("reasons", []), default=str), - "z_score": float(deterministic.get("z_score", 0.0)), - "mad_score": float(deterministic.get("mad_score", 0.0)), - "delta_rate": float(deterministic.get("delta_rate", 0.0)), - "window_volatility": float(deterministic.get("window_volatility", 0.0)), - "source_tag": context["tag_path"], - "tag_name": context.get("tag_name") or context["tag_path"], - "subsystem_type": subsystem_ref.get("type"), - "subsystem_name": subsystem_ref.get("name"), - "subsystem_id": subsystem_ref.get("id"), - "live_quality": live_sample.get("quality"), - "live_timestamp": live_sample.get("timestamp"), - "live_value": str(live_sample.get("value")), - "dedup_key": dedup_sig, - "created_at": utc_now_iso(), - "updated_at": utc_now_iso(), + tags_for_history: List[Tuple[Any, Dict[str, Any]]] = [] + live_error_count = 0 + 
quality_filtered = 0 + stale_filtered = 0 + + for idx, tv in enumerate(live_values): + tag_meta = self.tag_metas[idx] if idx < len(self.tag_metas) else {"path": tv.path, "name": tv.path} + if tv.error: + live_error_count += 1 + continue + if not is_quality_good(tv.quality): + quality_filtered += 1 + continue + if is_stale(tv.timestamp, stale_sec, now=now): + stale_filtered += 1 + continue + tags_for_history.append((tv, tag_meta)) + + self._emit_progress("fetching_history", f"{len(tags_for_history)} tags") + history_paths = [tv.path for tv, _ in tags_for_history] + t_hist = time.time() + history_results = self._fetch_history_batch(history_paths) if history_paths else {} + hist_ms = int((time.time() - t_hist) * 1000) + + self._emit_progress("scoring", f"{len(tags_for_history)} tags (read={read_ms}ms hist={hist_ms}ms)") + t_score = time.time() + shift_signal = { + "subsystemId": self.subsystem_id, + "subsystemType": self.subsystem_type, + "subsystemName": self.subsystem_name, + "evaluated": 0, "candidate": 0, "nearShift": 0, + "sumAbsZ": 0.0, "maxAbsZ": 0.0, + "_tagEntries": [], } + candidates: List[Dict] = [] + history_errors = 0 + insufficient_history = 0 + + for tv, tag_meta in tags_for_history: + history, hist_err = history_results.get(tv.path, ([], "No history")) + if hist_err: + history_errors += 1 + continue + if len(history) < min_history and len(history) < 5: + insufficient_history += 1 + continue - with self.graph.session() as session: - session.run( - """ - MATCH (r:AgentRun {run_id: $run_id}) - CREATE (e:AnomalyEvent $props) - MERGE (r)-[:EMITTED]->(e) - """, - run_id=self.run_id, - props=event_data, - ) + prev_val = self._prev_values.get(tv.path) + det = compute_deviation_scores(tv.value, history, prev_value=prev_val, thresholds=thresholds) + curr = safe_float(tv.value) + if curr is not None: + self._prev_values[tv.path] = curr + + abs_z = abs(float(det.get("z_score", 0.0))) + z = float(det.get("z_score", 0.0)) + shift_signal["evaluated"] += 1 + 
shift_signal["sumAbsZ"] += abs_z + if abs_z > shift_signal["maxAbsZ"]: + shift_signal["maxAbsZ"] = abs_z + if abs_z >= 1.5: + shift_signal["nearShift"] += 1 + + tag_name = tv.path.rsplit("/", 1)[-1] if "/" in str(tv.path) else str(tv.path) + cached_hist = self._history_cache.get(tv.path) + sparkline = None + avg_val = None + if cached_hist and cached_hist.get("values"): + vals = cached_hist["values"] + avg_val = round(sum(vals) / len(vals), 2) + if len(vals) <= 20: + sparkline = [round(v, 2) for v in vals] + else: + step = len(vals) / 20 + sparkline = [round(vals[int(i * step)], 2) for i in range(20)] - session.run( - """ - MATCH (e:AnomalyEvent {event_id: $event_id}) - MATCH (t:ScadaTag) - WHERE t.name = $tag OR t.opc_item_path = $tag - MERGE (e)-[:OBSERVED_ON]->(t) - """, - event_id=event_id, - tag=context["tag_path"], - ) + shift_signal["_tagEntries"].append({ + "path": str(tv.path), "name": tag_name, + "z": round(z, 3), "mad": round(float(det.get("mad_score", 0)), 3), + "value": tv.value, "avg": avg_val, "sparkline": sparkline, + }) - for equipment_name in context.get("equipment", [])[:5]: - session.run( - """ - MATCH (e:AnomalyEvent {event_id: $event_id}) - MATCH (eq:Equipment {name: $name}) - MERGE (e)-[:AFFECTS]->(eq) - """, - event_id=event_id, - name=equipment_name, - ) + cat = det.get("category", "normal") + if det.get("candidate") and cat != "stuck" and len(candidates) < max_candidates: + shift_signal["candidate"] += 1 + context = self._get_context(tv.path) + context["subsystem"] = _subsystem_ref(self.subsystem_type, self.subsystem_name) + candidates.append({ + "context": context, "deterministic": det, + "live_sample": {"path": tv.path, "value": tv.value, "quality": tv.quality, "timestamp": tv.timestamp}, + }) - if subsystem_ref.get("type") == "view": - session.run( - """ - MATCH (e:AnomalyEvent {event_id: $event_id}) - MATCH (v:View {name: $name}) - MERGE (e)-[:SCOPED_TO]->(v) - """, - event_id=event_id, - name=subsystem_ref.get("name"), - ) - elif 
subsystem_ref.get("type") == "equipment": - session.run( - """ - MATCH (e:AnomalyEvent {event_id: $event_id}) - MATCH (eq:Equipment {name: $name}) - MERGE (e)-[:SCOPED_TO]->(eq) - """, - event_id=event_id, - name=subsystem_ref.get("name"), - ) + score_ms = int((time.time() - t_score) * 1000) + + t_triage = time.time() + live_events: List[Dict[str, Any]] = [] + now_iso = utc_now_iso() + for cand in candidates: + det = cand["deterministic"] + ctx = cand["context"] + ls = cand["live_sample"] + severity = "low" + abs_z = abs(float(det.get("z_score", 0))) + if abs_z >= 8: + severity = "critical" + elif abs_z >= 5: + severity = "high" + elif abs_z >= 3: + severity = "medium" + live_events.append({ + "event_id": f"live-{self.subsystem_id}-{ls.get('path', '')}", + "source_tag": ls.get("path", ""), + "tag_name": ctx.get("tag_name") or ls.get("path", ""), + "subsystem_id": self.subsystem_id, + "subsystem_type": self.subsystem_type, + "subsystem_name": self.subsystem_name, + "state": "open", + "severity": severity, + "category": det.get("category", "deviation"), + "summary": f"{det.get('category', 'Deviation')} on {ctx.get('tag_name', '?')} (z={det.get('z_score', 0):.1f})", + "z_score": float(det.get("z_score", 0)), + "mad_score": float(det.get("mad_score", 0)), + "delta_rate": float(det.get("delta_rate", 0)), + "confidence": 0.5, + "deterministic_reasons_json": json.dumps(det.get("reasons", []), default=str), + "live_value": str(ls.get("value")), + "live_quality": ls.get("quality"), + "live_timestamp": ls.get("timestamp"), + "created_at": now_iso, + }) + triage_ms = int((time.time() - t_triage) * 1000) + + self._total_candidates += len(candidates) + self._total_emitted += len(live_events) + + evaluated = max(1, shift_signal["evaluated"]) + tag_entries = shift_signal.pop("_tagEntries", []) + shift_signal["avgAbsZ"] = round(shift_signal["sumAbsZ"] / evaluated, 3) + shift_signal["shiftRatio"] = round(shift_signal["nearShift"] / evaluated, 3) + shift_signal["candidateRatio"] = 
round(shift_signal["candidate"] / evaluated, 3) + shift_signal.pop("sumAbsZ", None) + sorted_tags = sorted(tag_entries, key=lambda t: abs(t.get("z", 0)), reverse=True) + shift_signal["tagSignals"] = sorted_tags + + return { + "candidates": len(candidates), + "triaged": len(live_events), + "emitted": len(live_events), + "liveEvents": live_events, + "diagnostics": { + "phase": "cycle_complete", + "reason": "ok", + "monitoredTags": len(tag_paths), + "liveErrorCount": live_error_count, + "qualityFilteredCount": quality_filtered, + "staleFilteredCount": stale_filtered, + "historyErrorCount": history_errors, + "insufficientHistoryCount": insufficient_history, + "evaluatedCount": shift_signal["evaluated"], + "candidateCount": len(candidates), + "subsystemShiftSignals": [shift_signal], + "timingMs": { + "read": read_ms, + "history": hist_ms, + "score": score_ms, + "triage": triage_ms, + }, + }, + } - related_inputs: List[Dict[str, str]] = [] - for item in triage.get("related_entities", []) or []: - if isinstance(item, dict) and item.get("label") and item.get("name"): - related_inputs.append({"label": str(item["label"]), "name": str(item["name"])}) - for name in context.get("symptoms", [])[:3]: - related_inputs.append({"label": "FaultSymptom", "name": name}) - for name in context.get("causes", [])[:3]: - related_inputs.append({"label": "FaultCause", "name": name}) - - for rel in related_inputs[:8]: - label = rel["label"] - if label not in {"FaultSymptom", "FaultCause", "ControlPattern", "SafetyElement", "Equipment", "ScadaTag", "View"}: - continue - session.run( - f""" - MATCH (e:AnomalyEvent {{event_id: $event_id}}) - MATCH (n:{label}) - WHERE n.name = $name OR n.symptom = $name OR n.cause = $name - MERGE (e)-[:RELATED_TO]->(n) - """, - event_id=event_id, - name=rel["name"], - ) - return event_data +# ═══════════════════════════════════════════════════════════════════════════ +# AgentCoordinator — manages subsystem agents +# 
═══════════════════════════════════════════════════════════════════════════ - def _emit_persisted_event(self, persisted: Dict[str, Any]) -> None: - """Emit normalized AGENT_EVENT payload for UI stream.""" - emit("AGENT_EVENT", { - "runId": self.run_id, - "eventId": persisted["event_id"], - "severity": persisted["severity"], - "summary": persisted["summary"], - "category": persisted.get("category"), - "entityRefs": { - "tag": persisted.get("tag_name") or persisted.get("source_tag"), - "sourceTag": persisted.get("source_tag"), - "subsystemType": persisted.get("subsystem_type"), - "subsystemName": persisted.get("subsystem_name"), - }, - "createdAt": persisted.get("created_at"), - }) +class AgentCoordinator: + """Discovers subsystems, spawns/manages SubsystemAgent threads.""" - def emit_provider_failure_event( - self, - code: str, - message: str, - *, - severity: str = "high", - category: str = "quality-issue", - source_tag: Optional[str] = None, - details: Optional[Dict[str, Any]] = None, - subsystem: Optional[Dict[str, str]] = None, - ) -> bool: - """ - Persist and stream provider-health anomalies so failures appear in feed. - - Returns: - True if a new event was persisted (false if deduped). 
- """ - emit("AGENT_ERROR", { - "runId": self.run_id, - "code": code, - "message": message, - "recoverable": True, - "timestamp": utc_now_iso(), - }) + def __init__(self, config: Dict[str, Any], run_id: Optional[str] = None): + self.config = merge_defaults(config) + self.run_id = run_id or f"agent-{int(time.time() * 1000)}" + from ignition_api_client import IgnitionApiClient + from neo4j_ontology import get_ontology_graph - tag = source_tag or f"provider://{code}" - detail_blob = json.dumps(details or {}, default=str) - context = { - "tag_path": tag, - "tag_name": source_tag or "ProviderHealth", - "equipment": [], - "symptoms": [], - "causes": [], - "patterns": [], - "safety": [], - } - deterministic = { - "candidate": True, - "reasons": [code], - "category": category, - "z_score": 0.0, - "mad_score": 0.0, - "delta_rate": 0.0, - "window_volatility": 0.0, - "history_points": 0, - } - triage = { - "summary": message, - "category": category, - "severity": severity, - "confidence": 0.9, - "probable_causes": [message], - "verification_checks": [ - "Check Ignition gateway connectivity and credentials.", - "Validate tag provider availability and endpoint health.", - ], - "safety_notes": [], - "rationale": f"Provider health event ({code}). 
Details: {detail_blob}", - "related_entities": [], - } - persisted = self.persist_event( - context=context, - deterministic=deterministic, - live_sample={ - "path": tag, - "value": "", - "quality": "Bad", - "timestamp": utc_now_iso(), - "data_type": "provider_health", - }, - triage=triage, - subsystem=subsystem, + self.graph = get_ontology_graph() + self.api = IgnitionApiClient( + base_url=self.config.get("ignitionApiUrl") or os.getenv("IGNITION_API_URL"), + api_token=self.config.get("ignitionApiToken") or os.getenv("IGNITION_API_TOKEN"), + timeout=15.0, ) - if persisted: - self._emit_persisted_event(persisted) - return True - return False + self._running = True + self.agents: Dict[str, SubsystemAgent] = {} - # ----------------------------- - # Monitoring loop - # ----------------------------- - def run_cycle(self) -> Dict[str, Any]: - cycle_start = time.time() - thresholds = self.config.get("thresholds", {}) - stale_threshold_sec = int(thresholds.get("stalenessSec", 120)) - metrics = { - "candidates": 0, - "triaged": 0, - "emitted": 0, - "cycleMs": 0, - "diagnostics": make_default_diagnostics( - staleness_threshold_sec=stale_threshold_sec, - phase="cycle_start", - reason="cycle_initialized", - ), - } - min_history = int(self.config.get("minHistoryPoints", 30)) - max_candidates_total = max(1, int(self.config.get("maxCandidatesPerCycle", 25))) - max_candidates_per_subsystem = max(1, int(self.config.get("maxCandidatesPerSubsystem", 8))) - max_triage_total = max(0, int(self.config.get("maxLlmTriagesPerCycle", 5))) - max_triage_per_subsystem = max(0, int(self.config.get("maxLlmTriagesPerSubsystem", 2))) - - if not self.api.is_configured: - emitted = self.emit_provider_failure_event( - "ignition_not_configured", - "Ignition API URL/token not configured.", - severity="critical", - category="state-conflict", + # ------------------------------------------------------------------- + # Schema / lifecycle + # ------------------------------------------------------------------- 
+ def _init_schema(self) -> None: + self.graph.init_agent_monitoring_schema() + + def _upsert_run(self, status: str, reason: Optional[str] = None) -> None: + with self.graph.session() as session: + session.run( + """ + MERGE (r:AgentRun {run_id: $run_id}) + SET r.status = $status, r.updated_at = datetime(), + r.last_heartbeat_at = datetime(), + r.config_json = $cfg, + r.started_at = coalesce(r.started_at, datetime()), + r.stopped_at = CASE WHEN $status IN ['stopped','failed'] THEN datetime() ELSE r.stopped_at END, + r.stop_reason = CASE WHEN $reason IS NULL THEN r.stop_reason ELSE $reason END + """, + run_id=self.run_id, status=status, + cfg=json.dumps(self.config, default=str), reason=reason, ) - if emitted: - metrics["emitted"] += 1 - metrics["diagnostics"]["phase"] = "cycle_early_exit" - metrics["diagnostics"]["reason"] = "ignition_not_configured" - metrics["cycleMs"] = int((time.time() - cycle_start) * 1000) - return metrics - - tags = self.get_monitored_tags() - if not tags: - emit("AGENT_ERROR", { - "runId": self.run_id, - "code": "no_tags_found", - "message": "No ScadaTag nodes with readable tag paths found.", - "recoverable": True, - "timestamp": utc_now_iso(), + + # ------------------------------------------------------------------- + # Tag discovery + # ------------------------------------------------------------------- + def _fetch_tags(self) -> List[Dict[str, Any]]: + max_tags = int(self.config.get("maxMonitoredTags", 200)) + scope = self.config.get("scope", {}) + subsystem_mode = str(scope.get("subsystemMode") or "auto") + subsystem_priority = scope.get("subsystemPriority") or list(DEFAULT_SUBSYSTEM_PRIORITY) + include_unlinked = bool(scope.get("includeUnlinkedTags", False)) + tag_map: Dict[str, Dict[str, Any]] = {} + + def upsert(*, path: str, name: str, folder: str = "", views: List[str] = None, equipment: List[str] = None, source: str = "unknown"): + path = path.strip() + if not path: + return + entry = tag_map.setdefault(path, { + "path": path, 
"name": name or _last_segment(path) or path, + "folder_name": folder, "views": [], "equipment": [], + "source": source, "bound_to_view": False, }) - metrics["diagnostics"]["phase"] = "cycle_early_exit" - metrics["diagnostics"]["reason"] = "no_tags_found" - metrics["cycleMs"] = int((time.time() - cycle_start) * 1000) - return metrics - - tag_paths = [t["path"] for t in tags] - tag_lookup = {t["path"]: t for t in tags} - linked_tag_count = sum( - 1 for t in tags if (t.get("views") or t.get("equipment")) - ) - unlinked_tag_count = max(0, len(tags) - linked_tag_count) - detected_subsystems = sorted( - { - (t.get("primary_subsystem") or _subsystem_ref("global", "all")).get("id", "global:all") - for t in tags - } - ) + if source == "view_binding": + entry["bound_to_view"] = True + entry["source"] = source + if folder and not entry.get("folder_name"): + entry["folder_name"] = folder + if name and (not entry["name"] or entry["name"] == entry["path"]): + entry["name"] = name + for v in (views or []): + if v and v not in entry["views"]: + entry["views"].append(v) + for e in (equipment or []): + if e and e not in entry["equipment"]: + entry["equipment"].append(e) + + with self.graph.session() as session: + for r in session.run( + """ + MATCH (v:View)-[:HAS_COMPONENT]->(c:ViewComponent)-[r:BINDS_TO]->(n) + WHERE r.tag_path IS NOT NULL AND trim(r.tag_path) <> '' + AND toLower(coalesce(r.binding_type, 'tag')) = 'tag' + OPTIONAL MATCH (eq:Equipment)-[*1..2]-(n) + RETURN DISTINCT trim(r.tag_path) AS tag_path, coalesce(n.name,'') AS tag_name, + collect(DISTINCT v.name) AS views, collect(DISTINCT eq.name) AS equipment + LIMIT $lim + """, lim=max_tags * 4, + ): + p = str(r["tag_path"] or "").strip() + if _looks_like_tag_path(p): + upsert(path=p, name=str(r["tag_name"] or _last_segment(p)), + folder=infer_tag_group(p) or "", + views=[x for x in (r["views"] or []) if x], + equipment=[x for x in (r["equipment"] or []) if x], + source="view_binding") + + for r in session.run( + """ + 
MATCH (t:ScadaTag) WHERE t.opc_item_path IS NOT NULL AND trim(t.opc_item_path) <> '' + OPTIONAL MATCH (c:ViewComponent)-[:BINDS_TO]->(t) + OPTIONAL MATCH (v:View)-[:HAS_COMPONENT]->(c) + OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t) + RETURN DISTINCT trim(t.opc_item_path) AS tag_path, coalesce(t.name,t.opc_item_path) AS tag_name, + coalesce(t.folder_name,'') AS folder_name, + collect(DISTINCT v.name) AS views, collect(DISTINCT eq.name) AS equipment + LIMIT $lim + """, lim=max_tags * 6, + ): + p = str(r["tag_path"] or "").strip() + if _looks_like_tag_path(p): + upsert(path=p, name=str(r["tag_name"] or _last_segment(p)), + folder=str(r["folder_name"] or ""), + views=[x for x in (r["views"] or []) if x], + equipment=[x for x in (r["equipment"] or []) if x], + source="scada_tag") + + tags = list(tag_map.values()) + if not include_unlinked: + linked = [t for t in tags if t.get("views") or t.get("equipment") or t.get("bound_to_view")] + if linked: + tags = linked + + for tag in tags: + subs, primary = derive_subsystems_for_tag(tag, subsystem_mode=subsystem_mode, priority=subsystem_priority) + tag["subsystems"] = subs + tag["primary_subsystem"] = primary + + return tags[:max_tags] - subsystem_tag_map: Dict[str, Dict[str, Any]] = {} + def _discover_subsystems(self) -> Dict[str, Dict[str, Any]]: + tags = self._fetch_tags() + subsystems: Dict[str, Dict[str, Any]] = {} for t in tags: sub = t.get("primary_subsystem") or _subsystem_ref("global", "all") sub_id = sub.get("id", "global:all") - bucket = subsystem_tag_map.setdefault(sub_id, { + bucket = subsystems.setdefault(sub_id, { "type": sub.get("type", "global"), "name": sub.get("name", "all"), "tags": [], }) - bucket["tags"].append({ - "path": t["path"], - "name": t.get("name", t["path"]), - "views": t.get("views", []), - "equipment": t.get("equipment", []), - "allSubsystems": [s.get("id") for s in (t.get("subsystems") or [])], - }) - - live_values = self.api.read_tags(tag_paths) - tool_calls: List[Dict[str, Any]] = [] - 
tool_calls.append({ - "tool": "read_tags", - "request": { - "count": len(tag_paths), - "samplePaths": tag_paths[:8], - }, - "result": { - "count": len(live_values), - "errorCount": sum(1 for tv in live_values if tv.error), - "qualityGoodCount": sum(1 for tv in live_values if is_quality_good(tv.quality)), - "timestampMissingCount": sum(1 for tv in live_values if not tv.timestamp), - "timestampInferredCount": sum( - 1 - for tv in live_values - if isinstance(tv.config, dict) and bool(tv.config.get("timestamp_inferred")) - ), - "sample": [ - { - "path": tv.path, - "value": _preview_value(tv.value), - "quality": tv.quality, - "timestamp": tv.timestamp, - "timestampInferred": bool(tv.config.get("timestamp_inferred")) - if isinstance(tv.config, dict) - else False, - "configKeys": sorted(list(tv.config.keys()))[:8] - if isinstance(tv.config, dict) - else [], - "error": tv.error, - } - for tv in live_values[:5] - ], - }, - }) - candidates: List[Dict[str, Any]] = [] - now = datetime.now(timezone.utc) - live_error_count = 0 - live_error_samples: List[str] = [] - history_error_count = 0 - history_error_samples: List[str] = [] - valid_live_count = 0 - missing_timestamp_count = 0 - inferred_timestamp_count = 0 - quality_filtered_count = 0 - stale_filtered_count = 0 - insufficient_history_count = 0 - low_history_candidate_count = 0 - candidate_subsystem_counts: Dict[str, int] = {} - live_error_linked = 0 - live_error_unlinked = 0 - history_error_linked = 0 - history_error_unlinked = 0 - quality_filtered_linked = 0 - quality_filtered_unlinked = 0 - stale_filtered_linked = 0 - stale_filtered_unlinked = 0 - evaluated_linked = 0 - evaluated_unlinked = 0 - candidate_linked = 0 - candidate_unlinked = 0 - near_shift_count = 0 - near_shift_linked = 0 - near_shift_unlinked = 0 - stale_samples: List[Dict[str, Any]] = [] - subsystem_shift_signals: Dict[str, Dict[str, Any]] = {} - processed_live_count = 0 - total_live_count = len(live_values) - last_progress_emit = 0.0 - - def 
emit_cycle_progress(reason: str, current_tag: str = "", include_tag_map: bool = False) -> None: - nonlocal last_progress_emit - diag = make_default_diagnostics( - staleness_threshold_sec=stale_threshold_sec, - phase="cycle_in_progress", - reason=reason, - ) - diag.update({ - "processedLiveCount": processed_live_count, - "totalLiveCount": total_live_count, - "currentTag": current_tag, - "candidatesSoFar": len(candidates), - "liveErrorCount": live_error_count, - "qualityFilteredCount": quality_filtered_count, - "staleFilteredCount": stale_filtered_count, - "historyErrorCount": history_error_count, - "monitoredTags": len(tags), - "linkedTags": linked_tag_count, - "unlinkedTags": unlinked_tag_count, - "detectedSubsystemCount": len(detected_subsystems), - "detectedSubsystems": detected_subsystems[:10], - }) - if include_tag_map: - diag["subsystemTagMap"] = subsystem_tag_map - emit("AGENT_STATUS", { - "runId": self.run_id, - "state": "running", - "cycleMs": int((time.time() - cycle_start) * 1000), - "candidates": len(candidates), - "triaged": 0, - "emitted": metrics.get("emitted", 0), - "diagnostics": diag, - "timestamp": utc_now_iso(), - }) - last_progress_emit = time.time() - - emit_cycle_progress("cycle_started", include_tag_map=True) - - def _update_subsystem_signal( - subsystem_ref: Dict[str, str], deterministic: Dict[str, Any], - tag_path: str, live_value: Any = None, - ) -> None: - sub_id = subsystem_ref.get("id", "global:all") - abs_z = abs(float(deterministic.get("z_score", 0.0))) - z = float(deterministic.get("z_score", 0.0)) - mad = float(deterministic.get("mad_score", 0.0)) - bucket = subsystem_shift_signals.setdefault( - sub_id, - { - "subsystemId": sub_id, - "subsystemType": subsystem_ref.get("type", "global"), - "subsystemName": subsystem_ref.get("name", "all"), - "evaluated": 0, - "candidate": 0, - "nearShift": 0, - "sumAbsZ": 0.0, - "sumZ": 0.0, - "maxAbsZ": 0.0, - "sampleTag": tag_path, - "_tagEntries": [], - }, - ) - bucket["evaluated"] += 1 - 
bucket["sumAbsZ"] += abs_z - bucket["sumZ"] += z - if abs_z >= 1.5: - bucket["nearShift"] += 1 - if abs_z > bucket["maxAbsZ"]: - bucket["maxAbsZ"] = abs_z - bucket["sampleTag"] = tag_path - tag_name = tag_path.rsplit("/", 1)[-1] if "/" in str(tag_path) else str(tag_path) - bucket["_tagEntries"].append({ - "path": str(tag_path), - "name": tag_name, - "z": round(z, 3), - "absZ": round(abs_z, 3), - "mad": round(mad, 3), - "value": live_value, - }) - - # ---- Phase 1: Filter live values (no I/O) ---- - TagEntry = Tuple[Any, Dict[str, Any], Dict[str, str], bool] # (tv, tag_meta, subsystem, is_linked) - tags_for_history: List[TagEntry] = [] - - for idx, tv in enumerate(live_values): - processed_live_count += 1 - tag_meta = ( - tags[idx] if idx < len(tags) - else tag_lookup.get(tv.path, {"path": tv.path, "name": tv.path}) - ) - subsystem = tag_meta.get("primary_subsystem") or _subsystem_ref("global", "all") - is_linked = bool(tag_meta.get("views") or tag_meta.get("equipment")) - - if tv.error: - live_error_count += 1 - if is_linked: - live_error_linked += 1 - else: - live_error_unlinked += 1 - if len(live_error_samples) < 5: - live_error_samples.append(f"{tv.path}: {tv.error}") - continue - valid_live_count += 1 - if not tv.timestamp: - missing_timestamp_count += 1 - if isinstance(tv.config, dict) and bool(tv.config.get("timestamp_inferred")): - inferred_timestamp_count += 1 - if not is_quality_good(tv.quality): - quality_filtered_count += 1 - if is_linked: - quality_filtered_linked += 1 - else: - quality_filtered_unlinked += 1 - continue - parsed_ts = parse_timestamp(tv.timestamp) - age_sec = (now - parsed_ts).total_seconds() if parsed_ts is not None else None - if is_stale(tv.timestamp, stale_threshold_sec, now=now): - stale_filtered_count += 1 - if is_linked: - stale_filtered_linked += 1 - else: - stale_filtered_unlinked += 1 - if len(stale_samples) < 8: - stale_samples.append({ - "path": tv.path, - "timestampRaw": tv.timestamp, - "timestampParsedUtc": 
parsed_ts.isoformat() if parsed_ts else None, - "ageSec": round(age_sec, 3) if age_sec is not None else None, - "thresholdSec": stale_threshold_sec, - "reason": "timestamp_parse_failed" if parsed_ts is None else "age_exceeds_threshold", - }) - continue - - tags_for_history.append((tv, tag_meta, subsystem, is_linked)) - - emit_cycle_progress( - "filtering_complete", - current_tag=f"{len(tags_for_history)} tags passed filters", + bucket["tags"].append(t) + return subsystems + + # ------------------------------------------------------------------- + # Agent management + # ------------------------------------------------------------------- + def _spawn_agent(self, sub_id: str, info: Dict[str, Any], stagger_delay: float = 0.0) -> SubsystemAgent: + agent = SubsystemAgent( + subsystem_id=sub_id, + subsystem_type=info["type"], + subsystem_name=info["name"], + tag_metas=info["tags"], + graph=self.graph, + api=self.api, + config=self.config, + run_id=self.run_id, + stagger_delay=stagger_delay, ) + agent.start() + self.agents[sub_id] = agent + return agent + + def _stop_agent(self, sub_id: str) -> None: + agent = self.agents.pop(sub_id, None) + if agent: + agent.stop() + + def _stop_all(self) -> None: + for agent in self.agents.values(): + agent.stop() + for agent in list(self.agents.values()): + agent.join(timeout=5) + self.agents.clear() + + # ------------------------------------------------------------------- + # Stdin command reader + # ------------------------------------------------------------------- + def _stdin_reader(self) -> None: + while self._running: + try: + line = sys.stdin.readline() + if not line: + break + line = line.strip() + if not line: + continue + cmd = json.loads(line) + self._handle_command(cmd) + except (json.JSONDecodeError, Exception): + continue - # ---- Phase 2: Batched history fetch ---- - history_fetch_start = time.time() - history_paths = [tv.path for tv, _, _, _ in tags_for_history] - history_results = 
self.fetch_history_batch(history_paths) if history_paths else {} - history_fetch_elapsed = time.time() - history_fetch_start - emit_cycle_progress( - "history_complete", - current_tag=f"{len(history_results)} in {round(history_fetch_elapsed, 1)}s", - ) + def _handle_command(self, cmd: Dict[str, Any]) -> None: + action = cmd.get("cmd", "") + sub_id = cmd.get("subsystemId", "") - # ---- Phase 3: Score and build candidates using pre-fetched history ---- - for tv, tag_meta, subsystem, is_linked in tags_for_history: - history, history_error = history_results.get(tv.path, ([], "No history result")) - - if len(tool_calls) < 18: - tool_calls.append({ - "tool": "query_tag_history", - "request": { - "tagPath": tv.path, - "historyWindowMinutes": int(self.config.get("historyWindowMinutes", 360)), - }, - "result": { - "historyPoints": len(history), - "error": history_error, - }, + if action == "stop-all": + self._running = False + elif action == "stop-agent" and sub_id: + agent = self.agents.get(sub_id) + if agent: + agent.pause() + emit("AGENT_STATUS", { + "runId": self.run_id, "subsystemId": sub_id, + "state": "paused", "diagnostics": {"phase": "agent_paused", "reason": "user_request"}, + "timestamp": utc_now_iso(), }) - if history_error: - history_error_count += 1 - if is_linked: - history_error_linked += 1 - else: - history_error_unlinked += 1 - if len(history_error_samples) < 5: - history_error_samples.append(f"{tv.path}: {history_error}") - continue - if len(history) < min_history: - insufficient_history_count += 1 - if len(history) >= 5: - prev_val = self._prev_values.get(tv.path) - deterministic = compute_deviation_scores( - current_value=tv.value, - history_values=history, - prev_value=prev_val, - thresholds=thresholds, + elif action == "start-agent" and sub_id: + agent = self.agents.get(sub_id) + if agent: + agent.resume() + emit("AGENT_STATUS", { + "runId": self.run_id, "subsystemId": sub_id, + "state": "running", "diagnostics": {"phase": "agent_resumed", "reason": 
"user_request"}, + "timestamp": utc_now_iso(), + }) + elif action == "deep-analyze": + event_data = cmd.get("event", {}) + threading.Thread( + target=self._deep_analyze_inline, + args=(event_data,), + daemon=True, + name="deep-analyze", + ).start() + + # ------------------------------------------------------------------- + # Deep analyze (inline, runs in background thread) + # ------------------------------------------------------------------- + def _deep_analyze_inline(self, event_data: Dict[str, Any]) -> None: + event_id = event_data.get("event_id", "?") + tag_path = event_data.get("source_tag") or event_data.get("tag_name", "") + sub_id = event_data.get("subsystem_id", "") + if not tag_path: + emit("AGENT_EVENT", {"runId": self.run_id, "deepAnalyze": True, + "event": {**event_data, "deep_analyze_error": "No source_tag"}}) + return + agent = self.agents.get(sub_id) if sub_id else None + llm = None + if agent and agent.llm: + llm = agent.llm + else: + if bool(os.getenv("ANTHROPIC_API_KEY")): + try: + from claude_client import ClaudeClient + llm = ClaudeClient( + enable_tools=False, + ignition_api_url=self.config.get("ignitionApiUrl"), + ignition_api_token=self.config.get("ignitionApiToken"), ) - curr_num = safe_float(tv.value) - if curr_num is not None: - self._prev_values[tv.path] = curr_num - - _update_subsystem_signal(subsystem, deterministic, tv.path, live_value=tv.value) - if is_linked: - evaluated_linked += 1 - else: - evaluated_unlinked += 1 - if abs(float(deterministic.get("z_score", 0.0))) >= 1.5: - near_shift_count += 1 - if is_linked: - near_shift_linked += 1 - else: - near_shift_unlinked += 1 - - if deterministic.get("candidate"): - sub_bucket = subsystem_shift_signals.setdefault( - subsystem.get("id", "global:all"), - { - "subsystemId": subsystem.get("id", "global:all"), - "subsystemType": subsystem.get("type", "global"), - "subsystemName": subsystem.get("name", "all"), - "evaluated": 0, - "candidate": 0, - "nearShift": 0, - "sumAbsZ": 0.0, - "sumZ": 
0.0, - "maxAbsZ": 0.0, - "sampleTag": tv.path, - "_tagEntries": [], - }, - ) - sub_bucket["candidate"] += 1 - if is_linked: - candidate_linked += 1 - else: - candidate_unlinked += 1 - deterministic["reasons"] = list(deterministic.get("reasons", [])) + ["low_history_override"] - deterministic["history_quality"] = "low" - context = self.get_context(tv.path) - context["subsystem"] = subsystem - context["subsystems"] = tag_meta.get("subsystems") or [subsystem] - candidates.append( - { - "context": context, - "deterministic": deterministic, - "live_sample": { - "path": tv.path, - "value": tv.value, - "quality": tv.quality, - "timestamp": tv.timestamp, - "data_type": tv.data_type, - }, - "subsystem": subsystem, - } - ) - sub_id = subsystem.get("id", "global:all") - candidate_subsystem_counts[sub_id] = candidate_subsystem_counts.get(sub_id, 0) + 1 - low_history_candidate_count += 1 - continue + except Exception: + pass + if not llm: + emit("AGENT_EVENT", {"runId": self.run_id, "deepAnalyze": True, + "event": {**event_data, "deep_analyze_error": "No LLM available (check ANTHROPIC_API_KEY)"}}) + return - prev_val = self._prev_values.get(tv.path) - deterministic = compute_deviation_scores( - current_value=tv.value, - history_values=history, - prev_value=prev_val, - thresholds=thresholds, - ) - curr_num = safe_float(tv.value) - if curr_num is not None: - self._prev_values[tv.path] = curr_num + det = { + "z_score": event_data.get("z_score", 0), + "mad_score": event_data.get("mad_score", 0), + "delta_rate": event_data.get("delta_rate", 0), + "category": event_data.get("category", "deviation"), + "reasons": json.loads(event_data.get("deterministic_reasons_json", "[]")), + } + context = {"tag_path": tag_path, "tag_name": event_data.get("tag_name", tag_path), + "equipment": [], "views": [], "group": "", "symptoms": [], "causes": []} + live_sample = {"path": tag_path, "value": event_data.get("live_value"), + "quality": event_data.get("live_quality"), "timestamp": 
event_data.get("live_timestamp")} - _update_subsystem_signal(subsystem, deterministic, tv.path, live_value=tv.value) - if is_linked: - evaluated_linked += 1 - else: - evaluated_unlinked += 1 - if abs(float(deterministic.get("z_score", 0.0))) >= 1.5: - near_shift_count += 1 - if is_linked: - near_shift_linked += 1 - else: - near_shift_unlinked += 1 - - if deterministic.get("candidate"): - sub_bucket = subsystem_shift_signals.setdefault( - subsystem.get("id", "global:all"), - { - "subsystemId": subsystem.get("id", "global:all"), - "subsystemType": subsystem.get("type", "global"), - "subsystemName": subsystem.get("name", "all"), - "evaluated": 0, - "candidate": 0, - "nearShift": 0, - "sumAbsZ": 0.0, - "sumZ": 0.0, - "maxAbsZ": 0.0, - "sampleTag": tv.path, - "_tagEntries": [], - }, - ) - sub_bucket["candidate"] += 1 - if is_linked: - candidate_linked += 1 - else: - candidate_unlinked += 1 - context = self.get_context(tv.path) - context["subsystem"] = subsystem - context["subsystems"] = tag_meta.get("subsystems") or [subsystem] - candidates.append( - { - "context": context, - "deterministic": deterministic, - "live_sample": { - "path": tv.path, - "value": tv.value, - "quality": tv.quality, - "timestamp": tv.timestamp, - "data_type": tv.data_type, - }, - "subsystem": subsystem, - } - ) - sub_id = subsystem.get("id", "global:all") - candidate_subsystem_counts[sub_id] = candidate_subsystem_counts.get(sub_id, 0) + 1 - - emit_cycle_progress("scoring_complete") - - if live_values and live_error_count == len(live_values): - emitted = self.emit_provider_failure_event( - "live_tag_provider_failed", - f"Live tag provider failed for all reads ({live_error_count}/{len(live_values)}).", - severity="high", - category="quality-issue", - details={"samples": live_error_samples}, - ) - if emitted: - metrics["emitted"] += 1 - elif live_error_count > 0: - emitted = self.emit_provider_failure_event( - "live_tag_provider_partial_failure", - f"Live tag provider partially failed 
({live_error_count}/{len(live_values)} reads).", - severity="medium", - category="quality-issue", - details={"samples": live_error_samples}, - ) - if emitted: - metrics["emitted"] += 1 - - if valid_live_count > 0 and history_error_count >= max(1, int(valid_live_count * 0.8)): - emitted = self.emit_provider_failure_event( - "history_provider_failed", - f"History provider failed for most queries ({history_error_count}/{valid_live_count}).", - severity="high", - category="quality-issue", - details={"samples": history_error_samples}, - ) - if emitted: - metrics["emitted"] += 1 - elif history_error_count > 0: - emitted = self.emit_provider_failure_event( - "history_provider_partial_failure", - f"History provider partially failed ({history_error_count}/{valid_live_count}).", - severity="medium", - category="quality-issue", - details={"samples": history_error_samples}, - ) - if emitted: - metrics["emitted"] += 1 - - if valid_live_count > 0 and stale_filtered_count >= max(1, int(valid_live_count * 0.8)): - emitted = self.emit_provider_failure_event( - "live_timestamp_stale", - f"Most live samples were stale ({stale_filtered_count}/{valid_live_count}).", - severity="medium", - category="quality-issue", - details={"staleCount": stale_filtered_count, "validLiveCount": valid_live_count}, - ) - if emitted: - metrics["emitted"] += 1 - - if valid_live_count > 0 and quality_filtered_count >= max(1, int(valid_live_count * 0.8)): - emitted = self.emit_provider_failure_event( - "live_quality_bad", - f"Most live samples had non-good quality ({quality_filtered_count}/{valid_live_count}).", - severity="medium", - category="quality-issue", - details={"qualityFilteredCount": quality_filtered_count, "validLiveCount": valid_live_count}, - ) - if emitted: - metrics["emitted"] += 1 - - metrics["candidates"] = len(candidates) - shortlisted: List[Dict[str, Any]] = [] - selected_per_subsystem: Dict[str, int] = {} - for candidate in candidates: - subsystem = candidate.get("subsystem") or 
_subsystem_ref("global", "all") - sub_id = subsystem.get("id", "global:all") - if selected_per_subsystem.get(sub_id, 0) >= max_candidates_per_subsystem: - continue - shortlisted.append(candidate) - selected_per_subsystem[sub_id] = selected_per_subsystem.get(sub_id, 0) + 1 - if len(shortlisted) >= max_candidates_total: - break - - llm_total = 0 - llm_per_subsystem: Dict[str, int] = {} - dedup_suppressed_count = 0 - - if shortlisted: - emit_cycle_progress( - "triage_started", - current_tag=f"{len(shortlisted)} candidates to process", + try: + result = llm.query_json( + system_prompt=( + "You are an industrial anomaly triage assistant. " + "Return ONLY valid JSON with keys: summary, category, severity, confidence, " + "probable_causes, verification_checks, safety_notes, rationale, related_entities." + ), + user_prompt=json.dumps({"context": context, "deterministic": det, "live_sample": live_sample}, default=str), + max_tokens=900, + use_tools=False, ) + data = result.get("data", {}) if isinstance(result, dict) else {} + updated = dict(event_data) + if isinstance(data, dict): + updated["summary"] = data.get("summary", updated.get("summary", "")) + updated["explanation"] = data.get("rationale", updated.get("explanation", "")) + updated["probable_causes_json"] = json.dumps(data.get("probable_causes", [])) + updated["recommended_checks_json"] = json.dumps(data.get("verification_checks", [])) + updated["safety_notes_json"] = json.dumps(data.get("safety_notes", [])) + updated["severity"] = data.get("severity", updated.get("severity", "medium")) + updated["confidence"] = data.get("confidence", updated.get("confidence", 0.5)) + updated["deep_analyzed"] = True + emit("AGENT_EVENT", {"runId": self.run_id, "deepAnalyze": True, "event": updated}) + except Exception as exc: + emit("AGENT_EVENT", {"runId": self.run_id, "deepAnalyze": True, + "event": {**event_data, "deep_analyze_error": str(exc)}}) - for ci, candidate in enumerate(shortlisted): - subsystem = 
candidate.get("subsystem") or _subsystem_ref("global", "all") - sub_id = subsystem.get("id", "global:all") - tag_name = candidate["context"].get("tag_name", candidate["context"].get("tag_path", "?")) - use_llm = ( - llm_total < max_triage_total - and llm_per_subsystem.get(sub_id, 0) < max_triage_per_subsystem - ) - triage = ( - self.run_llm_triage( - candidate["context"], - candidate["deterministic"], - candidate["live_sample"], - ) - if use_llm - else { - "summary": ( - f"Deviation on {candidate['context'].get('tag_name', candidate['context']['tag_path'])} " - f"in subsystem {subsystem.get('name', 'all')}" - ), - "category": candidate["deterministic"].get("category", "deviation"), - "severity": "medium", - "confidence": 0.5, - "verification_checks": [], - "probable_causes": [], - "safety_notes": [], - "rationale": "Deterministic-only triage (LLM triage disabled or cap reached).", - "related_entities": [], - } - ) - if use_llm: - llm_total += 1 - llm_per_subsystem[sub_id] = llm_per_subsystem.get(sub_id, 0) + 1 - metrics["triaged"] += 1 - persisted = self.persist_event( - candidate["context"], - candidate["deterministic"], - candidate["live_sample"], - triage, - subsystem=subsystem, - ) - if persisted: - metrics["emitted"] += 1 - self._emit_persisted_event(persisted) - else: - dedup_suppressed_count += 1 + # ------------------------------------------------------------------- + # Main loop + # ------------------------------------------------------------------- + def run(self) -> int: + self._init_schema() + self._upsert_run("running") - if (ci + 1) % 5 == 0 or ci == len(shortlisted) - 1: - emit_cycle_progress( - "triaging", - current_tag=f"{ci + 1}/{len(shortlisted)} ({tag_name})", - ) + emit("AGENT_STATUS", { + "runId": self.run_id, "state": "running", + "diagnostics": {"phase": "startup", "reason": "coordinator_started"}, + "timestamp": utc_now_iso(), + }) + + subsystems = self._discover_subsystems() + tag_map: Dict[str, Any] = {} + stagger_sec = 1.5 # seconds 
between each agent's first cycle + for idx, (sub_id, info) in enumerate(subsystems.items()): + tag_map[sub_id] = { + "type": info["type"], "name": info["name"], + "tags": [{"path": t["path"], "name": t.get("name", t["path"])} for t in info["tags"]], + } + self._spawn_agent(sub_id, info, stagger_delay=idx * stagger_sec) - top_candidates_by_subsystem = dict( - sorted(candidate_subsystem_counts.items(), key=lambda item: item[1], reverse=True)[:10] - ) - top_shift_signals = sorted( - subsystem_shift_signals.values(), - key=lambda item: ( - int(item.get("candidate", 0)), - float(item.get("maxAbsZ", 0.0)), - int(item.get("nearShift", 0)), - int(item.get("evaluated", 0)), - ), - reverse=True, - ) - sparkline_size = 20 - for item in top_shift_signals: - evaluated = max(1, int(item.get("evaluated", 0))) - item["avgAbsZ"] = round(float(item.get("sumAbsZ", 0.0)) / evaluated, 3) - item["avgZ"] = round(float(item.get("sumZ", 0.0)) / evaluated, 3) - item["shiftRatio"] = round(float(item.get("nearShift", 0)) / evaluated, 3) - item["candidateRatio"] = round(float(item.get("candidate", 0)) / evaluated, 3) - item.pop("sumAbsZ", None) - item.pop("sumZ", None) - raw_tags = item.pop("_tagEntries", []) - sorted_tags = sorted(raw_tags, key=lambda t: t.get("absZ", 0.0), reverse=True) - tag_signals = [] - for t in sorted_tags: - entry = {k: v for k, v in t.items() if k != "absZ"} - cached_hist = self._history_cache.get(t.get("path", "")) - if cached_hist and cached_hist.get("values"): - vals = cached_hist["values"] - entry["avg"] = round(sum(vals) / len(vals), 2) - if len(vals) <= sparkline_size: - entry["sparkline"] = [round(v, 2) for v in vals] - else: - step = len(vals) / sparkline_size - entry["sparkline"] = [round(vals[int(i * step)], 2) for i in range(sparkline_size)] - tag_signals.append(entry) - item["tagSignals"] = tag_signals - - metrics["diagnostics"] = { - **make_default_diagnostics( - staleness_threshold_sec=int(thresholds.get("stalenessSec", 120)), - phase="cycle_complete", - 
reason="ok", - ), - "monitoredTags": len(tag_paths), - "linkedTags": linked_tag_count, - "unlinkedTags": unlinked_tag_count, - "validLiveCount": valid_live_count, - "missingTimestampCount": missing_timestamp_count, - "inferredTimestampCount": inferred_timestamp_count, - "liveErrorCount": live_error_count, - "liveErrorLinked": live_error_linked, - "liveErrorUnlinked": live_error_unlinked, - "qualityFilteredCount": quality_filtered_count, - "qualityFilteredLinked": quality_filtered_linked, - "qualityFilteredUnlinked": quality_filtered_unlinked, - "staleFilteredCount": stale_filtered_count, - "staleFilteredLinked": stale_filtered_linked, - "staleFilteredUnlinked": stale_filtered_unlinked, - "historyErrorCount": history_error_count, - "historyErrorLinked": history_error_linked, - "historyErrorUnlinked": history_error_unlinked, - "insufficientHistoryCount": insufficient_history_count, - "lowHistoryCandidateCount": low_history_candidate_count, - "evaluatedLinked": evaluated_linked, - "evaluatedUnlinked": evaluated_unlinked, - "candidateLinked": candidate_linked, - "candidateUnlinked": candidate_unlinked, - "nearShiftCount": near_shift_count, - "nearShiftLinked": near_shift_linked, - "nearShiftUnlinked": near_shift_unlinked, - "stalenessThresholdSec": int(thresholds.get("stalenessSec", 120)), - "staleSamples": stale_samples, - "timestampParseNote": "Naive timestamps are treated as local time by parse_timestamp().", - "detectedSubsystemCount": len(detected_subsystems), - "detectedSubsystems": detected_subsystems[:10], - "subsystemTagMap": subsystem_tag_map, - "candidateSubsystemCount": len(candidate_subsystem_counts), - "candidateBySubsystem": top_candidates_by_subsystem, - "subsystemShiftSignals": top_shift_signals, - "maxCandidatesPerSubsystem": max_candidates_per_subsystem, - "maxLlmTriagesPerSubsystem": max_triage_per_subsystem, - "llmTriagedCount": llm_total, - "dedupSuppressedCount": dedup_suppressed_count, - "toolCalls": tool_calls, - } - metrics["cycleMs"] = 
int((time.time() - cycle_start) * 1000) - return metrics - - def cleanup_retention(self) -> int: - retention_days = int(self.config.get("retentionDays", 14)) - return self.graph.cleanup_anomaly_events(retention_days=retention_days) - - def run_forever(self) -> int: - self.init_schema() - self.upsert_run("running") - startup_diag = make_default_diagnostics( - staleness_threshold_sec=int(self.config.get("thresholds", {}).get("stalenessSec", 120)), - phase="startup", - reason="worker_started", - ) emit("AGENT_STATUS", { - "runId": self.run_id, - "state": "running", - "cycleMs": 0, - "candidates": 0, - "triaged": 0, - "emitted": 0, - "diagnostics": startup_diag, + "runId": self.run_id, "state": "running", + "diagnostics": { + "phase": "agents_started", + "reason": f"{len(self.agents)} subsystem agents spawned", + "subsystemTagMap": tag_map, + "agentCount": len(self.agents), + "agentIds": list(self.agents.keys()), + }, "timestamp": utc_now_iso(), }) - poll_ms = int(self.config.get("pollIntervalMs", 1000)) + stdin_thread = threading.Thread(target=self._stdin_reader, daemon=True, name="stdin-reader") + stdin_thread.start() + + rediscovery_interval = float(self.config.get("rediscoveryIntervalSec", 60)) cleanup_every = max(1, int(self.config.get("cleanupEveryCycles", 40))) - exit_code = 0 - reason = "stopped" + last_rediscovery = time.time() + watchdog_count = 0 while self._running: - self._cycle_count += 1 - cycle_started = time.time() - try: - metrics = self.run_cycle() - self.heartbeat(metrics) - emit("AGENT_STATUS", { - "runId": self.run_id, - "state": "running", - "cycleMs": metrics["cycleMs"], - "candidates": metrics["candidates"], - "triaged": metrics["triaged"], - "emitted": metrics["emitted"], - "diagnostics": metrics.get("diagnostics", {}), - "timestamp": utc_now_iso(), - }) - if self._cycle_count % cleanup_every == 0: - deleted = self.cleanup_retention() - if deleted > 0: - cleanup_diag = make_default_diagnostics( - 
staleness_threshold_sec=int(self.config.get("thresholds", {}).get("stalenessSec", 120)), - phase="retention_cleanup", - reason="cleanup_complete", - ) - cleanup_diag["emittedCleanupCount"] = deleted + time.sleep(2) + watchdog_count += 1 + + if time.time() - last_rediscovery >= rediscovery_interval: + try: + new_subs = self._discover_subsystems() + new_ids = set(new_subs.keys()) + old_ids = set(self.agents.keys()) + + for sub_id in new_ids - old_ids: + info = new_subs[sub_id] + self._spawn_agent(sub_id, info) emit("AGENT_STATUS", { - "runId": self.run_id, - "state": "retention_cleanup", - "cycleMs": 0, - "candidates": 0, - "triaged": 0, - "emitted": deleted, - "diagnostics": cleanup_diag, + "runId": self.run_id, "subsystemId": sub_id, "state": "running", + "diagnostics": {"phase": "agent_discovered", "reason": "new_subsystem"}, "timestamp": utc_now_iso(), }) - except Exception as exc: - reason = "failed" - exit_code = 1 - emit("AGENT_ERROR", { - "runId": self.run_id, - "code": "cycle_error", - "message": str(exc), - "recoverable": True, - "timestamp": utc_now_iso(), - }) - error_diag = make_default_diagnostics( - staleness_threshold_sec=int(self.config.get("thresholds", {}).get("stalenessSec", 120)), - phase="cycle_error", - reason="exception", - ) - error_diag["errorMessage"] = str(exc) - emit("AGENT_STATUS", { - "runId": self.run_id, - "state": "running", - "cycleMs": int((time.time() - cycle_started) * 1000), - "candidates": 0, - "triaged": 0, - "emitted": 0, - "diagnostics": error_diag, - "timestamp": utc_now_iso(), - }) - elapsed_ms = int((time.time() - cycle_started) * 1000) - remaining = max(0, poll_ms - elapsed_ms) / 1000.0 - if remaining > 0: - time.sleep(remaining) + for sub_id in old_ids & new_ids: + agent = self.agents.get(sub_id) + if agent: + agent.update_tags(new_subs[sub_id]["tags"]) + + tag_map = {} + for sub_id, info in new_subs.items(): + tag_map[sub_id] = { + "type": info["type"], "name": info["name"], + "tags": [{"path": t["path"], "name": 
t.get("name", t["path"])} for t in info["tags"]], + } + emit("AGENT_STATUS", { + "runId": self.run_id, "state": "running", + "diagnostics": { + "phase": "rediscovery_complete", + "reason": f"{len(new_subs)} subsystems", + "subsystemTagMap": tag_map, + "agentCount": len(self.agents), + }, + "timestamp": utc_now_iso(), + }) + except Exception as exc: + emit("AGENT_ERROR", { + "runId": self.run_id, "code": "rediscovery_error", + "message": str(exc), "recoverable": True, "timestamp": utc_now_iso(), + }) + last_rediscovery = time.time() - self.upsert_run("stopped" if reason != "failed" else "failed", reason=reason) + if watchdog_count % cleanup_every == 0: + try: + deleted = self.graph.cleanup_anomaly_events(int(self.config.get("retentionDays", 14))) + if deleted > 0: + emit("AGENT_STATUS", { + "runId": self.run_id, "state": "running", + "diagnostics": {"phase": "retention_cleanup", "reason": f"deleted {deleted} old events"}, + "timestamp": utc_now_iso(), + }) + except Exception: + pass + + self._stop_all() + self._upsert_run("stopped", reason="stopped") emit("AGENT_COMPLETE", { - "runId": self.run_id, - "success": exit_code == 0, - "reason": reason, - "stoppedAt": utc_now_iso(), + "runId": self.run_id, "success": True, "reason": "stopped", "stoppedAt": utc_now_iso(), }) - return exit_code + return 0 - # ----------------------------- - # Single-operation helpers - # ----------------------------- - def list_events(self, limit: int, state: Optional[str], severity: Optional[str], run_id: Optional[str]) -> Dict[str, Any]: - events = self.graph.list_anomaly_events(limit=limit, state=state, severity=severity, run_id=run_id) - return {"success": True, "events": events} + # ------------------------------------------------------------------- + # Single-operation helpers (for CLI) + # ------------------------------------------------------------------- + def list_events(self, limit: int, state: Optional[str] = None, severity: Optional[str] = None, run_id: Optional[str] = None) -> 
Dict: + return {"success": True, "events": self.graph.list_anomaly_events(limit=limit, state=state, severity=severity, run_id=run_id)} - def get_event(self, event_id: str) -> Dict[str, Any]: + def get_event(self, event_id: str) -> Dict: event = self.graph.get_anomaly_event(event_id) - if not event: - return {"success": False, "error": f"Event not found: {event_id}"} - return {"success": True, "event": event} + return {"success": True, "event": event} if event else {"success": False, "error": f"Not found: {event_id}"} - def ack_event(self, event_id: str, note: Optional[str]) -> Dict[str, Any]: + def ack_event(self, event_id: str, note: Optional[str] = None) -> Dict: with self.graph.session() as session: - result = session.run( - """ - MATCH (e:AnomalyEvent {event_id: $event_id}) - SET e.state = 'acknowledged', - e.acknowledged_at = datetime(), - e.ack_note = $note, - e.updated_at = datetime() - RETURN count(e) AS cnt - """, - event_id=event_id, - note=note or "", - ) - record = result.single() - if not record or record["cnt"] == 0: - return {"success": False, "error": f"Event not found: {event_id}"} + row = session.run( + "MATCH (e:AnomalyEvent {event_id: $eid}) SET e.state='acknowledged', e.acknowledged_at=datetime(), e.ack_note=$note, e.updated_at=datetime() RETURN count(e) AS cnt", + eid=event_id, note=note or "", + ).single() + if not row or row["cnt"] == 0: + return {"success": False, "error": f"Not found: {event_id}"} return {"success": True, "eventId": event_id} - def clear_event(self, event_id: str, note: Optional[str]) -> Dict[str, Any]: + def clear_event(self, event_id: str, note: Optional[str] = None) -> Dict: with self.graph.session() as session: - result = session.run( - """ - MATCH (e:AnomalyEvent {event_id: $event_id}) - SET e.state = 'cleared', - e.cleared_at = datetime(), - e.clear_note = $note, - e.updated_at = datetime() - RETURN count(e) AS cnt - """, - event_id=event_id, - note=note or "", - ) - record = result.single() - if not record or 
record["cnt"] == 0: - return {"success": False, "error": f"Event not found: {event_id}"} + row = session.run( + "MATCH (e:AnomalyEvent {event_id: $eid}) SET e.state='cleared', e.cleared_at=datetime(), e.clear_note=$note, e.updated_at=datetime() RETURN count(e) AS cnt", + eid=event_id, note=note or "", + ).single() + if not row or row["cnt"] == 0: + return {"success": False, "error": f"Not found: {event_id}"} return {"success": True, "eventId": event_id} - def deep_analyze(self, event_id: str) -> Dict[str, Any]: - """Run LLM triage on an existing event and update it in-place.""" + def deep_analyze(self, event_id: str) -> Dict: event = self.graph.get_anomaly_event(event_id) if not event: - return {"success": False, "error": f"Event not found: {event_id}"} - + return {"success": False, "error": f"Not found: {event_id}"} tag_path = event.get("source_tag") or event.get("tag_name", "") if not tag_path: return {"success": False, "error": "Event has no source_tag"} - context = self.get_context(tag_path) - context["subsystem"] = { - "id": event.get("subsystem_id", "global:all"), - "type": event.get("subsystem_type", "global"), - "name": event.get("subsystem_name", "all"), - } + temp_agent = SubsystemAgent( + subsystem_id=event.get("subsystem_id", "global:all"), + subsystem_type=event.get("subsystem_type", "global"), + subsystem_name=event.get("subsystem_name", "all"), + tag_metas=[], graph=self.graph, api=self.api, + config=self.config, run_id=self.run_id, + ) + if not temp_agent.llm: + return {"success": False, "error": "LLM client not configured"} - deterministic = { + context = temp_agent._get_context(tag_path) + context["subsystem"] = _subsystem_ref(event.get("subsystem_type", "global"), event.get("subsystem_name", "all")) + det = { "candidate": True, "z_score": float(event.get("z_score", 0)), "mad_score": float(event.get("mad_score", 0)), @@ -1977,134 +1320,81 @@ def deep_analyze(self, event_id: str) -> Dict[str, Any]: "reasons": 
json.loads(event.get("deterministic_reasons_json", "[]")), "category": event.get("category", "deviation"), } + live = {"value": event.get("live_value"), "quality": event.get("live_quality"), "timestamp": event.get("live_timestamp")} + triage = temp_agent._run_llm_triage(context, det, live) + severity = SubsystemAgent._severity_from_scores(det, triage) - live_sample = { - "value": event.get("live_value"), - "quality": event.get("live_quality"), - "timestamp": event.get("live_timestamp"), - } - - if not self.llm: - return {"success": False, "error": "LLM client not configured"} - - triage = self.run_llm_triage(context, deterministic, live_sample) - - severity = self._severity_from_scores(deterministic, triage) with self.graph.session() as session: session.run( """ - MATCH (e:AnomalyEvent {event_id: $event_id}) - SET e.summary = $summary, - e.explanation = $explanation, - e.severity = $severity, - e.confidence = $confidence, - e.recommended_checks_json = $checks, - e.probable_causes_json = $causes, - e.safety_notes_json = $safety, - e.updated_at = $updated_at, - e.llm_triaged = true - RETURN e + MATCH (e:AnomalyEvent {event_id: $eid}) + SET e.summary=$summary, e.explanation=$expl, e.severity=$sev, + e.confidence=$conf, e.recommended_checks_json=$checks, + e.probable_causes_json=$causes, e.safety_notes_json=$safety, + e.updated_at=$ts, e.llm_triaged=true """, - event_id=event_id, - summary=triage.get("summary", ""), - explanation=triage.get("rationale", ""), - severity=severity, - confidence=float(max(0.0, min(1.0, triage.get("confidence", 0.5)))), + eid=event_id, summary=triage.get("summary", ""), + expl=triage.get("rationale", ""), sev=severity, + conf=float(max(0.0, min(1.0, triage.get("confidence", 0.5)))), checks=json.dumps(triage.get("verification_checks", []), default=str), causes=json.dumps(triage.get("probable_causes", []), default=str), safety=json.dumps(triage.get("safety_notes", []), default=str), - updated_at=utc_now_iso(), + ts=utc_now_iso(), ) + return 
{"success": True, "event": self.graph.get_anomaly_event(event_id)} - updated_event = self.graph.get_anomaly_event(event_id) - return {"success": True, "event": updated_event} - - def get_status(self, run_id: str) -> Dict[str, Any]: + def get_status(self, run_id: str) -> Dict: with self.graph.session() as session: - result = session.run( - """ - MATCH (r:AgentRun {run_id: $run_id}) - RETURN r - LIMIT 1 - """, - run_id=run_id, - ) - row = result.single() + row = session.run("MATCH (r:AgentRun {run_id: $rid}) RETURN r LIMIT 1", rid=run_id).single() if not row: return {"success": False, "error": f"Run not found: {run_id}"} props = dict(row["r"]) return { - "success": True, - "status": props.get("status"), + "success": True, "status": props.get("status"), "metrics": { "cycleCount": props.get("cycle_count", 0), "lastCycleMs": props.get("last_cycle_ms", 0), - "lastCandidates": props.get("last_candidates", 0), - "lastTriaged": props.get("last_triaged", 0), - "lastEmitted": props.get("last_emitted", 0), }, - "lastHeartbeatAt": props.get("last_heartbeat_at"), - "run": props, + "lastHeartbeatAt": props.get("last_heartbeat_at"), "run": props, } -def _load_fixture_cases(path: Path) -> List[Dict[str, Any]]: - data = json.loads(path.read_text(encoding="utf-8")) - if isinstance(data, dict): - return data.get("cases", []) - if isinstance(data, list): - return data - return [] - +# ═══════════════════════════════════════════════════════════════════════════ +# Fixture replay (standalone, no agent needed) +# ═══════════════════════════════════════════════════════════════════════════ def replay_fixtures(config_json: Optional[str], fixture_path: str) -> Dict[str, Any]: config = merge_defaults(json.loads(config_json) if config_json else {}) - path = Path(fixture_path) - cases = _load_fixture_cases(path) + cases = json.loads(Path(fixture_path).read_text(encoding="utf-8")) + if isinstance(cases, dict): + cases = cases.get("cases", []) thresholds = config.get("thresholds", {}) passed = 0 - 
failures: List[Dict[str, Any]] = [] - + failures: List[Dict] = [] for case in cases: - result = compute_deviation_scores( - current_value=case.get("current_value"), - history_values=case.get("history_values", []), - prev_value=case.get("prev_value"), - thresholds=thresholds, - ) + result = compute_deviation_scores(case.get("current_value"), case.get("history_values", []), + prev_value=case.get("prev_value"), thresholds=thresholds) expected = bool(case.get("expected_candidate", False)) if result.get("candidate") == expected: passed += 1 else: - failures.append( - { - "id": case.get("id"), - "expected_candidate": expected, - "actual_candidate": result.get("candidate"), - "category": result.get("category"), - "reasons": result.get("reasons", []), - } - ) + failures.append({"id": case.get("id"), "expected": expected, "actual": result.get("candidate"), "reasons": result.get("reasons", [])}) + return {"success": len(failures) == 0, "total": len(cases), "passed": passed, "failed": len(failures), "failures": failures} - return { - "success": len(failures) == 0, - "total": len(cases), - "passed": passed, - "failed": len(failures), - "failures": failures, - } +# ═══════════════════════════════════════════════════════════════════════════ +# CLI entry point +# ═══════════════════════════════════════════════════════════════════════════ def main() -> int: - parser = argparse.ArgumentParser(description="Anomaly monitor worker") + parser = argparse.ArgumentParser(description="Per-subsystem anomaly monitor") sub = parser.add_subparsers(dest="command", required=True) - p_run = sub.add_parser("run", help="Run continuous anomaly monitoring") - p_run.add_argument("--run-id", help="Optional run id") - p_run.add_argument("--config-json", default="{}", help="JSON config string") + p_run = sub.add_parser("run", help="Run coordinator with per-subsystem agents") + p_run.add_argument("--run-id") + p_run.add_argument("--config-json", default="{}") - p_status = sub.add_parser("status", 
help="Get status for one run") - p_status.add_argument("--run-id", required=True) + sub.add_parser("status", help="Get run status").add_argument("--run-id", required=True) p_list = sub.add_parser("list-events", help="List anomaly events") p_list.add_argument("--limit", type=int, default=100) @@ -2112,36 +1402,33 @@ def main() -> int: p_list.add_argument("--severity") p_list.add_argument("--run-id") - p_get = sub.add_parser("get-event", help="Get one anomaly event") - p_get.add_argument("--event-id", required=True) + sub.add_parser("get-event", help="Get one event").add_argument("--event-id", required=True) - p_ack = sub.add_parser("ack-event", help="Acknowledge one anomaly event") + p_ack = sub.add_parser("ack-event", help="Acknowledge event") p_ack.add_argument("--event-id", required=True) p_ack.add_argument("--note") - p_clear = sub.add_parser("clear-event", help="Clear one acknowledged anomaly event") + p_clear = sub.add_parser("clear-event", help="Clear event") p_clear.add_argument("--event-id", required=True) p_clear.add_argument("--note") - p_deep = sub.add_parser("deep-analyze", help="Run LLM triage on an existing event") + p_deep = sub.add_parser("deep-analyze", help="LLM triage on existing event") p_deep.add_argument("--event-id", required=True) - p_cleanup = sub.add_parser("cleanup", help="Delete old anomaly events") - p_cleanup.add_argument("--retention-days", type=int, default=14) + sub.add_parser("cleanup", help="Delete old events").add_argument("--retention-days", type=int, default=14) - p_replay = sub.add_parser("replay-fixtures", help="Validate deterministic scoring against fixtures") + p_replay = sub.add_parser("replay-fixtures", help="Validate scoring") p_replay.add_argument("--fixture-file", required=True) p_replay.add_argument("--config-json", default="{}") args = parser.parse_args() if args.command == "replay-fixtures": - result = replay_fixtures(args.config_json, args.fixture_file) - print(json.dumps(result)) - return 0 if result["success"] 
else 1 + print(json.dumps(replay_fixtures(args.config_json, args.fixture_file))) + return 0 try: - monitor = AnomalyMonitor( + coordinator = AgentCoordinator( config=json.loads(getattr(args, "config_json", "{}") or "{}"), run_id=getattr(args, "run_id", None), ) @@ -2150,46 +1437,28 @@ def main() -> int: return 1 if args.command == "run": - def _signal_handler(_signum, _frame): - monitor._running = False - - signal.signal(signal.SIGTERM, _signal_handler) + signal.signal(signal.SIGTERM, lambda *_: setattr(coordinator, '_running', False)) if hasattr(signal, "SIGINT"): - signal.signal(signal.SIGINT, _signal_handler) - return monitor.run_forever() + signal.signal(signal.SIGINT, lambda *_: setattr(coordinator, '_running', False)) + return coordinator.run() if args.command == "status": - print(json.dumps(monitor.get_status(args.run_id), default=str)) - return 0 - - if args.command == "list-events": - print(json.dumps(monitor.list_events(args.limit, args.state, args.severity, args.run_id), default=str)) - return 0 - - if args.command == "get-event": - print(json.dumps(monitor.get_event(args.event_id), default=str)) - return 0 - - if args.command == "ack-event": - print(json.dumps(monitor.ack_event(args.event_id, args.note), default=str)) - return 0 - - if args.command == "clear-event": - print(json.dumps(monitor.clear_event(args.event_id, args.note), default=str)) - return 0 - - if args.command == "deep-analyze": - print(json.dumps(monitor.deep_analyze(args.event_id), default=str)) - return 0 - - if args.command == "cleanup": - deleted = monitor.graph.cleanup_anomaly_events(args.retention_days) + print(json.dumps(coordinator.get_status(args.run_id), default=str)) + elif args.command == "list-events": + print(json.dumps(coordinator.list_events(args.limit, args.state, args.severity, getattr(args, "run_id", None)), default=str)) + elif args.command == "get-event": + print(json.dumps(coordinator.get_event(args.event_id), default=str)) + elif args.command == "ack-event": + 
print(json.dumps(coordinator.ack_event(args.event_id, args.note), default=str)) + elif args.command == "clear-event": + print(json.dumps(coordinator.clear_event(args.event_id, args.note), default=str)) + elif args.command == "deep-analyze": + print(json.dumps(coordinator.deep_analyze(args.event_id), default=str)) + elif args.command == "cleanup": + deleted = coordinator.graph.cleanup_anomaly_events(args.retention_days) print(json.dumps({"success": True, "deleted": deleted})) - return 0 - - return 1 + return 0 if __name__ == "__main__": sys.exit(main()) - diff --git a/scripts/neo4j_ontology.py b/scripts/neo4j_ontology.py index 380e3cb..92f6258 100644 --- a/scripts/neo4j_ontology.py +++ b/scripts/neo4j_ontology.py @@ -192,6 +192,9 @@ def create_indexes(self) -> None: "CREATE INDEX hmitextlist_name IF NOT EXISTS FOR (htl:HMITextList) ON (htl.name)", "CREATE INDEX plctagtable_name IF NOT EXISTS FOR (pt:PLCTagTable) ON (pt.name)", "CREATE INDEX plctag_name IF NOT EXISTS FOR (ptg:PLCTag) ON (ptg.name)", + # ScadaTag lookup indexes (used by agent persist queries) + "CREATE INDEX scadatag_name IF NOT EXISTS FOR (t:ScadaTag) ON (t.name)", + "CREATE INDEX scadatag_opc_item_path IF NOT EXISTS FOR (t:ScadaTag) ON (t.opc_item_path)", # Agent monitoring indexes "CREATE INDEX anomalyevent_created IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.created_at)", "CREATE INDEX anomalyevent_state IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.state)", From 9db49f2d583671cb16ca5b58d63b51d5543bcbd0 Mon Sep 17 00:00:00 2001 From: Leor Barak Fishman Date: Tue, 10 Mar 2026 13:27:39 -0700 Subject: [PATCH 18/18] p and id intake --- electron-ui/graph-renderer.js | 16 + electron-ui/index.html | 62 ++++ electron-ui/main.js | 48 ++- electron-ui/preload.js | 4 + electron-ui/renderer.js | 133 ++++++++- requirements.txt | 8 +- scripts/artifact_ingest.py | 478 ++++++++++++++++++++++++++++++ scripts/artifact_linker.py | 508 ++++++++++++++++++++++++++++++++ scripts/artifact_models.py | 171 +++++++++++ 
scripts/gpt54_client.py | 378 ++++++++++++++++++++++++ scripts/graph_api.py | 57 +++- scripts/incremental_analyzer.py | 51 ++++ scripts/neo4j_ontology.py | 304 +++++++++++++++++++ scripts/process_semantics.py | 170 +++++++++++ 14 files changed, 2379 insertions(+), 9 deletions(-) create mode 100644 scripts/artifact_ingest.py create mode 100644 scripts/artifact_linker.py create mode 100644 scripts/artifact_models.py create mode 100644 scripts/gpt54_client.py create mode 100644 scripts/process_semantics.py diff --git a/electron-ui/graph-renderer.js b/electron-ui/graph-renderer.js index ee5ec51..6441750 100644 --- a/electron-ui/graph-renderer.js +++ b/electron-ui/graph-renderer.js @@ -838,6 +838,17 @@ class GraphRenderer { 'HMIScreen': 'siemens-hmi', 'hmiscreen': 'siemens-hmi', 'PLCTagTable': 'plc', 'plctagtable': 'plc', 'PLCTag': 'plc', 'plctag': 'plc', + // Process-semantic layer + 'ProcessMedium': 'process', 'processmedium': 'process', + 'UnitOperation': 'process', 'unitoperation': 'process', + 'OperatingEnvelope': 'process', 'operatingenvelope': 'process', + 'PhysicalPrinciple': 'process', 'physicalprinciple': 'process', + 'ChemicalSpecies': 'process', 'chemicalspecies': 'process', + 'Reaction': 'process', 'reaction': 'process', + // Anomaly + safety + 'AgentRun': 'anomaly', 'agentrun': 'anomaly', + 'AnomalyEvent': 'anomaly', 'anomalyevent': 'anomaly', + 'SafetyElement': 'safety', 'safetyelement': 'safety', }; return typeMap[type] || 'other'; } @@ -855,6 +866,11 @@ class GraphRenderer { 'mes': '#00897B', 'siemens': '#0288D1', 'siemens-hmi': '#0097A7', + 'process': '#00ACC1', + 'anomaly': '#F44336', + 'safety': '#D32F2F', + 'patterns': '#795548', + 'flows': '#E91E63', 'other': '#9E9E9E' }; const group = this._getGroupForType(type); diff --git a/electron-ui/index.html b/electron-ui/index.html index 0f6e719..d09dac3 100644 --- a/electron-ui/index.html +++ b/electron-ui/index.html @@ -219,6 +219,33 @@

Workbench Backup

+
+
+ + + + + + + +
+

P&IDs / SOPs / Diagrams

+

Import process documents — GPT-5.4 extracts equipment, media, operations, and links them to the ontology

+
+ +
+
+ + +
+
+ +
+
@@ -708,6 +735,7 @@

Ontology Graph

+
@@ -803,6 +831,24 @@

Ontology Graph

HMI (Alarm / Screen / Script)
+
+
Process Layer
+
+ + Medium / Operation / Envelope +
+
+ + Principle / Species / Reaction +
+
+
+
Anomalies
+
+ + Agent Run / Event +
+
Other
@@ -964,6 +1010,14 @@

Add New Node

+ + + + + + + +
@@ -1478,6 +1532,14 @@

Graph: Node

Siemens HMI
+
+ + Process (Medium/Op/Principle) +
+
+ + Anomalies +
diff --git a/electron-ui/main.js b/electron-ui/main.js index ae5c557..e9aa32d 100644 --- a/electron-ui/main.js +++ b/electron-ui/main.js @@ -376,9 +376,11 @@ async function stopActiveAgent(reason = 'stopped_by_user') { // Select file dialog ipcMain.handle('select-file', async (event, options) => { + const properties = ['openFile']; + if (options && options.multiple) properties.push('multiSelections'); const result = await dialog.showOpenDialog(mainWindow, { - properties: ['openFile'], - filters: options.filters || [ + properties, + filters: (options && options.filters) || [ { name: 'All Supported', extensions: ['json', 'sc', 'L5X', 'st', 'xml'] }, { name: 'Ignition Backup', extensions: ['json'] }, { name: 'Rockwell PLC', extensions: ['sc', 'L5X'] }, @@ -386,6 +388,9 @@ ipcMain.handle('select-file', async (event, options) => { { name: 'TIA Portal XML', extensions: ['xml'] } ] }); + if (options && options.multiple) { + return { filePaths: result.filePaths || [] }; + } return result.filePaths[0] || null; }); @@ -1794,4 +1799,43 @@ ipcMain.handle('agents:stop-subsystem', async (event, subsystemId) => { if (!activeAgentRun) return { success: false, error: 'No active agent run' }; const sent = sendAgentCommand({ cmd: 'stop-agent', subsystemId }); return { success: sent, subsystemId }; +}); + +// ============================================ +// Artifact Ingestion IPC (P&IDs / SOPs / Diagrams via GPT-5.4) +// ============================================ + +ipcMain.handle('ingest-artifact', async (event, filePath, sourceKind = 'pid') => { + try { + sendToRenderer('stream-output', { text: `Ingesting ${path.basename(filePath)} as ${sourceKind}...\n` }); + const output = await runPythonScript('artifact_ingest.py', [ + filePath, + '--source-kind', sourceKind, + '--verbose', + '--json', + ], { streaming: true, streamId: 'artifact-ingest' }); + const result = JSON.parse(output || '{}'); + return { success: true, ...result }; + } catch (error) { + sendToRenderer('stream-output', { 
text: `Ingestion error: ${error.message}\n` }); + return { success: false, error: error.message }; + } +}); + +ipcMain.handle('ingest-artifact-batch', async (event, files) => { + try { + const filePaths = files.map(f => f.path); + const sourceKind = files[0]?.sourceKind || 'pid'; + sendToRenderer('stream-output', { text: `Ingesting ${files.length} artifact(s)...\n` }); + const output = await runPythonScript('artifact_ingest.py', [ + ...filePaths, + '--source-kind', sourceKind, + '--verbose', + '--json', + ], { streaming: true, streamId: 'artifact-ingest' }); + const result = JSON.parse(output || '{}'); + return { success: true, ...result }; + } catch (error) { + return { success: false, error: error.message }; + } }); \ No newline at end of file diff --git a/electron-ui/preload.js b/electron-ui/preload.js index 7615063..3be7255 100644 --- a/electron-ui/preload.js +++ b/electron-ui/preload.js @@ -60,6 +60,10 @@ contextBridge.exposeInMainWorld('api', { graphAiPropose: (description) => ipcRenderer.invoke('graph:ai-propose', description), graphAiExplain: (nodeNames) => ipcRenderer.invoke('graph:ai-explain', nodeNames), + // Artifact Ingestion (P&IDs, SOPs, Engineering Diagrams via GPT-5.4) + ingestArtifact: (filePath, sourceKind) => ipcRenderer.invoke('ingest-artifact', filePath, sourceKind), + ingestArtifactBatch: (files) => ipcRenderer.invoke('ingest-artifact-batch', files), + // DEXPI P&ID Conversion API dexpiConvert: (options) => ipcRenderer.invoke('dexpi:convert', options), dexpiExport: () => ipcRenderer.invoke('dexpi:export'), diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js index 93bf9b4..0523a9e 100644 --- a/electron-ui/renderer.js +++ b/electron-ui/renderer.js @@ -1834,7 +1834,7 @@ async function loadGraphData() { if (loading) loading.classList.add('active'); try { - const result = await window.api.graphLoad({ limit: 500 }); + const result = await window.api.graphLoad({}); if (result.success && graphRenderer) { graphRenderer.loadData(result); @@ 
-2113,12 +2113,48 @@ document.getElementById('graph-search')?.addEventListener('input', (e) => { } }); -document.getElementById('graph-filter')?.addEventListener('change', (e) => { - if (graphRenderer) { - graphRenderer.filterByType(e.target.value); +const GRAPH_FILTER_LABELS = { + plc: ['AOI', 'Tag', 'PLCTagTable', 'PLCTag'], + scada: ['UDT', 'Equipment', 'View', 'ViewComponent', 'ScadaTag', 'Script', 'NamedQuery', 'Project', 'GatewayEvent'], + siemens: ['TiaProject', 'PLCDevice', 'HMIDevice', 'HMIConnection'], + 'siemens-hmi': ['HMIAlarm', 'HMIAlarmClass', 'HMIScript', 'HMIScreen', 'HMITagTable', 'HMITextList'], + mes: ['Material', 'Batch', 'ProductionOrder', 'Operation', 'CriticalControlPoint', 'ProcessDeviation'], + troubleshooting: ['FaultSymptom', 'FaultCause', 'OperatorPhrase', 'CommonPhrase', 'Intent'], + anomaly: ['AgentRun', 'AnomalyEvent'], + flows: ['DataFlow', 'EndToEndFlow'], + process: ['ProcessMedium', 'UnitOperation', 'OperatingEnvelope', 'PhysicalPrinciple', 'ChemicalSpecies', 'Reaction'], +}; + +document.getElementById('graph-filter')?.addEventListener('change', async (e) => { + const value = e.target.value; + if (value === 'all') { + await loadGraphData(); + } else if (GRAPH_FILTER_LABELS[value]) { + await loadGraphDataFiltered(GRAPH_FILTER_LABELS[value]); + } else { + if (graphRenderer) graphRenderer.filterByType(value); } }); +async function loadGraphDataFiltered(nodeTypes) { + const loading = document.getElementById('graph-loading'); + if (loading) loading.classList.add('active'); + + try { + const result = await window.api.graphLoad({ types: nodeTypes }); + if (result.success && graphRenderer) { + graphRenderer.loadData(result); + if (loading) loading.classList.remove('active'); + } else { + console.error('Failed to load filtered graph:', result.error); + if (loading) loading.innerHTML = `

Error: ${result.error}

`; + } + } catch (error) { + console.error('Failed to load filtered graph:', error); + if (loading) loading.innerHTML = `

Error: ${error.message}

`; + } +} + document.getElementById('btn-layout-force')?.addEventListener('click', () => { if (graphRenderer) { graphRenderer.switchLayout('force'); @@ -3382,6 +3418,95 @@ btnSaveSettings?.addEventListener('click', async () => { } }); +// ============================================ +// Artifact Ingestion (P&IDs / SOPs / Diagrams) +// ============================================ + +const btnSelectArtifact = document.getElementById('btn-select-artifact'); +const btnIngestArtifact = document.getElementById('btn-ingest-artifact'); +const artifactSourceKind = document.getElementById('artifact-source-kind'); +const artifactFileList = document.getElementById('artifact-file-list'); +const artifactIngestStatus = document.getElementById('artifact-ingest-status'); + +let selectedArtifactFiles = []; + +btnSelectArtifact?.addEventListener('click', async () => { + const extensions = ['png', 'jpg', 'jpeg', 'bmp', 'tiff', 'tif', 'webp', 'gif', 'pdf', 'txt', 'md']; + const result = await api.selectFile({ + filters: [{ name: 'Supported Files', extensions }], + multiple: true, + }); + + if (result && result.filePaths && result.filePaths.length > 0) { + selectedArtifactFiles = result.filePaths; + if (artifactFileList) { + artifactFileList.innerHTML = selectedArtifactFiles + .map(f => `
${f.split(/[\\/]/).pop()}
`) + .join(''); + } + if (btnIngestArtifact) btnIngestArtifact.disabled = false; + } +}); + +btnIngestArtifact?.addEventListener('click', async () => { + if (selectedArtifactFiles.length === 0) return; + + const sourceKind = artifactSourceKind ? artifactSourceKind.value : 'pid'; + btnIngestArtifact.disabled = true; + btnIngestArtifact.textContent = 'Ingesting...'; + + appendOutput(`\n[Artifact Ingest] Processing ${selectedArtifactFiles.length} file(s) as ${sourceKind}...\n`); + + try { + const files = selectedArtifactFiles.map(p => ({ path: p, sourceKind })); + const result = await api.ingestArtifactBatch(files); + + if (result.success) { + if (result.node_details && result.node_details.length > 0) { + appendOutput(`[Artifact Ingest] Node updates (${result.node_details.length}):\n`); + for (const d of result.node_details) { + appendOutput(` + ${d}\n`); + } + } + if (result.concept_details && result.concept_details.length > 0) { + appendOutput(`[Artifact Ingest] Process concepts (${result.concept_details.length}):\n`); + for (const d of result.concept_details) { + appendOutput(` + ${d}\n`); + } + } + if (result.relationship_details && result.relationship_details.length > 0) { + appendOutput(`[Artifact Ingest] Relationships (${result.relationship_details.length}):\n`); + for (const d of result.relationship_details) { + appendOutput(` ~ ${d}\n`); + } + } + appendOutput( + `[Artifact Ingest] Summary: ${result.nodes_updated || 0} node updates, ` + + `${result.concepts_created || 0} process concepts, ` + + `${result.relationships_created || 0} relationships\n` + ); + if (result.errors && result.errors.length > 0) { + appendOutput(`[Artifact Ingest] ${result.errors.length} error(s):\n`); + for (const err of result.errors) { + appendOutput(` - ${typeof err === 'string' ? 
err : JSON.stringify(err)}\n`); + } + } + if (artifactIngestStatus) { + artifactIngestStatus.style.display = 'block'; + artifactIngestStatus.textContent = + `${result.nodes_updated || 0} updates, ${result.concepts_created || 0} concepts, ${result.relationships_created || 0} rels`; + } + } else { + appendOutput(`[Artifact Ingest] Error: ${result.error || 'Unknown error'}\n`); + } + } catch (err) { + appendOutput(`[Artifact Ingest] Error: ${err.message}\n`); + } finally { + btnIngestArtifact.disabled = false; + btnIngestArtifact.textContent = 'Ingest'; + } +}); + // ============================================ // Database Connections Settings // ============================================ diff --git a/requirements.txt b/requirements.txt index 09f2756..20f186c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,10 @@ mysql-connector-python>=8.0.0 psycopg2-binary>=2.9.0 # HTTP client for Ignition gateway API -requests>=2.28.0 \ No newline at end of file +requests>=2.28.0 + +# OpenAI GPT API for artifact extraction +openai>=1.0.0 + +# PDF text extraction (optional, for SOP ingestion) +PyPDF2>=3.0.0 \ No newline at end of file diff --git a/scripts/artifact_ingest.py b/scripts/artifact_ingest.py new file mode 100644 index 0000000..762ee23 --- /dev/null +++ b/scripts/artifact_ingest.py @@ -0,0 +1,478 @@ +#!/usr/bin/env python3 +""" +Artifact ingestion pipeline for P&IDs, SOPs, and engineering diagrams. + +Orchestrates: +1. Source parsing (image or text) +2. GPT-5.4 structured extraction +3. Entity linking and concept normalization +4. 
Provenance-aware Neo4j writes + +Usage: + from artifact_ingest import ArtifactIngester + + ingester = ArtifactIngester(graph) + result = ingester.ingest_file("path/to/pid.png", source_kind="pid") +""" + +import os +import sys +import json +import time +from pathlib import Path +from typing import Dict, List, Optional, Any, Callable +from dataclasses import asdict + +from neo4j_ontology import OntologyGraph, get_ontology_graph +from gpt54_client import GPT54Client +from artifact_linker import ArtifactLinker +from artifact_models import ExtractionResult +from process_semantics import ( + EvidenceItem, + evidence_to_json, + merge_evidence, + PROCESS_NODE_SCHEMAS, + PROCESS_RELATIONSHIPS, +) + + +class ArtifactIngester: + """ + End-to-end ingestion pipeline for process engineering artifacts. + + Supports: + - P&IDs (images): .png, .jpg, .jpeg, .bmp, .tiff, .webp + - SOPs (text): .txt, .md, .pdf (text extraction only) + - Engineering diagrams (images): same as P&IDs + """ + + IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".tif", ".webp", ".gif"} + TEXT_EXTENSIONS = {".txt", ".md", ".csv", ".tsv"} + + def __init__( + self, + graph: Optional[OntologyGraph] = None, + gpt_client: Optional[GPT54Client] = None, + verbose: bool = False, + on_progress: Optional[Callable[[str], None]] = None, + ): + self._graph = graph or get_ontology_graph() + self._gpt = gpt_client + self._linker = None # initialized lazily after GPT client is ready + self._verbose = verbose + self._on_progress = on_progress or (lambda msg: None) + + def _ensure_gpt(self) -> GPT54Client: + if self._gpt is None: + self._gpt = GPT54Client() + if self._linker is None: + self._linker = ArtifactLinker(self._graph, gpt_client=self._gpt) + return self._gpt + + def _log(self, msg: str) -> None: + if self._verbose: + print(f"[ArtifactIngest] {msg}", file=sys.stderr, flush=True) + self._on_progress(msg) + + # ------------------------------------------------------------------ + # Public API + # 
------------------------------------------------------------------ + + def ingest_file( + self, + file_path: str, + source_kind: str = "pid", + ) -> Dict[str, Any]: + """ + Ingest a single artifact file end-to-end. + + Args: + file_path: Path to the source file. + source_kind: "pid", "sop", or "diagram". + + Returns: + Summary dict with counts and any errors. + """ + path = Path(file_path) + if not path.exists(): + return {"error": f"File not found: {file_path}"} + + self._log(f"Extracting facts from {path.name} ({source_kind})...") + gpt = self._ensure_gpt() + + self._log(f"Loading entity cache for linking...") + entity_hints = self._linker.load_entity_cache() + ext = path.suffix.lower() + + if ext in self.IMAGE_EXTENSIONS: + raw = gpt.extract_from_image( + str(path), + source_kind=source_kind, + existing_entities=entity_hints, + verbose=self._verbose, + ) + elif ext in self.TEXT_EXTENSIONS: + text = path.read_text(encoding="utf-8", errors="replace") + raw = gpt.extract_from_text( + text, + source_file=str(path), + source_kind=source_kind, + existing_entities=entity_hints, + verbose=self._verbose, + ) + elif ext == ".pdf": + text = self._extract_pdf_text(str(path)) + raw = gpt.extract_from_text( + text, + source_file=str(path), + source_kind=source_kind, + existing_entities=entity_hints, + verbose=self._verbose, + ) + else: + return {"error": f"Unsupported file type: {ext}"} + + if "error" in raw: + return {"error": raw["error"], "raw": raw.get("raw", "")} + + eq_count = len(raw.get("equipment_facts", [])) + tag_count = len(raw.get("tag_facts", [])) + media_count = len(raw.get("process_media", [])) + op_count = len(raw.get("unit_operations", [])) + species_count = len(raw.get("chemical_species", [])) + rx_count = len(raw.get("reactions", [])) + rel_count = len(raw.get("relationships", [])) + self._log( + f"GPT extracted: {eq_count} equipment, {tag_count} tags, " + f"{media_count} media, {op_count} operations, " + f"{species_count} species, {rx_count} reactions, 
{rel_count} relationships" + ) + + self._log("Resolving extracted entity names against existing graph nodes...") + extraction = self._linker.normalize_extraction( + raw, source_file=str(path), source_kind=source_kind, + verbose=self._verbose, + ) + + self._log("Writing facts to Neo4j...") + summary = self._write_extraction(extraction) + + self._log( + f"Done: {summary['nodes_updated']} updates, " + f"{summary['concepts_created']} concepts, " + f"{summary['relationships_created']} relationships" + ) + return summary + + def ingest_batch( + self, + files: List[Dict[str, str]], + ) -> Dict[str, Any]: + """ + Ingest multiple artifacts. + + Args: + files: List of dicts with "path" and "source_kind" keys. + + Returns: + Aggregate summary. + """ + totals = { + "files_processed": 0, + "files_failed": 0, + "nodes_updated": 0, + "concepts_created": 0, + "relationships_created": 0, + "node_details": [], + "concept_details": [], + "relationship_details": [], + "errors": [], + } + + for i, f in enumerate(files, 1): + self._log(f"Processing file {i}/{len(files)}: {f['path']}") + result = self.ingest_file(f["path"], f.get("source_kind", "pid")) + if "error" in result: + totals["files_failed"] += 1 + totals["errors"].append({"file": f["path"], "error": result["error"]}) + else: + totals["files_processed"] += 1 + totals["nodes_updated"] += result.get("nodes_updated", 0) + totals["concepts_created"] += result.get("concepts_created", 0) + totals["relationships_created"] += result.get("relationships_created", 0) + totals["node_details"].extend(result.get("node_details", [])) + totals["concept_details"].extend(result.get("concept_details", [])) + totals["relationship_details"].extend(result.get("relationship_details", [])) + + return totals + + # ------------------------------------------------------------------ + # Graph write helpers + # ------------------------------------------------------------------ + + def _write_extraction(self, extraction: ExtractionResult) -> Dict[str, Any]: 
+ """Write a normalized ExtractionResult to Neo4j with provenance.""" + summary = { + "source_file": extraction.source_file, + "source_kind": extraction.source_kind, + "nodes_updated": 0, + "concepts_created": 0, + "relationships_created": 0, + "node_details": [], + "concept_details": [], + "relationship_details": [], + "errors": extraction.errors[:], + } + + with self._graph.session() as session: + for update in extraction.node_updates: + try: + matched, existed = self._write_node_update(session, update) + props_str = ", ".join(f"{k}={v}" for k, v in update.properties.items()) if update.properties else "" + detail = f"{update.node_label}:{update.node_name}" + if props_str: + detail += f" ({props_str})" + if matched: + summary["nodes_updated"] += 1 + summary["node_details"].append(detail) + action = "Updated" if existed else "Created" + self._log(f" {action} {detail}") + else: + self._log(f" Skipped {detail} (not found in graph)") + except Exception as e: + summary["errors"].append(f"Node update {update.node_name}: {e}") + + seen_concepts = set() + for concept in extraction.process_concepts: + dedup_key = f"{concept.label}:{concept.name}" + if dedup_key in seen_concepts: + continue + seen_concepts.add(dedup_key) + try: + self._write_process_concept(session, concept) + summary["concepts_created"] += 1 + props_str = ", ".join(f"{k}={v}" for k, v in concept.properties.items()) if concept.properties else "" + detail = f"{concept.label}:{concept.name}" + if props_str: + detail += f" ({props_str})" + summary["concept_details"].append(detail) + self._log(f" Created {detail}") + except Exception as e: + summary["errors"].append(f"Concept {concept.name}: {e}") + + seen_rels = set() + for rel in extraction.relationships: + dedup_key = f"{rel.source_label}:{rel.source_name}-{rel.rel_type}->{rel.target_label}:{rel.target_name}" + if dedup_key in seen_rels: + continue + seen_rels.add(dedup_key) + try: + detail = f"{rel.source_label}:{rel.source_name} -[{rel.rel_type}]-> 
{rel.target_label}:{rel.target_name}" + linked = self._write_relationship(session, rel) + if linked: + summary["relationships_created"] += 1 + summary["relationship_details"].append(detail) + self._log(f" Linked {detail}") + else: + self._log(f" Skipped {detail} (endpoint not found)") + except Exception as e: + summary["errors"].append(f"Rel {rel.rel_type}: {e}") + + return summary + + EXISTING_LABELS = { + "AOI", "Tag", "UDT", "View", "ViewComponent", + "ScadaTag", "Script", "NamedQuery", "Project", + "FaultSymptom", "FaultCause", "OperatorPhrase", + "ControlPattern", "DataFlow", "SafetyElement", + "Material", "Batch", "ProductionOrder", "Operation", + "CriticalControlPoint", "ProcessDeviation", + "TiaProject", "PLCDevice", "HMIDevice", "HMIConnection", + "HMIAlarm", "HMIAlarmClass", "HMIScript", "HMIScreen", + "PLCTagTable", "PLCTag", "HMITagTable", "HMITextList", + } + + def _write_node_update(self, session, update) -> bool: + """Update an existing node with new properties and evidence. + + For backbone labels (Equipment, ScadaTag, AOI, etc.) uses MATCH so + it only updates nodes that already exist -- never creates new ones. + Returns True if a node was actually matched and updated. 
+ """ + ev_json = evidence_to_json(update.evidence) + + set_clauses = [] + params: Dict[str, Any] = {"name": update.node_name, "ev_json": ev_json} + + for k, v in update.properties.items(): + param_key = f"prop_{k}" + set_clauses.append(f"n.{k} = ${param_key}") + params[param_key] = v + + set_clause = ", ".join(set_clauses) if set_clauses else "" + if set_clause: + set_clause = f"SET {set_clause}, " + else: + set_clause = "SET " + + if update.node_label in self.EXISTING_LABELS: + verb = "MATCH" + else: + verb = "MERGE" + + query = f""" + {verb} (n:{update.node_label} {{name: $name}}) + {set_clause} + n.evidence_items = CASE + WHEN n.evidence_items IS NULL THEN $ev_json + ELSE n.evidence_items + $ev_json + END, + n.last_evidence_at = datetime() + RETURN n.name AS matched, n.created_at IS NOT NULL AS existed + """ + result = session.run(query, params) + record = result.single() + if record is None: + return False, False + return True, bool(record.get("existed", True)) + + def _write_process_concept(self, session, concept) -> None: + """Create or merge a process-semantic node with provenance.""" + ev_json = evidence_to_json(concept.evidence) + + set_clauses = [] + params: Dict[str, Any] = {"name": concept.name, "ev_json": ev_json} + + for k, v in concept.properties.items(): + param_key = f"prop_{k}" + set_clauses.append(f"n.{k} = COALESCE(n.{k}, ${param_key})") + params[param_key] = v + + set_clause = ", ".join(set_clauses) if set_clauses else "" + if set_clause: + set_clause = f"SET {set_clause}, " + else: + set_clause = "SET " + + query = f""" + MERGE (n:{concept.label} {{name: $name}}) + {set_clause} + n.evidence_items = CASE + WHEN n.evidence_items IS NULL THEN $ev_json + ELSE n.evidence_items + $ev_json + END, + n.last_evidence_at = datetime() + """ + session.run(query, params) + + _PATH_KEYED_LABELS = {"ViewComponent"} + + def _match_clause(self, alias: str, label: str, param_name: str) -> str: + """Return a MATCH clause using `path` for ViewComponent, `name` 
otherwise.""" + if label in self._PATH_KEYED_LABELS: + return f"MATCH ({alias}:{label} {{path: ${param_name}}})" + return f"MATCH ({alias}:{label} {{name: ${param_name}}})" + + def _write_relationship(self, session, rel) -> bool: + """Create a relationship with provenance metadata. + + Returns True if both endpoints existed and the relationship was written. + """ + ev_json = evidence_to_json(rel.evidence) + + params: Dict[str, Any] = { + "src_name": rel.source_name, + "tgt_name": rel.target_name, + "ev_json": ev_json, + } + + for k, v in rel.properties.items(): + param_key = f"prop_{k}" + params[param_key] = v + + src_match = self._match_clause("src", rel.source_label, "src_name") + tgt_match = self._match_clause("tgt", rel.target_label, "tgt_name") + + query = f""" + {src_match} + {tgt_match} + MERGE (src)-[r:{rel.rel_type}]->(tgt) + SET r.evidence_items = CASE + WHEN r.evidence_items IS NULL THEN $ev_json + ELSE r.evidence_items + $ev_json + END, + r.last_evidence_at = datetime() + RETURN type(r) AS rel_type + """ + result = session.run(query, params) + return result.single() is not None + + # ------------------------------------------------------------------ + # PDF text extraction + # ------------------------------------------------------------------ + + @staticmethod + def _extract_pdf_text(pdf_path: str) -> str: + """Extract text from a PDF file. 
Falls back gracefully.""" + try: + import PyPDF2 + text_pages = [] + with open(pdf_path, "rb") as f: + reader = PyPDF2.PdfReader(f) + for page in reader.pages: + text_pages.append(page.extract_text() or "") + return "\n\n".join(text_pages) + except ImportError: + try: + import subprocess + result = subprocess.run( + ["pdftotext", pdf_path, "-"], + capture_output=True, text=True, timeout=30, + ) + if result.returncode == 0: + return result.stdout + except Exception: + pass + return f"[Could not extract text from {pdf_path}]" + + +# ============================================================================ +# CLI entry point +# ============================================================================ + +def main(): + import argparse + + parser = argparse.ArgumentParser(description="Ingest P&IDs/SOPs into ontology") + parser.add_argument("files", nargs="+", help="Files to ingest") + parser.add_argument( + "--source-kind", default="pid", + choices=["pid", "sop", "diagram"], + help="Source type (default: pid)", + ) + parser.add_argument("--verbose", action="store_true") + parser.add_argument("--json", action="store_true", help="JSON output") + args = parser.parse_args() + + ingester = ArtifactIngester(verbose=args.verbose) + + files = [{"path": f, "source_kind": args.source_kind} for f in args.files] + result = ingester.ingest_batch(files) + + if args.json: + print(json.dumps(result, indent=2)) + else: + print(f"\nIngestion complete:") + print(f" Files processed: {result['files_processed']}") + print(f" Files failed: {result['files_failed']}") + print(f" Nodes updated: {result['nodes_updated']}") + print(f" Concepts created:{result['concepts_created']}") + print(f" Rels created: {result['relationships_created']}") + if result["errors"]: + print(f"\nErrors:") + for err in result["errors"]: + print(f" - {err}") + + +if __name__ == "__main__": + main() diff --git a/scripts/artifact_linker.py b/scripts/artifact_linker.py new file mode 100644 index 0000000..c0d2a7e --- 
/dev/null +++ b/scripts/artifact_linker.py @@ -0,0 +1,508 @@ +#!/usr/bin/env python3 +""" +Entity linker for artifact extraction results. + +Resolves extracted mentions from GPT-5.4 output to existing ontology nodes, +deduplicates process-semantic concepts, and produces a clean set of graph +mutations ready for Neo4j writes. +""" + +import json +from typing import Dict, List, Optional, Any, Tuple + +from neo4j_ontology import OntologyGraph +from process_semantics import ( + EvidenceItem, + PROCESS_NODE_SCHEMAS, + PROCESS_RELATIONSHIPS, +) +from artifact_models import ( + ExtractedNodeUpdate, + ExtractedRelationship, + ExtractedProcessConcept, + ExtractionResult, +) + + +class ArtifactLinker: + """ + Resolves extracted mentions to existing graph entities and normalizes + process-semantic concepts before graph writes. + + Uses GPT to match extracted names to existing graph nodes when a GPT + client is provided; falls back to substring matching otherwise. + """ + + def __init__(self, graph: OntologyGraph, gpt_client=None): + self._graph = graph + self._gpt = gpt_client + self._entity_cache: Dict[str, Dict[str, str]] = {} + self._entity_cache_raw: Dict[str, List[str]] = {} + self._gpt_resolved: Dict[str, Dict[str, Optional[str]]] = {} + self._gpt_visualizes: Dict[str, List[str]] = {} + + def load_entity_cache(self) -> Dict[str, List[str]]: + """ + Load known entity names from the graph for linking hints. + Returns dict mapping label -> list of names. 
+ """ + labels_to_query = [ + "Equipment", "AOI", "UDT", "ScadaTag", "View", "ViewComponent", + "ProcessMedium", "UnitOperation", "OperatingEnvelope", + "PhysicalPrinciple", "ChemicalSpecies", "Reaction", + "Process", "Operation", "CriticalControlPoint", + ] + cache: Dict[str, List[str]] = {} + with self._graph.session() as session: + for label in labels_to_query: + try: + if label == "ViewComponent": + result = session.run( + "MATCH (n:ViewComponent) RETURN n.path AS name LIMIT 1000" + ) + else: + result = session.run( + f"MATCH (n:{label}) RETURN n.name AS name LIMIT 500" + ) + names = [r["name"] for r in result if r["name"]] + if names: + cache[label] = names + except Exception: + pass + self._entity_cache = { + label: {n.lower(): n for n in names} + for label, names in cache.items() + } + self._entity_cache_raw = cache + return cache + + def _collect_extracted_mentions(self, raw: Dict[str, Any]) -> Dict[str, List[str]]: + """Gather all entity names from a raw GPT extraction keyed by label.""" + mentions: Dict[str, set] = {} + + for eq in raw.get("equipment_facts", []): + name = eq.get("equipment_name", "") + if name: + mentions.setdefault("Equipment", set()).add(name) + + for tag in raw.get("tag_facts", []): + name = tag.get("tag_name", "") + if name: + mentions.setdefault("ScadaTag", set()).add(name) + + for rel in raw.get("relationships", []): + src_label = rel.get("source_type", "") + src_name = rel.get("source_name", "") + tgt_label = rel.get("target_type", "") + tgt_name = rel.get("target_name", "") + if src_label and src_name: + mentions.setdefault(src_label, set()).add(src_name) + if tgt_label and tgt_name: + mentions.setdefault(tgt_label, set()).add(tgt_name) + + return {label: sorted(names) for label, names in mentions.items()} + + def run_gpt_entity_resolution( + self, raw: Dict[str, Any], verbose: bool = False + ) -> None: + """ + Use GPT to resolve extracted mentions against existing graph entities. + Populates self._gpt_resolved with the mappings. 
+ """ + if not self._gpt: + return + + self._gpt_visualizes = {} + extracted = self._collect_extracted_mentions(raw) + labels_to_resolve = { + label: names for label, names in extracted.items() + if label in self._entity_cache_raw and self._entity_cache_raw[label] + } + + has_vc = bool(self._entity_cache_raw.get("ViewComponent")) + if has_vc and "Equipment" in extracted and "Equipment" not in labels_to_resolve: + labels_to_resolve["Equipment"] = extracted["Equipment"] + + if not labels_to_resolve: + return + + import sys + if verbose: + total = sum(len(v) for v in labels_to_resolve.values()) + print( + f"[ArtifactLinker] Resolving {total} extracted mentions " + f"against {sum(len(v) for v in self._entity_cache_raw.values())} existing entities via GPT...", + file=sys.stderr, flush=True, + ) + + raw_result = self._gpt.resolve_entities( + labels_to_resolve, + self._entity_cache_raw, + verbose=verbose, + ) + + vis = raw_result.pop("visualizes", {}) + if isinstance(vis, dict): + for equip, vcs in vis.items(): + if isinstance(vcs, list): + self._gpt_visualizes[equip] = [v for v in vcs if isinstance(v, str)] + elif isinstance(vcs, str): + self._gpt_visualizes[equip] = [vcs] + + self._gpt_resolved = raw_result + + if verbose: + matched = sum( + 1 for mappings in self._gpt_resolved.values() + for v in mappings.values() if v + ) + vis_count = sum(len(v) for v in self._gpt_visualizes.values()) + print( + f"[ArtifactLinker] GPT matched {matched} mentions to existing entities, " + f"{vis_count} VISUALIZES links proposed", + file=sys.stderr, flush=True, + ) + for equip, vcs in self._gpt_visualizes.items(): + for vc in vcs: + print( + f"[ArtifactLinker] VISUALIZES: {vc} -> Equipment:{equip}", + file=sys.stderr, flush=True, + ) + + def resolve_name(self, label: str, raw_name: str) -> Tuple[str, bool]: + """ + Resolve an extracted name to an existing graph entity. + + Resolution order: + 1. GPT-resolved mapping (if available) + 2. Exact case-insensitive match + 3. 
Return raw name as-is (not matched) + """ + if not raw_name: + return raw_name, False + + # Check GPT resolution first + gpt_mappings = self._gpt_resolved.get(label, {}) + if raw_name in gpt_mappings: + resolved = gpt_mappings[raw_name] + if resolved: + return resolved, True + else: + return raw_name.strip(), False + + # Exact case-insensitive match + label_cache = self._entity_cache.get(label, {}) + lower = raw_name.lower().strip() + if lower in label_cache: + return label_cache[lower], True + + return raw_name.strip(), False + + def normalize_extraction( + self, + raw: Dict[str, Any], + source_file: str, + source_kind: str, + extraction_model: str = "gpt-5.4", + verbose: bool = False, + ) -> ExtractionResult: + """ + Convert raw GPT-5.4 JSON output into a normalized ExtractionResult. + + Performs: + - entity linking against known graph names + - process concept normalization + - relationship validation against allowed vocabulary + - evidence attachment + """ + result = ExtractionResult( + source_file=source_file, + source_kind=source_kind, + ) + + self.run_gpt_entity_resolution(raw, verbose=verbose) + + base_evidence = EvidenceItem( + source_file=source_file, + source_kind=source_kind, + extraction_model=extraction_model, + extraction_method="vision" if source_kind in ("pid", "diagram") else "text", + ) + + self._process_equipment_facts(raw, result, base_evidence) + self._process_tag_facts(raw, result, base_evidence) + self._process_media(raw, result, base_evidence) + self._process_operations(raw, result, base_evidence) + self._process_species(raw, result, base_evidence) + self._process_reactions(raw, result, base_evidence) + self._process_relationships(raw, result, base_evidence) + self._process_visualizes(result, base_evidence) + + return result + + # ------------------------------------------------------------------ + # Internal normalization helpers + # ------------------------------------------------------------------ + + def _make_evidence(self, base: 
EvidenceItem, **overrides) -> EvidenceItem: + from dataclasses import asdict + d = asdict(base) + d.update(overrides) + return EvidenceItem(**d) + + def _process_equipment_facts( + self, raw: Dict, result: ExtractionResult, base_ev: EvidenceItem + ) -> None: + for eq in raw.get("equipment_facts", []): + name = eq.get("equipment_name", "") + if not name: + continue + + resolved, _ = self.resolve_name("Equipment", name) + ev = self._make_evidence(base_ev, source_excerpt=f"Equipment: {resolved}") + + props: Dict[str, Any] = {} + if eq.get("service"): + props["service"] = eq["service"] + if eq.get("function"): + props["process_function"] = eq["function"] + + result.node_updates.append(ExtractedNodeUpdate( + node_label="Equipment", + node_name=resolved, + properties=props, + evidence=[ev], + )) + + for medium in eq.get("media_handled", []): + med_resolved, _ = self.resolve_name("ProcessMedium", medium) + result.process_concepts.append(ExtractedProcessConcept( + label="ProcessMedium", + name=med_resolved, + properties={"category": "product"}, + evidence=[ev], + )) + result.relationships.append(ExtractedRelationship( + source_label="Equipment", source_name=resolved, + target_label="ProcessMedium", target_name=med_resolved, + rel_type="HANDLES_MEDIUM", + evidence=[ev], + )) + + for op in eq.get("operations_performed", []): + op_resolved, _ = self.resolve_name("UnitOperation", op) + result.process_concepts.append(ExtractedProcessConcept( + label="UnitOperation", + name=op_resolved, + properties={"category": "transfer"}, + evidence=[ev], + )) + result.relationships.append(ExtractedRelationship( + source_label="Equipment", source_name=resolved, + target_label="UnitOperation", target_name=op_resolved, + rel_type="PERFORMS_OPERATION", + evidence=[ev], + )) + + for param in eq.get("operating_parameters", []): + env_name = f"{resolved}/{param.get('parameter', 'unknown')}" + env_props = { + k: param[k] for k in [ + "parameter", "unit", + "normal_low", "normal_high", + "alarm_low", 
"alarm_high", + "trip_low", "trip_high", + ] if param.get(k) is not None + } + # Map alarm to warning for schema consistency + if "alarm_low" in env_props: + env_props["low_warning"] = env_props.pop("alarm_low") + if "alarm_high" in env_props: + env_props["high_warning"] = env_props.pop("alarm_high") + if "trip_low" in env_props: + env_props["trip_low"] = env_props["trip_low"] + if "trip_high" in env_props: + env_props["trip_high"] = env_props["trip_high"] + + result.process_concepts.append(ExtractedProcessConcept( + label="OperatingEnvelope", + name=env_name, + properties=env_props, + evidence=[ev], + )) + result.relationships.append(ExtractedRelationship( + source_label="Equipment", source_name=resolved, + target_label="OperatingEnvelope", target_name=env_name, + rel_type="HAS_OPERATING_ENVELOPE", + evidence=[ev], + )) + + def _process_tag_facts( + self, raw: Dict, result: ExtractionResult, base_ev: EvidenceItem + ) -> None: + for tag in raw.get("tag_facts", []): + name = tag.get("tag_name", "") + if not name: + continue + + resolved, _ = self.resolve_name("ScadaTag", name) + ev = self._make_evidence(base_ev, source_excerpt=f"Tag: {resolved}") + + props: Dict[str, Any] = {} + if tag.get("process_context"): + props["process_context"] = tag["process_context"] + + result.node_updates.append(ExtractedNodeUpdate( + node_label="ScadaTag", + node_name=resolved, + properties=props, + evidence=[ev], + )) + + if tag.get("measures"): + principle_name = tag["measures"] + pp_resolved, _ = self.resolve_name("PhysicalPrinciple", principle_name) + result.process_concepts.append(ExtractedProcessConcept( + label="PhysicalPrinciple", + name=pp_resolved, + properties={"category": "analytical"}, + evidence=[ev], + )) + result.relationships.append(ExtractedRelationship( + source_label="ScadaTag", source_name=resolved, + target_label="PhysicalPrinciple", target_name=pp_resolved, + rel_type="MEASURES", + evidence=[ev], + )) + + def _process_media( + self, raw: Dict, result: 
ExtractionResult, base_ev: EvidenceItem + ) -> None: + for medium in raw.get("process_media", []): + name = medium.get("name", "") + if not name: + continue + resolved, _ = self.resolve_name("ProcessMedium", name) + ev = self._make_evidence(base_ev, source_excerpt=f"Medium: {resolved}") + result.process_concepts.append(ExtractedProcessConcept( + label="ProcessMedium", + name=resolved, + properties={ + k: medium[k] for k in ["category", "phase", "description"] + if medium.get(k) + }, + evidence=[ev], + )) + + def _process_operations( + self, raw: Dict, result: ExtractionResult, base_ev: EvidenceItem + ) -> None: + for op in raw.get("unit_operations", []): + name = op.get("name", "") + if not name: + continue + resolved, _ = self.resolve_name("UnitOperation", name) + ev = self._make_evidence(base_ev, source_excerpt=f"Operation: {resolved}") + result.process_concepts.append(ExtractedProcessConcept( + label="UnitOperation", + name=resolved, + properties={ + k: op[k] for k in ["category", "description"] + if op.get(k) + }, + evidence=[ev], + )) + + def _process_species( + self, raw: Dict, result: ExtractionResult, base_ev: EvidenceItem + ) -> None: + for sp in raw.get("chemical_species", []): + name = sp.get("name", "") + if not name: + continue + resolved, _ = self.resolve_name("ChemicalSpecies", name) + ev = self._make_evidence(base_ev, source_excerpt=f"Species: {resolved}") + result.process_concepts.append(ExtractedProcessConcept( + label="ChemicalSpecies", + name=resolved, + properties={ + k: sp[k] for k in ["category", "cas_number", "description"] + if sp.get(k) + }, + evidence=[ev], + )) + + def _process_reactions( + self, raw: Dict, result: ExtractionResult, base_ev: EvidenceItem + ) -> None: + for rx in raw.get("reactions", []): + name = rx.get("name", "") + if not name: + continue + resolved, _ = self.resolve_name("Reaction", name) + ev = self._make_evidence(base_ev, source_excerpt=f"Reaction: {resolved}") + 
result.process_concepts.append(ExtractedProcessConcept( + label="Reaction", + name=resolved, + properties={ + k: rx[k] for k in ["category", "description"] + if rx.get(k) + }, + evidence=[ev], + )) + for species in rx.get("species_involved", []): + sp_resolved, _ = self.resolve_name("ChemicalSpecies", species) + result.relationships.append(ExtractedRelationship( + source_label="Reaction", source_name=resolved, + target_label="ChemicalSpecies", target_name=sp_resolved, + rel_type="INVOLVES_SPECIES", + evidence=[ev], + )) + + def _process_visualizes( + self, result: ExtractionResult, base_ev: EvidenceItem + ) -> None: + """Add VISUALIZES relationships proposed by GPT entity resolution.""" + for equip_name, vc_names in self._gpt_visualizes.items(): + for vc_name in vc_names: + ev = self._make_evidence( + base_ev, + source_excerpt=f"ViewComponent:{vc_name} -[VISUALIZES]-> Equipment:{equip_name}", + ) + result.relationships.append(ExtractedRelationship( + source_label="ViewComponent", source_name=vc_name, + target_label="Equipment", target_name=equip_name, + rel_type="VISUALIZES", + evidence=[ev], + )) + + def _process_relationships( + self, raw: Dict, result: ExtractionResult, base_ev: EvidenceItem + ) -> None: + allowed = set(PROCESS_RELATIONSHIPS.keys()) + for rel in raw.get("relationships", []): + rel_type = rel.get("relationship", "") + if rel_type not in allowed: + continue + + src_label = rel.get("source_type", "") + src_name = rel.get("source_name", "") + tgt_label = rel.get("target_type", "") + tgt_name = rel.get("target_name", "") + + if not all([src_label, src_name, tgt_label, tgt_name]): + continue + + src_resolved, _ = self.resolve_name(src_label, src_name) + tgt_resolved, _ = self.resolve_name(tgt_label, tgt_name) + + ev = self._make_evidence( + base_ev, + source_excerpt=f"{src_label}:{src_resolved} -{rel_type}-> {tgt_label}:{tgt_resolved}", + ) + result.relationships.append(ExtractedRelationship( + source_label=src_label, source_name=src_resolved, + 
#!/usr/bin/env python3
"""Normalized extraction models for GPT-5.4 artifact ingestion.

This is the intermediate schema sitting between raw GPT output and the
Neo4j write path: every extraction result is normalized into these
dataclasses before any graph mutation happens.
"""

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any

from process_semantics import EvidenceItem


# ============================================================================
# Extracted fact types
# ============================================================================

@dataclass
class ExtractedNodeUpdate:
    """An update to an existing ontology node extracted from a source."""
    node_label: str  # e.g. "Equipment", "AOI", "ScadaTag"
    node_name: str   # name used to MERGE/match
    properties: Dict[str, Any] = field(default_factory=dict)
    evidence: List[EvidenceItem] = field(default_factory=list)


@dataclass
class ExtractedRelationship:
    """A relationship extracted between two entities."""
    source_label: str
    source_name: str
    target_label: str
    target_name: str
    rel_type: str  # e.g. "HANDLES_MEDIUM", "PERFORMS_OPERATION"
    properties: Dict[str, Any] = field(default_factory=dict)
    evidence: List[EvidenceItem] = field(default_factory=list)


@dataclass
class ExtractedProcessConcept:
    """A new process-semantic concept to be induced."""
    label: str  # e.g. "ProcessMedium", "UnitOperation"
    name: str
    properties: Dict[str, Any] = field(default_factory=dict)
    evidence: List[EvidenceItem] = field(default_factory=list)


@dataclass
class ExtractionResult:
    """Complete normalized result from extracting one source artifact."""
    source_file: str
    source_kind: str  # "pid", "sop", "diagram"
    node_updates: List[ExtractedNodeUpdate] = field(default_factory=list)
    relationships: List[ExtractedRelationship] = field(default_factory=list)
    process_concepts: List[ExtractedProcessConcept] = field(default_factory=list)
    raw_mentions: List[Dict[str, Any]] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)


# ============================================================================
# GPT extraction prompt contract
# ============================================================================

EXTRACTION_RESPONSE_SCHEMA = {
    "type": "object",
    "properties": {
        "equipment_facts": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "equipment_name": {"type": "string"},
                    "service": {"type": "string"},
                    "function": {"type": "string"},
                    "media_handled": {"type": "array", "items": {"type": "string"}},
                    "operations_performed": {"type": "array", "items": {"type": "string"}},
                    "operating_parameters": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "parameter": {"type": "string"},
                                "unit": {"type": "string"},
                                "normal_low": {"type": "number"},
                                "normal_high": {"type": "number"},
                                "alarm_low": {"type": "number"},
                                "alarm_high": {"type": "number"},
                                "trip_low": {"type": "number"},
                                "trip_high": {"type": "number"},
                            },
                        },
                    },
                },
            },
        },
        "tag_facts": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "tag_name": {"type": "string"},
                    "measures": {"type": "string"},
                    "process_context": {"type": "string"},
                    "unit": {"type": "string"},
                },
            },
        },
        "process_media": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "category": {"type": "string"},
                    "phase": {"type": "string"},
                    "description": {"type": "string"},
                },
            },
        },
        "unit_operations": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "category": {"type": "string"},
                    "description": {"type": "string"},
                },
            },
        },
        "chemical_species": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "category": {"type": "string"},
                    "cas_number": {"type": "string"},
                    "description": {"type": "string"},
                },
            },
        },
        "reactions": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "category": {"type": "string"},
                    "description": {"type": "string"},
                    "species_involved": {"type": "array", "items": {"type": "string"}},
                },
            },
        },
        "relationships": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "source_type": {"type": "string"},
                    "source_name": {"type": "string"},
                    "relationship": {"type": "string"},
                    "target_type": {"type": "string"},
                    "target_name": {"type": "string"},
                },
            },
        },
    },
}
+""" + +import os +import sys +import json +import base64 +import time +from pathlib import Path +from typing import Dict, List, Optional, Any + +try: + from dotenv import load_dotenv +except ImportError: + def load_dotenv(*_a, **_kw): + return False + +load_dotenv() + + +class GPT54Client: + """ + OpenAI GPT-5.4 client for multimodal artifact extraction. + + Supports: + - Image inputs (P&IDs, engineering diagrams) + - Text/PDF inputs (SOPs, procedures) + - Structured JSON output via response_format + """ + + DEFAULT_MODEL = "gpt-5.4" + + def __init__( + self, + api_key: Optional[str] = None, + model: Optional[str] = None, + ): + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + if not self.api_key: + raise ValueError( + "OPENAI_API_KEY not found. Set it in .env or pass api_key." + ) + self.model = model or os.getenv("OPENAI_MODEL", self.DEFAULT_MODEL) + + import openai + self.client = openai.OpenAI(api_key=self.api_key, timeout=300.0) + + # ------------------------------------------------------------------ + # Image encoding helpers + # ------------------------------------------------------------------ + + @staticmethod + def _encode_image(image_path: str) -> str: + with open(image_path, "rb") as f: + return base64.b64encode(f.read()).decode("utf-8") + + @staticmethod + def _image_media_type(path: str) -> str: + ext = Path(path).suffix.lower() + return { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".bmp": "image/bmp", + ".tiff": "image/tiff", + ".tif": "image/tiff", + }.get(ext, "image/png") + + # ------------------------------------------------------------------ + # Core extraction methods + # ------------------------------------------------------------------ + + def extract_from_image( + self, + image_path: str, + source_kind: str = "pid", + existing_entities: Optional[Dict[str, List[str]]] = None, + verbose: bool = False, + ) -> Dict[str, Any]: + """ + Extract structured process 
facts from an image (P&ID / diagram). + + Args: + image_path: Path to the image file. + source_kind: "pid", "diagram", or "sop". + existing_entities: Dict mapping label -> list of known names + for entity linking hints. + verbose: Print debug output. + + Returns: + Raw parsed JSON dict from GPT-5.4. + """ + b64 = self._encode_image(image_path) + media = self._image_media_type(image_path) + + system_prompt = self._build_system_prompt(source_kind, existing_entities) + user_content = [ + { + "type": "image_url", + "image_url": { + "url": f"data:{media};base64,{b64}", + "detail": "high", + }, + }, + { + "type": "text", + "text": self._build_user_prompt(source_kind, image_path), + }, + ] + + return self._call(system_prompt, user_content, verbose=verbose) + + def extract_from_text( + self, + text: str, + source_file: str = "", + source_kind: str = "sop", + existing_entities: Optional[Dict[str, List[str]]] = None, + verbose: bool = False, + ) -> Dict[str, Any]: + """ + Extract structured process facts from text (SOP / procedure). + + Args: + text: The document text content. + source_file: Original file path for reference. + source_kind: "sop", "procedure", or "manual". + existing_entities: Dict mapping label -> list of known names. + verbose: Print debug output. + + Returns: + Raw parsed JSON dict from GPT-5.4. + """ + system_prompt = self._build_system_prompt(source_kind, existing_entities) + user_content = [ + { + "type": "text", + "text": self._build_user_prompt(source_kind, source_file) + + f"\n\n--- DOCUMENT CONTENT ---\n{text[:60000]}", + }, + ] + + return self._call(system_prompt, user_content, verbose=verbose) + + def resolve_entities( + self, + extracted_mentions: Dict[str, List[str]], + existing_entities: Dict[str, List[str]], + verbose: bool = False, + ) -> Dict[str, Dict[str, str]]: + """ + Use GPT to match extracted entity mentions to existing graph nodes. + + Args: + extracted_mentions: Dict mapping label -> list of names from extraction. + e.g. 
{"Equipment": ["Brew Kettle BK-001", "HX-200"], "ScadaTag": ["TT501"]} + existing_entities: Dict mapping label -> list of known names in graph. + e.g. {"Equipment": ["BK-001", "HX-200-A"], "ScadaTag": ["Area5/TT501.PV"]} + + Returns: + Dict mapping label -> {extracted_name: resolved_name_or_null}. + resolved_name is the exact existing name if matched, or null if no match. + """ + system_prompt = """You are an expert at matching industrial equipment and tag names across different naming conventions. + +You will receive two lists per entity type: +- "extracted": names found in a P&ID, SOP, or engineering diagram +- "existing": names already in the plant's ontology database + +Your job is to determine which extracted names refer to the same entity as which existing names. + +Industrial naming conventions vary: a P&ID might say "Brew Kettle BK-001" while the SCADA system has "BK-001" or "BK_001_BrewKettle". Tags like "TT-501" might appear in SCADA as "Area5/TT501.PV" or "TT_501_Temperature". + +ViewComponents are SCADA UI elements that visualize equipment. A ViewComponent whose name or path references an equipment ID likely VISUALIZES that equipment. When you see Equipment in the extracted list AND ViewComponent in the existing list, also return a "visualizes" key mapping equipment names to the ViewComponent names that display them. + +Rules: +- Match based on tag numbers, equipment IDs, and functional identity -- not just substring overlap. +- If an extracted name clearly refers to an existing entity, map it to the EXACT existing name. +- If there is no plausible match, map it to null. +- When in doubt, prefer no match over a wrong match. + +Return JSON: +{ + "Equipment": { "extracted_name": "existing_name_or_null", ... }, + "ScadaTag": { "extracted_name": "existing_name_or_null", ... }, + "visualizes": { "equipment_name": ["ViewComponent_name", ...], ... 
} +}""" + + user_parts = [] + for label in extracted_mentions: + ext_list = extracted_mentions[label] + exist_list = existing_entities.get(label, []) + if not ext_list: + continue + user_parts.append(f"## {label}") + user_parts.append(f"Extracted: {json.dumps(ext_list)}") + user_parts.append(f"Existing: {json.dumps(exist_list[:200])}") + user_parts.append("") + + vc_list = existing_entities.get("ViewComponent", []) + if "Equipment" in extracted_mentions and vc_list: + user_parts.append("## ViewComponent (existing only — for VISUALIZES linking)") + user_parts.append(f"Existing: {json.dumps(vc_list[:300])}") + user_parts.append("") + + if not user_parts: + return {} + + user_content = [{"type": "text", "text": "\n".join(user_parts)}] + result = self._call(system_prompt, user_content, verbose=verbose) + + mappings: Dict[str, Any] = {} + for label, matches in result.items(): + if isinstance(matches, dict): + mappings[label] = { + k: v for k, v in matches.items() + if isinstance(k, str) + } + return mappings + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _call( + self, + system_prompt: str, + user_content: List[Dict], + verbose: bool = False, + ) -> Dict[str, Any]: + """Make the actual API call and parse JSON response.""" + if verbose: + print(f"[GPT54] Calling {self.model}...", file=sys.stderr, flush=True) + + start = time.time() + response = self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_content}, + ], + max_completion_tokens=16000, + temperature=0.1, + response_format={"type": "json_object"}, + ) + elapsed = time.time() - start + + text = response.choices[0].message.content or "{}" + if verbose: + tokens = response.usage + print( + f"[GPT54] Done in {elapsed:.1f}s " + f"(in={tokens.prompt_tokens}, out={tokens.completion_tokens})", + 
file=sys.stderr, flush=True, + ) + + try: + return json.loads(text) + except json.JSONDecodeError: + return {"error": "Failed to parse GPT response as JSON", "raw": text[:2000]} + + def _build_system_prompt( + self, + source_kind: str, + existing_entities: Optional[Dict[str, List[str]]] = None, + ) -> str: + entity_hint = "" + if existing_entities: + parts = [] + for label, names in existing_entities.items(): + sample = names[:30] + parts.append(f" {label}: {json.dumps(sample)}") + entity_hint = ( + "\n\nKnown entities already in the ontology (use these exact names when possible):\n" + + "\n".join(parts) + ) + + return f"""You are an expert process engineer analyzing industrial plant documentation. + +Your job is to extract structured facts from a {source_kind.upper()} source and return them as JSON. + +Extract ALL of the following when present: +1. Equipment facts: names, services, functions, media handled, operations performed, operating parameters with ranges/limits. +2. Tag/instrument facts: tag names, what they measure, units, process context. +3. Process media: fluids, gases, utilities, products flowing through the system. +4. Unit operations: what operations each piece of equipment performs (pumping, heating, mixing, filtration, etc.). +5. Chemical species: chemicals, additives, reactants, products mentioned. +6. Reactions: any chemical or physical transformations described. +7. Relationships: connections between equipment, tags, media, operations, and species. + +Rules: +- Use exact equipment/tag names from the source when possible. +- When a name matches a known entity, use that exact name. +- For operating parameters, extract numeric limits when available. +- Classify media as: utility, product, waste, solvent, or gas. +- Classify operations as: transfer, thermal, mixing, separation, cleaning, or reaction. +- Return valid JSON matching the schema described below. 
+{entity_hint} + +Response schema: +{{ + "equipment_facts": [{{ + "equipment_name": "string", + "service": "string", + "function": "string", + "media_handled": ["string"], + "operations_performed": ["string"], + "operating_parameters": [{{ + "parameter": "string", + "unit": "string", + "normal_low": number_or_null, + "normal_high": number_or_null, + "alarm_low": number_or_null, + "alarm_high": number_or_null, + "trip_low": number_or_null, + "trip_high": number_or_null + }}] + }}], + "tag_facts": [{{ + "tag_name": "string", + "measures": "string", + "process_context": "string", + "unit": "string" + }}], + "process_media": [{{ + "name": "string", + "category": "string", + "phase": "string", + "description": "string" + }}], + "unit_operations": [{{ + "name": "string", + "category": "string", + "description": "string" + }}], + "chemical_species": [{{ + "name": "string", + "category": "string", + "cas_number": "string", + "description": "string" + }}], + "reactions": [{{ + "name": "string", + "category": "string", + "description": "string", + "species_involved": ["string"] + }}], + "relationships": [{{ + "source_type": "string", + "source_name": "string", + "relationship": "string", + "target_type": "string", + "target_name": "string" + }}] +}}""" + + def _build_user_prompt(self, source_kind: str, source_path: str) -> str: + kind_labels = { + "pid": "P&ID (Piping and Instrumentation Diagram)", + "diagram": "engineering diagram", + "sop": "Standard Operating Procedure", + "procedure": "operating procedure", + "manual": "equipment manual", + } + label = kind_labels.get(source_kind, source_kind) + return ( + f"Analyze this {label} and extract all process facts.\n" + f"Source file: {source_path}\n" + f"Return a single JSON object with all extracted facts." 
+ ) diff --git a/scripts/graph_api.py b/scripts/graph_api.py index e3bff45..4f89175 100644 --- a/scripts/graph_api.py +++ b/scripts/graph_api.py @@ -79,6 +79,13 @@ class GraphAPI: "vendor": "mes", "agentrun": "anomaly", "anomalyevent": "anomaly", + # Process-semantic layer + "processmedium": "process", + "unitoperation": "process", + "operatingenvelope": "process", + "physicalprinciple": "process", + "chemicalspecies": "process", + "reaction": "process", } # Color palette for node types @@ -94,9 +101,40 @@ class GraphAPI: "overview": "#607D8B", "mes": "#00897B", "anomaly": "#F44336", + "process": "#00ACC1", "other": "#9E9E9E", } + # Schema-driven contract for the richer ontology. + # Maps label -> metadata for display, search, edit, and relationship rules. + NODE_LABEL_META = { + "AOI": {"key": "name", "display": "name", "searchable": ["name", "purpose", "description"], "group": "plc"}, + "Tag": {"key": "name", "display": "name", "searchable": ["name", "description"], "group": "plc"}, + "UDT": {"key": "name", "display": "name", "searchable": ["name", "purpose"], "group": "scada"}, + "Equipment": {"key": "name", "display": "name", "searchable": ["name", "purpose", "type"], "group": "scada"}, + "View": {"key": "name", "display": "name", "searchable": ["name", "purpose"], "group": "scada"}, + "ViewComponent": {"key": "path", "display": "name", "searchable": ["path", "name", "purpose"], "group": "scada-component"}, + "ScadaTag": {"key": "name", "display": "name", "searchable": ["name", "purpose"], "group": "scada-tag"}, + "Script": {"key": "name", "display": "name", "searchable": ["name", "purpose"], "group": "scada"}, + "NamedQuery": {"key": "name", "display": "name", "searchable": ["name", "purpose"], "group": "scada"}, + "FaultSymptom": {"key": "symptom", "display": "symptom", "searchable": ["symptom"], "group": "troubleshooting"}, + "FaultCause": {"key": "cause", "display": "cause", "searchable": ["cause"], "group": "troubleshooting"}, + "OperatorPhrase": {"key": 
"phrase", "display": "phrase", "searchable": ["phrase"], "group": "troubleshooting"}, + "Material": {"key": "name", "display": "name", "searchable": ["name"], "group": "mes"}, + "Batch": {"key": "name", "display": "name", "searchable": ["name"], "group": "mes"}, + "ProductionOrder": {"key": "name", "display": "name", "searchable": ["name"], "group": "mes"}, + "Operation": {"key": "name", "display": "name", "searchable": ["name"], "group": "mes"}, + "CriticalControlPoint": {"key": "name", "display": "name", "searchable": ["name"], "group": "mes"}, + "ProcessMedium": {"key": "name", "display": "name", "searchable": ["name", "description", "category"], "group": "process"}, + "UnitOperation": {"key": "name", "display": "name", "searchable": ["name", "description", "category"], "group": "process"}, + "OperatingEnvelope": {"key": "name", "display": "name", "searchable": ["name", "parameter"], "group": "process"}, + "PhysicalPrinciple": {"key": "name", "display": "name", "searchable": ["name", "category"], "group": "process"}, + "ChemicalSpecies": {"key": "name", "display": "name", "searchable": ["name", "category"], "group": "process"}, + "Reaction": {"key": "name", "display": "name", "searchable": ["name", "category"], "group": "process"}, + "AgentRun": {"key": "run_id", "display": "run_id", "searchable": ["run_id"], "group": "anomaly"}, + "AnomalyEvent": {"key": "event_id", "display": "summary", "searchable": ["summary", "event_id"], "group": "anomaly"}, + } + def __init__(self, graph: Optional[OntologyGraph] = None): self._graph = graph self._owns_graph = False @@ -150,7 +188,7 @@ def _format_edge(self, record: Dict) -> Dict: # ========================================================================= def load_graph( - self, node_types: Optional[List[str]] = None, limit: int = 500 + self, node_types: Optional[List[str]] = None, limit: int = 10000 ) -> Dict: """ Load graph data for visualization. 
@@ -822,6 +860,8 @@ def get_schema(self) -> Dict: "nodeTypes": labels, "relationshipTypes": sorted(relationships), "groups": list(set(self.NODE_GROUPS.values())), + "labelMeta": self.NODE_LABEL_META, + "groupColors": self.NODE_COLORS, } @@ -833,7 +873,7 @@ def main(): # Load graph load_parser = subparsers.add_parser("load", help="Load graph data") load_parser.add_argument("--types", nargs="*", help="Node types to include") - load_parser.add_argument("--limit", type=int, default=500, help="Max nodes") + load_parser.add_argument("--limit", type=int, default=10000, help="Max nodes") # Get neighbors neighbors_parser = subparsers.add_parser("neighbors", help="Get node neighbors") @@ -910,6 +950,15 @@ def main(): # Schema subparsers.add_parser("schema", help="Get graph schema") + # Ingest artifact + ingest_parser = subparsers.add_parser("ingest-artifact", help="Ingest a P&ID/SOP/diagram") + ingest_parser.add_argument("file_path", help="Path to the artifact file") + ingest_parser.add_argument( + "--source-kind", default="pid", + choices=["pid", "sop", "diagram"], + help="Source type (default: pid)", + ) + args = parser.parse_args() if not args.command: @@ -957,6 +1006,10 @@ def main(): result = api.apply_batch(changes) elif args.command == "schema": result = api.get_schema() + elif args.command == "ingest-artifact": + from artifact_ingest import ArtifactIngester + ingester = ArtifactIngester(graph=api._get_graph(), verbose=True) + result = ingester.ingest_file(args.file_path, args.source_kind) else: output_error(f"Unknown command: {args.command}") return diff --git a/scripts/incremental_analyzer.py b/scripts/incremental_analyzer.py index 85894ea..30bd8ba 100644 --- a/scripts/incremental_analyzer.py +++ b/scripts/incremental_analyzer.py @@ -844,8 +844,38 @@ def _build_batch_context( "hmi": item.get("hmi", ""), } + # Inject process-semantic context when available + if item_type in ("Equipment", "AOI", "ScadaTag"): + context["process_semantics"] = 
self._get_process_context_for_items( + item_type, items + ) + return context + def _get_process_context_for_items( + self, item_type: str, items: List[Dict] + ) -> Dict[str, Any]: + """Fetch process-semantic context (media, operations, envelopes) for items.""" + ctx: Dict[str, Any] = {} + for item in items: + name = item.get("name", "") + if not name: + continue + try: + if item_type == "Equipment": + pctx = self._graph.get_process_context_for_equipment(name) + elif item_type == "ScadaTag": + pctx = self._graph.get_process_context_for_tag(name) + elif item_type == "AOI": + pctx = self._graph.get_process_context_for_equipment(name) + else: + continue + if pctx and any(pctx.get(k) for k in pctx if k != "name"): + ctx[name] = pctx + except Exception: + pass + return ctx + def _get_system_prompt(self, item_type: str) -> str: """Get the system prompt for analyzing a specific item type.""" base = """You are an expert in industrial automation and SCADA systems, specializing in Ignition by Inductive Automation. 
@@ -998,6 +1028,27 @@ def _get_user_prompt( if ctx.get("udts"): parts.append(f"Displays UDTs: {ctx['udts']}") + # Add process-semantic context if available + process_ctx = context.get("process_semantics", {}).get(name, {}) + if process_ctx: + if process_ctx.get("media"): + parts.append(f"Handles media: {process_ctx['media']}") + if process_ctx.get("operations"): + parts.append(f"Performs operations: {process_ctx['operations']}") + if process_ctx.get("envelopes"): + envs = process_ctx["envelopes"] + env_strs = [] + for e in envs: + if isinstance(e, dict) and e.get("name"): + rng = f"{e.get('normal_low', '?')}–{e.get('normal_high', '?')} {e.get('unit', '')}" + env_strs.append(f"{e['parameter']}: {rng}") + if env_strs: + parts.append(f"Operating envelopes: {', '.join(env_strs)}") + if process_ctx.get("measures"): + parts.append(f"Measures: {process_ctx['measures']}") + if process_ctx.get("controlled_operations"): + parts.append(f"Controls operations: {process_ctx['controlled_operations']}") + parts.append("") parts.append( diff --git a/scripts/neo4j_ontology.py b/scripts/neo4j_ontology.py index 92f6258..f2a9efc 100644 --- a/scripts/neo4j_ontology.py +++ b/scripts/neo4j_ontology.py @@ -95,6 +95,29 @@ class OntologyGraph: - HAS_SCREEN: HMIDevice -> HMIScreen - HAS_TEXT_LIST: HMIDevice -> HMITextList - MONITORS_TAG: HMIAlarm -> PLCTag + + Process-Semantic Node Types: + - ProcessMedium: A material/utility stream (water, steam, product, etc.) + - UnitOperation: A canonical plant operation (pumping, heating, mixing, etc.) 
+ - OperatingEnvelope: Normal ranges, alarm bands, trip windows for a parameter + - PhysicalPrinciple: A measurable physical quantity (temperature, pressure, flow) + - ChemicalSpecies: A chemical substance involved in plant processes + - Reaction: A chemical or physical transformation step + + Process-Semantic Relationship Types: + - HANDLES_MEDIUM: Equipment -> ProcessMedium + - PERFORMS_OPERATION: Equipment -> UnitOperation + - HAS_OPERATING_ENVELOPE: Equipment -> OperatingEnvelope + - MEASURES: ScadaTag -> PhysicalPrinciple + - MONITORS_ENVELOPE: ScadaTag -> OperatingEnvelope + - IMPLEMENTS_CONTROL_OF: AOI -> UnitOperation + - USES_PRINCIPLE: UnitOperation -> PhysicalPrinciple + - INVOLVES_SPECIES: Reaction -> ChemicalSpecies + - PROCESSES_SPECIES: UnitOperation -> ChemicalSpecies + - HAS_REACTION: UnitOperation -> Reaction + - MEDIUM_CONTAINS: ProcessMedium -> ChemicalSpecies + - ENVELOPE_FOR_PRINCIPLE: OperatingEnvelope -> PhysicalPrinciple + - VISUALIZES: ViewComponent -> Equipment """ def __init__(self, config: Optional[Neo4jConfig] = None): @@ -200,6 +223,13 @@ def create_indexes(self) -> None: "CREATE INDEX anomalyevent_state IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.state)", "CREATE INDEX anomalyevent_severity IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.severity)", "CREATE INDEX anomalyevent_dedup_key IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.dedup_key)", + # Process-semantic layer indexes + "CREATE INDEX processmedium_name IF NOT EXISTS FOR (pm:ProcessMedium) ON (pm.name)", + "CREATE INDEX unitoperation_name IF NOT EXISTS FOR (uo:UnitOperation) ON (uo.name)", + "CREATE INDEX operatingenvelope_name IF NOT EXISTS FOR (oe:OperatingEnvelope) ON (oe.name)", + "CREATE INDEX physicalprinciple_name IF NOT EXISTS FOR (pp:PhysicalPrinciple) ON (pp.name)", + "CREATE INDEX chemicalspecies_name IF NOT EXISTS FOR (cs:ChemicalSpecies) ON (cs.name)", + "CREATE INDEX reaction_name IF NOT EXISTS FOR (rx:Reaction) ON (rx.name)", ] for constraint in constraints: @@ -2990,6 
+3020,280 @@ def get_item_with_context( context = {k: v for k, v in dict(record).items() if k != "item"} return {"item": item_data, "context": context} + # ========================================================================= + # Process-Semantic Layer Write Helpers + # ========================================================================= + + def create_process_medium( + self, name: str, category: str = "", phase: str = "", + description: str = "", purpose: str = "", + evidence_json: str = "", + ) -> str: + """Create or merge a ProcessMedium node with provenance.""" + with self.session() as session: + session.run( + """ + MERGE (n:ProcessMedium {name: $name}) + SET n.category = COALESCE(n.category, $category), + n.phase = COALESCE(n.phase, $phase), + n.description = COALESCE(n.description, $description), + n.purpose = COALESCE(n.purpose, $purpose), + n.evidence_items = CASE + WHEN $ev = '' THEN n.evidence_items + WHEN n.evidence_items IS NULL THEN $ev + ELSE n.evidence_items + $ev + END, + n.last_evidence_at = datetime() + """, + {"name": name, "category": category, "phase": phase, + "description": description, "purpose": purpose, "ev": evidence_json}, + ) + return name + + def create_unit_operation( + self, name: str, category: str = "", + description: str = "", purpose: str = "", + evidence_json: str = "", + ) -> str: + """Create or merge a UnitOperation node with provenance.""" + with self.session() as session: + session.run( + """ + MERGE (n:UnitOperation {name: $name}) + SET n.category = COALESCE(n.category, $category), + n.description = COALESCE(n.description, $description), + n.purpose = COALESCE(n.purpose, $purpose), + n.evidence_items = CASE + WHEN $ev = '' THEN n.evidence_items + WHEN n.evidence_items IS NULL THEN $ev + ELSE n.evidence_items + $ev + END, + n.last_evidence_at = datetime() + """, + {"name": name, "category": category, + "description": description, "purpose": purpose, "ev": evidence_json}, + ) + return name + + def 
create_operating_envelope( + self, name: str, parameter: str = "", unit: str = "", + low_limit: float = None, low_warning: float = None, + normal_low: float = None, normal_high: float = None, + high_warning: float = None, high_limit: float = None, + trip_low: float = None, trip_high: float = None, + description: str = "", evidence_json: str = "", + ) -> str: + """Create or merge an OperatingEnvelope node with provenance.""" + with self.session() as session: + session.run( + """ + MERGE (n:OperatingEnvelope {name: $name}) + SET n.parameter = COALESCE(n.parameter, $parameter), + n.unit = COALESCE(n.unit, $unit), + n.description = COALESCE(n.description, $description), + n.evidence_items = CASE + WHEN $ev = '' THEN n.evidence_items + WHEN n.evidence_items IS NULL THEN $ev + ELSE n.evidence_items + $ev + END, + n.last_evidence_at = datetime() + FOREACH (_ IN CASE WHEN $low_limit IS NOT NULL THEN [1] ELSE [] END | + SET n.low_limit = $low_limit) + FOREACH (_ IN CASE WHEN $low_warning IS NOT NULL THEN [1] ELSE [] END | + SET n.low_warning = $low_warning) + FOREACH (_ IN CASE WHEN $normal_low IS NOT NULL THEN [1] ELSE [] END | + SET n.normal_low = $normal_low) + FOREACH (_ IN CASE WHEN $normal_high IS NOT NULL THEN [1] ELSE [] END | + SET n.normal_high = $normal_high) + FOREACH (_ IN CASE WHEN $high_warning IS NOT NULL THEN [1] ELSE [] END | + SET n.high_warning = $high_warning) + FOREACH (_ IN CASE WHEN $high_limit IS NOT NULL THEN [1] ELSE [] END | + SET n.high_limit = $high_limit) + FOREACH (_ IN CASE WHEN $trip_low IS NOT NULL THEN [1] ELSE [] END | + SET n.trip_low = $trip_low) + FOREACH (_ IN CASE WHEN $trip_high IS NOT NULL THEN [1] ELSE [] END | + SET n.trip_high = $trip_high) + """, + {"name": name, "parameter": parameter, "unit": unit, + "description": description, "ev": evidence_json, + "low_limit": low_limit, "low_warning": low_warning, + "normal_low": normal_low, "normal_high": normal_high, + "high_warning": high_warning, "high_limit": high_limit, + 
"trip_low": trip_low, "trip_high": trip_high}, + ) + return name + + def create_physical_principle( + self, name: str, category: str = "", unit_family: str = "", + description: str = "", evidence_json: str = "", + ) -> str: + """Create or merge a PhysicalPrinciple node with provenance.""" + with self.session() as session: + session.run( + """ + MERGE (n:PhysicalPrinciple {name: $name}) + SET n.category = COALESCE(n.category, $category), + n.unit_family = COALESCE(n.unit_family, $unit_family), + n.description = COALESCE(n.description, $description), + n.evidence_items = CASE + WHEN $ev = '' THEN n.evidence_items + WHEN n.evidence_items IS NULL THEN $ev + ELSE n.evidence_items + $ev + END, + n.last_evidence_at = datetime() + """, + {"name": name, "category": category, + "unit_family": unit_family, "description": description, + "ev": evidence_json}, + ) + return name + + def create_chemical_species( + self, name: str, category: str = "", cas_number: str = "", + molecular_formula: str = "", description: str = "", + evidence_json: str = "", + ) -> str: + """Create or merge a ChemicalSpecies node with provenance.""" + with self.session() as session: + session.run( + """ + MERGE (n:ChemicalSpecies {name: $name}) + SET n.category = COALESCE(n.category, $category), + n.cas_number = COALESCE(n.cas_number, $cas_number), + n.molecular_formula = COALESCE(n.molecular_formula, $molecular_formula), + n.description = COALESCE(n.description, $description), + n.evidence_items = CASE + WHEN $ev = '' THEN n.evidence_items + WHEN n.evidence_items IS NULL THEN $ev + ELSE n.evidence_items + $ev + END, + n.last_evidence_at = datetime() + """, + {"name": name, "category": category, + "cas_number": cas_number, "molecular_formula": molecular_formula, + "description": description, "ev": evidence_json}, + ) + return name + + def create_reaction( + self, name: str, category: str = "", description: str = "", + conditions: str = "", evidence_json: str = "", + ) -> str: + """Create or merge a 
Reaction node with provenance.""" + with self.session() as session: + session.run( + """ + MERGE (n:Reaction {name: $name}) + SET n.category = COALESCE(n.category, $category), + n.description = COALESCE(n.description, $description), + n.conditions = COALESCE(n.conditions, $conditions), + n.evidence_items = CASE + WHEN $ev = '' THEN n.evidence_items + WHEN n.evidence_items IS NULL THEN $ev + ELSE n.evidence_items + $ev + END, + n.last_evidence_at = datetime() + """, + {"name": name, "category": category, + "description": description, "conditions": conditions, + "ev": evidence_json}, + ) + return name + + def create_process_relationship( + self, source_label: str, source_name: str, + target_label: str, target_name: str, + rel_type: str, evidence_json: str = "", + properties: dict = None, + ) -> bool: + """Create a process-semantic relationship with provenance. + + Only allows relationship types defined in PROCESS_RELATIONSHIPS. + Returns True if the relationship was created/updated. + """ + from process_semantics import PROCESS_RELATIONSHIPS + if rel_type not in PROCESS_RELATIONSHIPS: + return False + + prop_sets = "" + params = { + "src_name": source_name, + "tgt_name": target_name, + "ev": evidence_json, + } + if properties: + for k, v in properties.items(): + param_key = f"prop_{k}" + prop_sets += f", r.{k} = ${param_key}" + params[param_key] = v + + with self.session() as session: + session.run( + f""" + MATCH (src:{source_label} {{name: $src_name}}) + MATCH (tgt:{target_label} {{name: $tgt_name}}) + MERGE (src)-[r:{rel_type}]->(tgt) + SET r.evidence_items = CASE + WHEN $ev = '' THEN r.evidence_items + WHEN r.evidence_items IS NULL THEN $ev + ELSE r.evidence_items + $ev + END, + r.last_evidence_at = datetime(){prop_sets} + """, + params, + ) + return True + + def get_process_context_for_equipment(self, equipment_name: str) -> Dict: + """Get process-semantic context for an equipment node. 
+ + Returns media handled, operations performed, operating envelopes, + and connected tags with their physical principles. + """ + with self.session() as session: + result = session.run( + """ + MATCH (e:Equipment {name: $name}) + OPTIONAL MATCH (e)-[:HANDLES_MEDIUM]->(pm:ProcessMedium) + OPTIONAL MATCH (e)-[:PERFORMS_OPERATION]->(uo:UnitOperation) + OPTIONAL MATCH (e)-[:HAS_OPERATING_ENVELOPE]->(oe:OperatingEnvelope) + OPTIONAL MATCH (e)<-[:MAPS_TO_SCADA]-(a:AOI)-[:IMPLEMENTS_CONTROL_OF]->(uo2:UnitOperation) + RETURN e.name AS name, + collect(DISTINCT pm.name) AS media, + collect(DISTINCT uo.name) AS operations, + collect(DISTINCT {name: oe.name, parameter: oe.parameter, + normal_low: oe.normal_low, normal_high: oe.normal_high, + unit: oe.unit}) AS envelopes, + collect(DISTINCT uo2.name) AS controlled_operations + """, + {"name": equipment_name}, + ) + record = result.single() + if not record: + return {} + return dict(record) + + def get_process_context_for_tag(self, tag_name: str) -> Dict: + """Get process-semantic context for a SCADA tag.""" + with self.session() as session: + result = session.run( + """ + MATCH (t:ScadaTag {name: $name}) + OPTIONAL MATCH (t)-[:MEASURES]->(pp:PhysicalPrinciple) + OPTIONAL MATCH (t)-[:MONITORS_ENVELOPE]->(oe:OperatingEnvelope) + RETURN t.name AS name, + collect(DISTINCT pp.name) AS measures, + collect(DISTINCT {name: oe.name, parameter: oe.parameter, + normal_low: oe.normal_low, normal_high: oe.normal_high, + unit: oe.unit}) AS envelopes + """, + {"name": tag_name}, + ) + record = result.single() + if not record: + return {} + return dict(record) + # ========================================================================= # Query Operations # ========================================================================= diff --git a/scripts/process_semantics.py b/scripts/process_semantics.py new file mode 100644 index 0000000..efc1ca3 --- /dev/null +++ b/scripts/process_semantics.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +""" 
+Process-semantics layer for the PLC/SCADA ontology. + +Defines canonical node types for physics, chemistry, and operating constraints +that can be induced from both PLC/SCADA structure and external documents +(P&IDs, SOPs, engineering diagrams). + +Node types: + ProcessMedium, UnitOperation, OperatingEnvelope, + PhysicalPrinciple, ChemicalSpecies, Reaction + +All write helpers follow the same MERGE-based pattern as the existing ontology +and attach provenance metadata to every asserted fact. +""" + +import json +from typing import Dict, List, Optional, Any +from dataclasses import dataclass, field, asdict +from datetime import datetime + + +# ============================================================================ +# Provenance metadata contract +# ============================================================================ + +@dataclass +class EvidenceItem: + """Single piece of evidence supporting a graph fact.""" + source_file: str = "" + source_kind: str = "" # "pid", "sop", "diagram", "plc", "scada" + source_page: Optional[int] = None + source_region: str = "" # bounding-box or section id + source_excerpt: str = "" # verbatim snippet + extraction_model: str = "" # "gpt-5.4", "claude-sonnet", "deterministic" + extraction_method: str = "" # "vision", "text", "structured_parse" + confidence: float = 1.0 + extracted_at: str = "" + + def __post_init__(self): + if not self.extracted_at: + self.extracted_at = datetime.utcnow().isoformat() + + +def evidence_to_json(items: List[EvidenceItem]) -> str: + return json.dumps([asdict(e) for e in items]) + + +def merge_evidence(existing_json: Optional[str], new_items: List[EvidenceItem]) -> str: + """Append new evidence items to an existing JSON array (append-only).""" + existing: list = [] + if existing_json: + try: + existing = json.loads(existing_json) + except (json.JSONDecodeError, TypeError): + existing = [] + existing.extend([asdict(e) for e in new_items]) + return json.dumps(existing) + + +# 
============================================================================ +# Canonical process-semantic schemas +# ============================================================================ + +PROCESS_NODE_SCHEMAS: Dict[str, Dict[str, Any]] = { + "ProcessMedium": { + "key_property": "name", + "properties": { + "name": "str", # e.g. "Steam", "CIP-Caustic", "Product-A" + "category": "str", # "utility", "product", "waste", "solvent", "gas" + "phase": "str", # "liquid", "gas", "solid", "mixed" + "description": "str", + "purpose": "str", + }, + "description": "A material or utility stream handled by plant equipment.", + }, + "UnitOperation": { + "key_property": "name", + "properties": { + "name": "str", # e.g. "Pumping", "CIP", "Heating" + "category": "str", # "transfer", "thermal", "mixing", "separation", "cleaning", "reaction" + "description": "str", + "purpose": "str", + }, + "description": "A canonical plant operation such as pumping, mixing, or filtration.", + }, + "OperatingEnvelope": { + "key_property": "name", + "properties": { + "name": "str", # e.g. "BR-500-001/Temperature" + "parameter": "str", # "temperature", "pressure", "flow", "level", "pH" + "unit": "str", # "degC", "bar", "L/min" + "low_limit": "float", + "low_warning": "float", + "normal_low": "float", + "normal_high": "float", + "high_warning": "float", + "high_limit": "float", + "trip_low": "float", + "trip_high": "float", + "description": "str", + }, + "description": "Normal ranges, alarm bands, and trip windows for a measured parameter.", + }, + "PhysicalPrinciple": { + "key_property": "name", + "properties": { + "name": "str", # e.g. "Temperature", "Pressure", "Flow" + "category": "str", # "thermal", "fluid", "electrical", "mechanical", "analytical" + "unit_family": "str", # "temperature", "pressure", "volumetric_flow", etc. 
+ "description": "str", + }, + "description": "A measurable physical quantity relevant to process control.", + }, + "ChemicalSpecies": { + "key_property": "name", + "properties": { + "name": "str", # e.g. "NaOH", "Ethanol", "Product-X" + "cas_number": "str", + "category": "str", # "reactant", "product", "byproduct", "additive", "cleaning_agent" + "molecular_formula": "str", + "description": "str", + }, + "description": "A specific chemical substance involved in plant processes.", + }, + "Reaction": { + "key_property": "name", + "properties": { + "name": "str", # e.g. "Neutralization-CIP", "Fermentation-Stage1" + "category": "str", # "neutralization", "fermentation", "oxidation", "blending", etc. + "description": "str", + "conditions": "str", # brief summary of required conditions + }, + "description": "A chemical or physical transformation step in the process.", + }, +} + +# Allowed relationship types for the process layer +PROCESS_RELATIONSHIPS: Dict[str, Dict[str, str]] = { + "HANDLES_MEDIUM": {"from": "Equipment", "to": "ProcessMedium"}, + "PERFORMS_OPERATION": {"from": "Equipment", "to": "UnitOperation"}, + "HAS_OPERATING_ENVELOPE":{"from": "Equipment", "to": "OperatingEnvelope"}, + "MEASURES": {"from": "ScadaTag", "to": "PhysicalPrinciple"}, + "MONITORS_ENVELOPE": {"from": "ScadaTag", "to": "OperatingEnvelope"}, + "IMPLEMENTS_CONTROL_OF": {"from": "AOI", "to": "UnitOperation"}, + "USES_PRINCIPLE": {"from": "UnitOperation", "to": "PhysicalPrinciple"}, + "INVOLVES_SPECIES": {"from": "Reaction", "to": "ChemicalSpecies"}, + "PROCESSES_SPECIES": {"from": "UnitOperation", "to": "ChemicalSpecies"}, + "HAS_REACTION": {"from": "UnitOperation", "to": "Reaction"}, + "MEDIUM_CONTAINS": {"from": "ProcessMedium", "to": "ChemicalSpecies"}, + "ENVELOPE_FOR_PRINCIPLE":{"from": "OperatingEnvelope", "to": "PhysicalPrinciple"}, + "VISUALIZES": {"from": "ViewComponent", "to": "Equipment"}, +} + + +# ============================================================================ +# 
Schema metadata for graph API / UI contract +# ============================================================================ + +PROCESS_LABEL_META: Dict[str, Dict[str, Any]] = { + label: { + "key_property": schema["key_property"], + "display_property": "name", + "searchable_properties": ["name", "description", "purpose"] if "purpose" in schema["properties"] else ["name", "description"], + "editable_properties": list(schema["properties"].keys()), + "group": "process", + "description": schema["description"], + } + for label, schema in PROCESS_NODE_SCHEMAS.items() +}