diff --git a/apps/cli/src/commands/eval/commands/prompt/accessors.ts b/apps/cli/src/commands/eval/commands/prompt/accessors.ts deleted file mode 100644 index aeb45e44c..000000000 --- a/apps/cli/src/commands/eval/commands/prompt/accessors.ts +++ /dev/null @@ -1,236 +0,0 @@ -import type { EvaluatorConfig, JsonObject, TestMessage } from '@agentv/core'; -import { loadTestById, loadTests } from '@agentv/core'; - -import { findRepoRoot } from '../../shared.js'; - -interface PromptEvalInputResult { - readonly test_id: string; - readonly input: readonly JsonObject[]; - readonly criteria: string; -} - -interface PromptEvalExpectedOutputResult { - readonly test_id: string; - readonly criteria: string; - readonly expected_output: readonly JsonObject[]; - readonly reference_answer?: string; - readonly assertions: readonly EvaluatorConfig[]; -} - -interface PromptEvalListResult { - readonly eval_path: string; - readonly test_ids: readonly string[]; -} - -export async function listPromptEvalTestIds(evalPath: string): Promise { - const repoRoot = await findRepoRoot(process.cwd()); - const tests = await loadTests(evalPath, repoRoot); - - return { - eval_path: evalPath, - test_ids: tests.map((test) => test.id).sort(), - }; -} - -export async function getPromptEvalInput( - evalPath: string, - testId: string, -): Promise { - const repoRoot = await findRepoRoot(process.cwd()); - const evalCase = await loadTestById(evalPath, repoRoot, testId); - const fileMap = buildFileMap(evalCase.input, evalCase.file_paths); - - return { - test_id: evalCase.id, - input: resolveMessages(evalCase.input, fileMap), - criteria: evalCase.criteria, - }; -} - -export async function getPromptEvalExpectedOutput( - evalPath: string, - testId: string, -): Promise { - const repoRoot = await findRepoRoot(process.cwd()); - const evalCase = await loadTestById(evalPath, repoRoot, testId); - - return { - test_id: evalCase.id, - criteria: evalCase.criteria, - expected_output: evalCase.expected_output, - 
reference_answer: evalCase.reference_answer, - assertions: evalCase.assertions ?? [], - }; -} - -export async function getPromptEvalGradingBrief(evalPath: string, testId: string): Promise { - const repoRoot = await findRepoRoot(process.cwd()); - const evalCase = await loadTestById(evalPath, repoRoot, testId); - const fileMap = buildFileMap(evalCase.input, evalCase.file_paths); - const resolvedInput = resolveMessages(evalCase.input, fileMap); - - const lines: string[] = []; - - // Input - const inputText = extractTextFromMessages(resolvedInput); - if (inputText) { - lines.push(`Input: "${inputText}"`); - } - - // Files - if (evalCase.file_paths.length > 0) { - lines.push(`Files: ${evalCase.file_paths.join(', ')}`); - } - - // Expected output - if (evalCase.reference_answer) { - lines.push(`Expected: "${evalCase.reference_answer}"`); - } - - // Criteria - const criteria: string[] = []; - if (evalCase.criteria) { - criteria.push(evalCase.criteria); - } - for (const assertion of evalCase.assertions ?? []) { - const entry = assertion as Record; - const type = entry.type as string | undefined; - const bag = (entry.config as Record) ?? {}; - if (type === 'contains') { - criteria.push(`Output contains '${entry.value}'`); - } else if (type === 'rubrics') { - const items = (entry.criteria ?? bag.criteria) as Array<{ outcome?: string }> | undefined; - if (Array.isArray(items)) { - for (const item of items) { - if (item.outcome) criteria.push(item.outcome); - } - } - } else if (type === 'llm-grader' || type === 'llm_grader') { - const prompt = entry.prompt ?? bag.prompt ?? bag.criteria; - criteria.push(`[llm-grader] ${typeof prompt === 'string' ? prompt : ''}`); - } else if (type === 'code-grader' || type === 'code_grader') { - const name = entry.name ?? type; - const desc = bag.description ?? entry.description; - criteria.push(`[code-grader] ${name}${desc ? 
`: ${desc}` : ''}`); - } else if (type === 'skill-trigger') { - const trigger = entry.should_trigger !== false; - criteria.push(`[skill-trigger] should_trigger: ${trigger} for ${entry.skill}`); - } else if (type) { - criteria.push(`[${type}] ${entry.value ?? bag.criteria ?? bag.prompt ?? ''}`); - } - } - - if (criteria.length > 0) { - lines.push('Criteria:'); - for (const c of criteria) { - lines.push(` - ${c}`); - } - } - - return lines.join('\n'); -} - -function extractTextFromMessages(messages: JsonObject[]): string { - for (const msg of messages) { - if (msg.role !== 'user') continue; - if (typeof msg.content === 'string') return msg.content; - if (Array.isArray(msg.content)) { - const textBlocks = (msg.content as JsonObject[]) - .filter((b) => b.type === 'text') - .map((b) => b.value as string); - if (textBlocks.length > 0) return textBlocks.join(' '); - } - } - return ''; -} - -/** - * Build a mapping from relative file names to resolved absolute paths. - * Uses enriched input file segments as the primary source, then falls back - * to suffix-matching against all file_paths. 
- */ -function buildFileMap( - inputMessages: readonly TestMessage[], - allFilePaths: readonly string[], -): Map { - const map = new Map(); - - for (const message of inputMessages) { - if (!Array.isArray(message.content)) { - continue; - } - - for (const segment of message.content) { - registerResolvedFileSegment(map, segment); - } - } - - // Fall back to suffix-matching against file_paths - return { - get(key: string): string | undefined { - const direct = map.get(key); - if (direct) return direct; - return allFilePaths.find((filePath) => filePath.endsWith(`/${key}`) || filePath === key); - }, - has(key: string): boolean { - return this.get(key) !== undefined; - }, - } as Map; -} - -function registerResolvedFileSegment(map: Map, segment: JsonObject): void { - if (segment.type !== 'file' || typeof segment.resolvedPath !== 'string') { - return; - } - - // `value` is the authored file reference from the eval. `path` is the - // normalized display/reference path attached during parsing. Usually they are - // the same, but both are valid lookup aliases for downstream prompt tooling. - const aliases = [segment.value, segment.path].filter( - (alias): alias is string => typeof alias === 'string', - ); - - for (const alias of aliases) { - map.set(alias, segment.resolvedPath); - } -} - -/** - * Resolve file references in messages, replacing relative values with absolute paths. - * The agent can then read these files directly from the filesystem. 
- */ -function resolveMessages( - messages: readonly TestMessage[], - fileMap: Map, -): JsonObject[] { - return messages.map((message) => { - if (typeof message.content === 'string') { - return { role: message.role, content: message.content } as JsonObject; - } - - if (!Array.isArray(message.content)) { - return { role: message.role, content: message.content } as JsonObject; - } - - const resolvedContent: JsonObject[] = []; - for (const segment of message.content) { - if (typeof segment === 'string') { - resolvedContent.push({ type: 'text', value: segment } as JsonObject); - continue; - } - - const obj = segment as JsonObject; - if (obj.type === 'file' && typeof obj.value === 'string') { - const resolved = fileMap.get(obj.value); - resolvedContent.push({ - type: 'file', - path: resolved ?? obj.value, - } as JsonObject); - } else { - resolvedContent.push(obj); - } - } - - return { role: message.role, content: resolvedContent } as JsonObject; - }); -} diff --git a/apps/cli/src/commands/eval/commands/prompt/index.ts b/apps/cli/src/commands/eval/commands/prompt/index.ts deleted file mode 100644 index 570b2aa07..000000000 --- a/apps/cli/src/commands/eval/commands/prompt/index.ts +++ /dev/null @@ -1,81 +0,0 @@ -import { command, flag, option, optional, positional, string, subcommands } from 'cmd-ts'; - -import { - getPromptEvalExpectedOutput, - getPromptEvalGradingBrief, - getPromptEvalInput, - listPromptEvalTestIds, -} from './accessors.js'; - -export const evalPromptEvalSubcommand = command({ - name: 'eval', - description: 'Extract eval prompt data for agents', - args: { - list: flag({ - long: 'list', - description: 'List available test IDs', - }), - input: flag({ - long: 'input', - description: 'Extract the test input payload for a single test', - }), - expectedOutput: flag({ - long: 'expected-output', - description: 'Extract expected output and grading context for a single test', - }), - gradingBrief: flag({ - long: 'grading-brief', - description: 'Output 
human-readable grading brief with typed criteria', - }), - testId: option({ - type: optional(string), - long: 'test-id', - description: 'Test ID (required for --input and --expected-output)', - }), - evalPath: positional({ - type: string, - displayName: 'eval-path', - description: 'Path to evaluation .yaml, .json, or .jsonl file', - }), - }, - handler: async ({ evalPath, expectedOutput, gradingBrief, input, list, testId }) => { - const selectedModes = [list, input, expectedOutput, gradingBrief].filter(Boolean).length; - if (selectedModes !== 1) { - throw new Error( - 'Specify exactly one of --list, --input, --expected-output, or --grading-brief.', - ); - } - - if (gradingBrief) { - if (!testId) { - throw new Error('--test-id is required with --grading-brief.'); - } - const brief = await getPromptEvalGradingBrief(evalPath, testId); - process.stdout.write(brief); - process.stdout.write('\n'); - return; - } - - if ((input || expectedOutput) && !testId) { - throw new Error('--test-id is required with --input and --expected-output.'); - } - - const requiredTestId = testId ?? ''; - const output = list - ? await listPromptEvalTestIds(evalPath) - : input - ? 
await getPromptEvalInput(evalPath, requiredTestId) - : await getPromptEvalExpectedOutput(evalPath, requiredTestId); - - process.stdout.write(JSON.stringify(output, null, 2)); - process.stdout.write('\n'); - }, -}); - -export const evalPromptCommand = subcommands({ - name: 'prompt', - description: 'Prompt commands', - cmds: { - eval: evalPromptEvalSubcommand, - }, -}); diff --git a/apps/cli/src/commands/eval/index.ts b/apps/cli/src/commands/eval/index.ts index e9a3a991a..305590d1f 100644 --- a/apps/cli/src/commands/eval/index.ts +++ b/apps/cli/src/commands/eval/index.ts @@ -1,7 +1,6 @@ import { subcommands } from 'cmd-ts'; import { evalAssertCommand } from './commands/assert.js'; -import { evalPromptCommand } from './commands/prompt/index.js'; import { evalRunCommand } from './commands/run.js'; export const evalCommand = subcommands({ @@ -9,7 +8,6 @@ export const evalCommand = subcommands({ description: 'Evaluation commands', cmds: { run: evalRunCommand, - prompt: evalPromptCommand, assert: evalAssertCommand, }, }); diff --git a/apps/cli/src/commands/import/claude.ts b/apps/cli/src/commands/import/claude.ts new file mode 100644 index 000000000..5664d1afe --- /dev/null +++ b/apps/cli/src/commands/import/claude.ts @@ -0,0 +1,149 @@ +import { mkdir, writeFile } from 'node:fs/promises'; +import path from 'node:path'; +import { discoverClaudeSessions, parseClaudeSession, readTranscriptFile } from '@agentv/core'; +import { command, flag, option, optional, string } from 'cmd-ts'; + +export const importClaudeCommand = command({ + name: 'claude', + description: 'Import a Claude Code session transcript for offline grading', + args: { + sessionId: option({ + type: optional(string), + long: 'session-id', + description: 'UUID of the Claude Code session to import', + }), + discover: option({ + type: optional(string), + long: 'discover', + description: 'Discovery mode: "latest" to import the most recent session', + }), + projectPath: option({ + type: optional(string), + long: 
'project-path', + description: 'Filter sessions by project path', + }), + output: option({ + type: optional(string), + long: 'output', + short: 'o', + description: + 'Output file path (default: .agentv/transcripts/claude-<session-id>.jsonl)', + }), + projectsDir: option({ + type: optional(string), + long: 'projects-dir', + description: 'Override the default ~/.claude/projects directory', + }), + list: flag({ + long: 'list', + description: 'List available sessions instead of importing', + }), + }, + handler: async ({ sessionId, discover, projectPath, output, projectsDir, list }) => { + if (list) { + const sessions = await discoverClaudeSessions({ + projectPath, + projectsDir, + limit: 20, + }); + + if (sessions.length === 0) { + console.log('No Claude Code sessions found.'); + return; + } + + console.log(`Found ${sessions.length} session(s):\n`); + for (const session of sessions) { + const age = formatAge(session.updatedAt); + console.log(` ${session.sessionId} ${age} ${session.projectDir}`); + } + return; + } + + // Determine which session to import + let sessionFilePath: string; + + if (sessionId) { + const sessions = await discoverClaudeSessions({ + sessionId, + projectPath, + projectsDir, + limit: 1, + }); + + if (sessions.length === 0) { + console.error(`Error: session ${sessionId} not found.`); + process.exit(1); + } + sessionFilePath = sessions[0].filePath; + } else if (discover === 'latest') { + const sessions = await discoverClaudeSessions({ + projectPath, + projectsDir, + latest: true, + }); + + if (sessions.length === 0) { + console.error('Error: no Claude Code sessions found.'); + process.exit(1); + } + sessionFilePath = sessions[0].filePath; + sessionId = sessions[0].sessionId; + console.log(`Discovered latest session: ${sessionId}`); + } else { + console.error('Error: specify --session-id or --discover latest to select a session.'); + process.exit(1); + } + + // Parse the session + const rawJsonl = await readTranscriptFile(sessionFilePath); + const transcript =
parseClaudeSession(rawJsonl); + + // Determine output path + const shortId = (sessionId ?? transcript.source.sessionId).slice(0, 8); + const outputPath = output ?? path.join('.agentv', 'transcripts', `claude-${shortId}.jsonl`); + + // Ensure output directory exists + await mkdir(path.dirname(outputPath), { recursive: true }); + + // Write transcript as JSONL (one message per line) + const outputLines = transcript.messages.map((msg) => JSON.stringify(msg)); + await writeFile(outputPath, `${outputLines.join('\n')}\n`, 'utf8'); + + const msgCount = transcript.messages.length; + const toolCount = transcript.messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0); + + console.log(`Imported ${msgCount} messages (${toolCount} tool calls) → ${outputPath}`); + + if (transcript.source.model) { + console.log(` Model: ${transcript.source.model}`); + } + if (transcript.durationMs !== undefined) { + console.log(` Duration: ${formatDurationMs(transcript.durationMs)}`); + } + if (transcript.tokenUsage) { + console.log( + ` Tokens: ${transcript.tokenUsage.input} in / ${transcript.tokenUsage.output} out`, + ); + } + }, +}); + +function formatAge(date: Date): string { + const diffMs = Date.now() - date.getTime(); + const diffMin = Math.floor(diffMs / 60_000); + if (diffMin < 60) return `${diffMin}m ago`; + const diffHours = Math.floor(diffMin / 60); + if (diffHours < 24) return `${diffHours}h ago`; + const diffDays = Math.floor(diffHours / 24); + return `${diffDays}d ago`; +} + +function formatDurationMs(ms: number): string { + if (ms < 1000) return `${ms}ms`; + const seconds = Math.floor(ms / 1000); + if (seconds < 60) return `${seconds}s`; + const minutes = Math.floor(seconds / 60); + const remainingSeconds = seconds % 60; + return `${minutes}m ${remainingSeconds}s`; +} diff --git a/apps/cli/src/commands/import/index.ts b/apps/cli/src/commands/import/index.ts new file mode 100644 index 000000000..d76ddcaf0 --- /dev/null +++ b/apps/cli/src/commands/import/index.ts @@ -0,0 
+1,11 @@ +import { subcommands } from 'cmd-ts'; + +import { importClaudeCommand } from './claude.js'; + +export const importCommand = subcommands({ + name: 'import', + description: 'Import agent session transcripts for offline grading', + cmds: { + claude: importClaudeCommand, + }, +}); diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index 9a2990307..6ae8fbecb 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -4,8 +4,8 @@ import packageJson from '../package.json' with { type: 'json' }; import { compareCommand } from './commands/compare/index.js'; import { convertCommand } from './commands/convert/index.js'; import { createCommand } from './commands/create/index.js'; -import { evalPromptCommand } from './commands/eval/commands/prompt/index.js'; import { evalCommand } from './commands/eval/index.js'; +import { importCommand } from './commands/import/index.js'; import { initCmdTsCommand } from './commands/init/index.js'; import { pipelineCommand } from './commands/pipeline/index.js'; import { resultsCommand } from './commands/results/index.js'; @@ -24,7 +24,7 @@ export const app = subcommands({ version: packageJson.version, cmds: { eval: evalCommand, - prompt: evalPromptCommand, + import: importCommand, compare: compareCommand, convert: convertCommand, create: createCommand, @@ -45,14 +45,14 @@ export const app = subcommands({ * Known eval subcommand names — used to decide whether to inject the * implicit `run` subcommand for backward-compatible `agentv eval `. */ -const EVAL_SUBCOMMANDS = new Set(['run', 'prompt', 'assert']); +const EVAL_SUBCOMMANDS = new Set(['run', 'assert']); /** * Top-level CLI command names (excluding `eval` itself). - * Used to distinguish `agentv eval …` from `agentv prompt eval …`. + * Used to ensure `eval` is the top-level subcommand, not nested. 
*/ const TOP_LEVEL_COMMANDS = new Set([ - 'prompt', + 'import', 'compare', 'convert', 'create', @@ -89,13 +89,12 @@ export function preprocessArgv(argv: string[]): string[] { // Implicit `run` subcommand: `agentv eval ` → `agentv eval run ` // when the first arg after `eval` is not a known eval subcommand. // This preserves backward compatibility now that `eval` is a subcommands group. - // Only applies when `eval` is the top-level subcommand, NOT when it appears - // inside another command (e.g. `agentv prompt eval …`). + // Only applies when `eval` is the top-level subcommand. // Exception: `--help` / `-h` should show the eval group help, not run's help. const evalIdx = result.indexOf('eval'); if (evalIdx !== -1) { // Ensure no top-level command appears before `eval` in the argv — - // if one does, `eval` is a nested subcommand (e.g. `prompt eval`). + // if one does, `eval` is a nested subcommand. const isTopLevel = !result.slice(0, evalIdx).some((arg) => TOP_LEVEL_COMMANDS.has(arg)); if (isTopLevel) { const nextArg = result[evalIdx + 1]; diff --git a/apps/cli/test/prompt-eval.integration.test.ts b/apps/cli/test/prompt-eval.integration.test.ts deleted file mode 100644 index 77495c51b..000000000 --- a/apps/cli/test/prompt-eval.integration.test.ts +++ /dev/null @@ -1,168 +0,0 @@ -import { describe, expect, it } from 'bun:test'; -import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import { fileURLToPath } from 'node:url'; -import { execa } from 'execa'; - -import { assertCoreBuild } from './setup-core-build.js'; - -assertCoreBuild(); - -interface PromptEvalFixture { - readonly baseDir: string; - readonly suiteDir: string; - readonly evalPath: string; -} - -const __filename = fileURLToPath(import.meta.url); -const __dirname = path.dirname(__filename); -const projectRoot = path.resolve(__dirname, '../../..'); -const CLI_ENTRY = path.join(projectRoot, 'apps/cli/src/cli.ts'); - -async 
function createFixture(): Promise { - const baseDir = await mkdtemp(path.join(tmpdir(), 'agentv-prompt-eval-')); - const suiteDir = path.join(baseDir, 'suite'); - await mkdir(suiteDir, { recursive: true }); - - const evalPath = path.join(suiteDir, 'sample.eval.yaml'); - await writeFile( - evalPath, - `description: Prompt eval CLI fixture - -tests: - - id: greeting-test - criteria: Assistant greets the user by name - assertions: - - name: mentions-name - type: contains - value: Taylor - input: - - role: user - content: Say hello to Taylor. - expected_output: - - role: assistant - content: Hello, Taylor! - - id: farewell-test - criteria: Assistant says goodbye politely - input: - - role: user - content: Say goodbye to Taylor. - expected_output: - - role: assistant - content: Goodbye, Taylor. -`, - 'utf8', - ); - - return { baseDir, suiteDir, evalPath } satisfies PromptEvalFixture; -} - -async function runPromptCli( - fixture: PromptEvalFixture, - args: readonly string[], -): Promise<{ stdout: string; stderr: string; exitCode: number }> { - const result = await execa('bun', ['--no-env-file', CLI_ENTRY, ...args], { - cwd: fixture.suiteDir, - env: { - ...process.env, - CI: 'true', - }, - reject: false, - }); - - return { - stdout: result.stdout, - stderr: result.stderr, - exitCode: result.exitCode ?? 
0, - }; -} - -describe('agentv prompt eval CLI', () => { - it('lists available test IDs', async () => { - const fixture = await createFixture(); - try { - const result = await runPromptCli(fixture, ['prompt', 'eval', '--list', fixture.evalPath]); - - expect(result.exitCode).toBe(0); - expect(JSON.parse(result.stdout)).toEqual({ - eval_path: fixture.evalPath, - test_ids: ['farewell-test', 'greeting-test'], - }); - } finally { - await rm(fixture.baseDir, { recursive: true, force: true }); - } - }); - - it('returns prompt input for a specific test via --input', async () => { - const fixture = await createFixture(); - try { - const result = await runPromptCli(fixture, [ - 'prompt', - 'eval', - '--input', - fixture.evalPath, - '--test-id', - 'greeting-test', - ]); - - expect(result.exitCode).toBe(0); - expect(JSON.parse(result.stdout)).toEqual({ - test_id: 'greeting-test', - input: [{ role: 'user', content: 'Say hello to Taylor.' }], - criteria: 'Assistant greets the user by name', - }); - } finally { - await rm(fixture.baseDir, { recursive: true, force: true }); - } - }); - - it('returns human-readable grading brief via --grading-brief', async () => { - const fixture = await createFixture(); - try { - const result = await runPromptCli(fixture, [ - 'prompt', - 'eval', - '--grading-brief', - fixture.evalPath, - '--test-id', - 'greeting-test', - ]); - - expect(result.exitCode).toBe(0); - expect(result.stdout).toContain('Input:'); - expect(result.stdout).toContain('Say hello to Taylor.'); - expect(result.stdout).toContain('Expected:'); - expect(result.stdout).toContain('Hello, Taylor!'); - expect(result.stdout).toContain('Criteria:'); - expect(result.stdout).toContain('Taylor'); - } finally { - await rm(fixture.baseDir, { recursive: true, force: true }); - } - }); - - it('returns expected output and evaluator context for a specific test', async () => { - const fixture = await createFixture(); - try { - const result = await runPromptCli(fixture, [ - 'prompt', - 'eval', - 
'--expected-output', - fixture.evalPath, - '--test-id', - 'greeting-test', - ]); - - expect(result.exitCode).toBe(0); - expect(JSON.parse(result.stdout)).toEqual({ - test_id: 'greeting-test', - criteria: 'Assistant greets the user by name', - expected_output: [{ role: 'assistant', content: 'Hello, Taylor!' }], - reference_answer: 'Hello, Taylor!', - assertions: [{ name: 'mentions-name', type: 'contains', value: 'Taylor' }], - }); - } finally { - await rm(fixture.baseDir, { recursive: true, force: true }); - } - }); -}); diff --git a/apps/cli/test/unit/preprocess-argv.test.ts b/apps/cli/test/unit/preprocess-argv.test.ts index 116667d9d..d91c31bc3 100644 --- a/apps/cli/test/unit/preprocess-argv.test.ts +++ b/apps/cli/test/unit/preprocess-argv.test.ts @@ -3,79 +3,6 @@ import { describe, expect, it } from 'bun:test'; import { preprocessArgv } from '../../src/index.js'; describe('preprocessArgv', () => { - describe('prompt default subcommand insertion', () => { - it('does not rewrite `prompt` commands without explicit subcommands', () => { - const argv = ['node', 'agentv', 'prompt', 'file.yaml']; - expect(preprocessArgv(argv)).toEqual(argv); - }); - - it('does not rewrite bare `prompt` commands', () => { - const argv = ['node', 'agentv', 'prompt']; - expect(preprocessArgv(argv)).toEqual(argv); - }); - - it('does not insert a default accessor after `prompt eval` when followed by a file', () => { - const result = preprocessArgv(['node', 'agentv', 'prompt', 'eval', 'file.yaml']); - expect(result).toEqual(['node', 'agentv', 'prompt', 'eval', 'file.yaml']); - }); - - it('does not insert a default accessor when `prompt eval` has no further arguments', () => { - const argv = ['node', 'agentv', 'prompt', 'eval']; - expect(preprocessArgv(argv)).toEqual(argv); - }); - - it('passes through `prompt eval --input` with flags', () => { - const result = preprocessArgv([ - 'node', - 'agentv', - 'prompt', - 'eval', - '--input', - 'file.yaml', - '--test-id', - 'case-1', - ]); - 
expect(result).toEqual([ - 'node', - 'agentv', - 'prompt', - 'eval', - '--input', - 'file.yaml', - '--test-id', - 'case-1', - ]); - }); - - it('passes through `prompt eval --expected-output` with flags', () => { - const result = preprocessArgv([ - 'node', - 'agentv', - 'prompt', - 'eval', - '--expected-output', - 'file.yaml', - '--test-id', - 'case-1', - ]); - expect(result).toEqual([ - 'node', - 'agentv', - 'prompt', - 'eval', - '--expected-output', - 'file.yaml', - '--test-id', - 'case-1', - ]); - }); - - it('passes through `prompt eval --list`', () => { - const argv = ['node', 'agentv', 'prompt', 'eval', '--list', 'file.yaml']; - expect(preprocessArgv(argv)).toEqual(argv); - }); - }); - describe('--eval-id convenience alias', () => { it('rewrites `--eval-id` → `--test-id`', () => { const result = preprocessArgv(['node', 'agentv', 'eval', 'file.yaml', '--eval-id', 'case-1']); @@ -86,29 +13,6 @@ describe('preprocessArgv', () => { const result = preprocessArgv(['node', 'agentv', 'eval', 'file.yaml', '--eval-id=case-1']); expect(result).toEqual(['node', 'agentv', 'eval', 'run', 'file.yaml', '--test-id=case-1']); }); - - it('rewrites `--eval-id` in prompt commands', () => { - const result = preprocessArgv([ - 'node', - 'agentv', - 'prompt', - 'eval', - 'input', - 'file.yaml', - '--eval-id', - 'case-1', - ]); - expect(result).toEqual([ - 'node', - 'agentv', - 'prompt', - 'eval', - 'input', - 'file.yaml', - '--test-id', - 'case-1', - ]); - }); }); describe('eval implicit run subcommand', () => { diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx index 08f93c66c..0388223d9 100644 --- a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx +++ b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx @@ -301,70 +301,19 @@ The `--file` option reads a JSON file with `{ "output": "...", "input": "..." 
}` This is the same interface that agent-orchestrated evals use — the EVAL.yaml transpiler emits `assertions` instructions for code graders so external grading agents can execute them directly. -## Agent-Orchestrated Evals +## Offline Grading -Run evaluations without API keys by letting an external agent (e.g., Claude Code, Copilot CLI) orchestrate the eval pipeline. - -### Overview - -```bash -agentv eval prompt eval --list evals/my-eval.yaml -``` - -Returns JSON listing the available `test_ids` for the eval file. - -### Get Task Input +Grade existing agent sessions without re-running them. Import a transcript, then run deterministic evaluators: ```bash -agentv eval prompt eval --input evals/my-eval.yaml --test-id case-123 -``` - -Returns JSON with: - -- `input` — `[{role, content}]` array. File references use absolute paths (`{type: "file", path: "/abs/path"}`) that the agent can read directly from the filesystem. -- `criteria` — grading criteria for the orchestrator's reference (do not pass to the candidate). - -### Get Grading Context +# Import a Claude Code session +agentv import claude --discover latest -```bash -agentv eval prompt eval --expected-output evals/my-eval.yaml --test-id case-123 +# Run evaluators against the imported transcript +agentv eval evals/my-eval.yaml --transcript .agentv/transcripts/claude-<id>.jsonl ``` -Returns JSON with the data an external grader needs: - -- `expected_output` — reference assistant messages -- `reference_answer` — flattened reference text when available -- `criteria` — high-level success criteria -- `assertions` — evaluator configs for the test - -### Get Grading Brief - -Output a human-readable summary of the grading criteria for a specific test, with type-prefixed assertion tags: - -```bash -agentv eval prompt eval --grading-brief evals/my-eval.yaml --test-id case-123 -``` - -Example output: - -``` -Input: "Summarise the following article in one sentence."
-Expected: "The quick brown fox jumps over the lazy dog near the river bank." -Criteria: - - [code-grader] rouge-score: Measures n-gram recall and F1 - - [llm-grader] Summary captures key points - - [skill-trigger] should_trigger: true for summariser -``` - -This is useful for agents orchestrating evals to understand what criteria a test is evaluated against before running it. - -### When to Use - -| Scenario | Command | -|----------|---------| -| Have API keys, want end-to-end automation | `agentv eval` | -| Run a single assertion in isolation | `agentv eval assert ` | -| No API keys, external agent can orchestrate the run | `agentv eval prompt eval --list/--input/--expected-output` | -| Inspect grading criteria before running | `agentv eval prompt eval --grading-brief` | +See the [Import tool docs](/docs/tools/import/) for all providers and options. ## Version Requirements diff --git a/apps/web/src/content/docs/docs/evaluators/code-graders.mdx b/apps/web/src/content/docs/docs/evaluators/code-graders.mdx index b62200f65..3befe3581 100644 --- a/apps/web/src/content/docs/docs/evaluators/code-graders.mdx +++ b/apps/web/src/content/docs/docs/evaluators/code-graders.mdx @@ -14,10 +14,10 @@ Code graders communicate via stdin/stdout JSON: **Input (stdin):** ```json { - "input_text": "What is 15 + 27?", + "input": "What is 15 + 27?", "criteria": "Correctly calculates 15 + 27 = 42", - "output_text": "The answer is 42.", - "expected_output_text": "42" + "output": "The answer is 42.", + "expected_output": "42" } **Output (stdout):** @@ -41,7 +41,7 @@ Code graders communicate via stdin/stdout JSON: # validators/check_answer.py import json, sys data = json.load(sys.stdin) -output_text = data.get("output_text", "") +output_text = data.get("output", "") assertions = [] @@ -66,7 +66,7 @@ print(json.dumps({ import { readFileSync } from "fs"; const data = JSON.parse(readFileSync("/dev/stdin", "utf-8")); -const outputText: string = data.output_text ?? 
""; +const outputText: string = data.output ?? ""; const assertions: Array<{ text: string; passed: boolean }> = []; @@ -102,7 +102,8 @@ The `@agentv/eval` package provides a declarative API with automatic stdin/stdou #!/usr/bin/env bun import { defineCodeGrader } from '@agentv/eval'; -export default defineCodeGrader(({ outputText, criteria }) => { +export default defineCodeGrader(({ output, criteria }) => { + const outputText = output?.[0]?.content ?? ''; const assertions: Array<{ text: string; passed: boolean }> = []; if (outputText.includes(criteria)) { @@ -146,7 +147,9 @@ Use `createTargetClient` from the SDK: #!/usr/bin/env bun import { createTargetClient, defineCodeGrader } from '@agentv/eval'; -export default defineCodeGrader(async ({ inputText, outputText }) => { +export default defineCodeGrader(async ({ input, output }) => { + const inputText = input?.[0]?.content ?? ''; + const outputText = output?.[0]?.content ?? ''; const target = createTargetClient(); if (!target) return { score: 0, assertions: [{ text: 'Target not configured', passed: false }] }; @@ -171,14 +174,14 @@ Use `target.invokeBatch(requests)` for multiple calls in parallel. 
## Advanced Input Fields -Beyond the basic text fields (`input_text`, `output_text`, `expected_output_text`, `criteria`), code graders receive additional structured context: +Beyond the basic text fields (`input`, `output`, `expected_output`, `criteria`), code graders receive additional structured context: | Field | Type | Description | |-------|------|-------------| +| `input` | `string \| Message[]` | Input text or full resolved input message array | +| `output` | `string \| Message[]` | Agent output text or full execution trace with tool calls | +| `expected_output` | `string \| Message[]` | Expected output text or expected agent behavior including tool calls | | `input_files` | `string[]` | Paths to input files referenced in the eval | -| `input` | `Message[]` | Full resolved input message array | -| `expected_output` | `Message[]` | Expected agent behavior including tool calls | -| `output` | `Message[]` | Actual agent execution trace with tool calls | | `trace` | `TraceSummary` | Lightweight execution metrics (tool calls, errors) | | `token_usage` | `{input, output}` | Token consumption | | `cost_usd` | `number` | Estimated cost in USD | @@ -293,7 +296,7 @@ agentv eval assert rouge-score --file result.json The command: 1. Discovers the grader script by walking up directories looking for `.agentv/graders/<name>.{ts,js,mts,mjs}` -2. Passes `{ output_text, output, input, input_text }` to the script via stdin +2. Passes `{ output, input, criteria }` to the script via stdin 3. Prints the grader's JSON result to stdout 4.
Exits 0 if score >= 0.5, exits 1 otherwise @@ -304,5 +307,5 @@ This is the same interface that agent-orchestrated evals use — the EVAL.yaml t Pipe JSON directly to the grader script for full control: ```bash -echo '{"input_text":"What is 2+2?","criteria":"4","output_text":"4","expected_output_text":"4"}' | python validators/check_answer.py +echo '{"input":"What is 2+2?","criteria":"4","output":"4","expected_output":"4"}' | python validators/check_answer.py ``` diff --git a/apps/web/src/content/docs/docs/guides/agent-skills-evals.mdx b/apps/web/src/content/docs/docs/guides/agent-skills-evals.mdx index e279fc4cd..f5a1d1ac1 100644 --- a/apps/web/src/content/docs/docs/guides/agent-skills-evals.mdx +++ b/apps/web/src/content/docs/docs/guides/agent-skills-evals.mdx @@ -79,23 +79,18 @@ The `files[]` field lists files that the agent needs during evaluation. Paths ar AgentV resolves these paths and copies the files into the workspace before the agent runs. If a file is missing, the test case fails with a `file_copy_error`. -## Agent mode (no API keys) +## Offline grading (no API keys) -AgentV's prompt subcommands work with evals.json, enabling agent-mode evaluation without API keys: +Grade existing agent sessions offline using `agentv import` to convert transcripts, then run deterministic evaluators: ```bash -# List test IDs -agentv prompt eval --list evals.json +# Import a Claude Code session transcript +agentv import claude --discover latest -# Get input for a specific test -agentv prompt eval --input evals.json --test-id 1 - -# Get expected output and evaluator criteria for a test -agentv prompt eval --expected-output evals.json --test-id 1 +# Run deterministic evaluators against the imported transcript +agentv eval evals.json --target copilot-log ``` -In agent mode, the host agent uses these accessors to enumerate tests, act as the candidate, and then grade the saved answer against the eval spec. 
- If you're using the `agentv-bench` skill bundle, validate your evals before running: ```bash diff --git a/apps/web/src/content/docs/docs/guides/skill-improvement-workflow.mdx b/apps/web/src/content/docs/docs/guides/skill-improvement-workflow.mdx index 53f937e32..40db69514 100644 --- a/apps/web/src/content/docs/docs/guides/skill-improvement-workflow.mdx +++ b/apps/web/src/content/docs/docs/guides/skill-improvement-workflow.mdx @@ -119,31 +119,17 @@ Run the same evaluation **with** the skill loaded: agentv eval evals.json --target candidate ``` -Or if using agent mode (no API keys required): +Or grade existing sessions offline (no API keys required): ```bash -# List available test cases -agentv prompt eval --list evals.json +# Import a Claude Code session transcript +agentv import claude --discover latest -# Get the input prompt for a test case -agentv prompt eval --input evals.json --test-id 1 - -# After running the agent, fetch expected output and evaluator criteria -agentv prompt eval --expected-output evals.json --test-id 1 +# Run deterministic evaluators against the imported transcript +agentv eval evals.json --target copilot-log ``` -Agent mode is useful when you want to evaluate skills with agents that don't have a direct API integration — you orchestrate the run yourself and use AgentV's accessor commands to read the eval spec. - -If you're using the `agentv-bench` skill bundle, the equivalent wrappers are: - -```bash -cd plugins/agentv-dev/skills/agentv-bench -bun install -bun scripts/quick-validate.ts --scope wrappers -bun scripts/prompt-eval.ts --list evals.json -bun scripts/prompt-eval.ts --input evals.json --test-id 1 -bun scripts/prompt-eval.ts --expected-output evals.json --test-id 1 -``` +Offline grading is useful when you want to evaluate skills with agents that don't have a direct API integration — import the session transcript and run deterministic evaluators. 
## Step 4: Compare Results diff --git a/apps/web/src/content/docs/docs/tools/import.mdx b/apps/web/src/content/docs/docs/tools/import.mdx new file mode 100644 index 000000000..bc5029fca --- /dev/null +++ b/apps/web/src/content/docs/docs/tools/import.mdx @@ -0,0 +1,111 @@ +--- +title: Import +description: Import agent session transcripts for offline grading +sidebar: + order: 3 +--- + +The `import` command converts agent session transcripts into AgentV's `Message[]` format for offline grading — no re-running the agent, no API keys needed. + +## Supported Providers + +| Provider | Command | Source | +|----------|---------|--------| +| Claude Code | `agentv import claude` | `~/.claude/projects//.jsonl` | + +Codex and Copilot importers are planned for future releases. + +## `import claude` + +Import a Claude Code session transcript. + +### Discover available sessions + +```bash +agentv import claude --list +``` + +Output: + +``` +Found 5 session(s): + + 4c4f9e4e-e6f1-490b-a1b1-9aef543ebf22 2m ago -home-user-myproject + 087b801a-7a63-48ff-b348-62563a290b23 1h ago -home-user-myproject + ed8b8c62-4414-49fb-8739-006d809c8588 3h ago -home-user-other-project +``` + +### Import latest session + +```bash +agentv import claude --discover latest +``` + +### Import a specific session + +```bash +agentv import claude --session-id 4c4f9e4e-e6f1-490b-a1b1-9aef543ebf22 +``` + +### Filter by project path + +```bash +agentv import claude --discover latest --project-path /home/user/myproject +``` + +### Custom output path + +```bash +agentv import claude --discover latest -o transcripts/my-session.jsonl +``` + +Default output: `.agentv/transcripts/claude-.jsonl` + +## Options + +| Flag | Description | +|------|-------------| +| `--session-id ` | Import a specific session by UUID | +| `--discover latest` | Import the most recent session | +| `--project-path ` | Filter sessions by project path | +| `--output, -o ` | Custom output file path | +| `--projects-dir ` | Override 
`~/.claude/projects` directory | +| `--list` | List available sessions instead of importing | + +## Output Format + +The imported transcript is written as JSONL — one `Message` object per line: + +```json +{"role":"user","content":"Fix the bug in auth.ts"} +{"role":"assistant","content":"I'll fix the authentication bug.","toolCalls":[{"tool":"Read","input":{"file_path":"src/auth.ts"},"id":"toolu_01...","output":"...file contents..."}]} +``` + +Each message follows AgentV's standard `Message` interface with `role`, `content`, and optional `toolCalls` (including tool outputs paired from subsequent events). + +## What Gets Parsed + +| Claude Event | AgentV Message | +|-------------|----------------| +| `user` | `{ role: 'user', content }` | +| `assistant` | `{ role: 'assistant', content, toolCalls }` | +| `tool_use` blocks | `ToolCall { tool, input, id }` | +| `tool_result` blocks | Paired with matching `tool_use` by ID | +| `progress`, `system`, `file-history-snapshot` | Skipped | +| Subagent events | Filtered out (v1) | + +Token usage is aggregated from the final cumulative value per LLM request. Duration is computed from first-to-last event timestamp. + +## Workflow + +Import a session, then run evaluators against it: + +```bash +# 1. Import the latest Claude Code session +agentv import claude --discover latest + +# 2. Run evaluators against the imported transcript +agentv eval evals/my-eval.yaml --transcript .agentv/transcripts/claude-4c4f9e4e.jsonl +``` + +See `examples/features/import-claude/` for a complete working example. diff --git a/examples/features/import-claude/README.md b/examples/features/import-claude/README.md new file mode 100644 index 000000000..ed4dba683 --- /dev/null +++ b/examples/features/import-claude/README.md @@ -0,0 +1,65 @@ +# Import Claude — Offline Transcript Grading + +Demonstrates importing a Claude Code session transcript and grading it +offline with deterministic evaluators. 
**No LLM API key needed.** + +Evaluators used: +- `code-grader` — custom TypeScript grader inspecting the full `Message[]` with tool calls + +## Setup + +### 1. Run a Claude Code session + +Start a Claude Code session on any project: + +```bash +claude -p "List all TypeScript files in this project" +``` + +### 2. Import the session transcript + +```bash +agentv import claude --discover latest -o transcripts/session.jsonl +``` + +Or import a specific session: + +```bash +# List available sessions +agentv import claude --list + +# Import by session ID +agentv import claude --session-id -o transcripts/session.jsonl +``` + +### 3. Run the eval + +```bash +agentv eval evals/transcript-check.EVAL.yaml +``` + +## How it works + +``` +~/.claude/projects//.jsonl + ↓ agentv import claude (reads from disk, converts to Message[]) +.agentv/transcripts/claude-.jsonl + ↓ code-grader (deterministic) +pass/fail +``` + +The import pipeline: +1. Discovers Claude Code sessions in `~/.claude/projects/` +2. Parses the JSONL transcript (user messages, assistant responses, tool calls) +3. Pairs `tool_use` blocks with `tool_result` responses +4. Aggregates token usage (last cumulative value per LLM request) +5. Writes a clean `Message[]` JSONL for evaluation + +## Evaluators + +### transcript-quality (code-grader) + +Custom grader using `defineCodeGrader` from `@agentv/eval`. Validates: +1. Transcript contains at least one assistant message +2. Tool calls were recorded with outputs +3. 
No empty assistant messages diff --git a/examples/features/import-claude/evals/transcript-check.EVAL.yaml b/examples/features/import-claude/evals/transcript-check.EVAL.yaml new file mode 100644 index 000000000..ecd18a84c --- /dev/null +++ b/examples/features/import-claude/evals/transcript-check.EVAL.yaml @@ -0,0 +1,8 @@ +tests: + - id: transcript-quality + input: "Analyze the imported Claude Code transcript" + criteria: "Transcript contains meaningful assistant messages with tool calls" + assertions: + - name: transcript-quality + type: code-grader + command: [bun, graders/transcript-quality.ts] diff --git a/examples/features/import-claude/graders/transcript-quality.ts b/examples/features/import-claude/graders/transcript-quality.ts new file mode 100644 index 000000000..91e61ad04 --- /dev/null +++ b/examples/features/import-claude/graders/transcript-quality.ts @@ -0,0 +1,36 @@ +#!/usr/bin/env bun +import { defineCodeGrader } from '@agentv/eval'; + +export default defineCodeGrader(({ output }) => { + const assertions: Array<{ text: string; passed: boolean }> = []; + + // Check 1: Has assistant messages + const assistantMessages = output?.filter((m) => m.role === 'assistant') ?? []; + assertions.push({ + text: `Contains assistant messages (found ${assistantMessages.length})`, + passed: assistantMessages.length > 0, + }); + + // Check 2: Has tool calls with outputs + const toolCalls = assistantMessages.flatMap((m) => m.toolCalls ?? 
[]); + const toolCallsWithOutput = toolCalls.filter((tc) => tc.output != null); + assertions.push({ + text: `Tool calls have outputs (${toolCallsWithOutput.length}/${toolCalls.length})`, + passed: toolCalls.length === 0 || toolCallsWithOutput.length > 0, + }); + + // Check 3: No empty assistant messages + const emptyAssistant = assistantMessages.filter( + (m) => !m.content && (!m.toolCalls || m.toolCalls.length === 0), + ); + assertions.push({ + text: `No empty assistant messages (found ${emptyAssistant.length})`, + passed: emptyAssistant.length === 0, + }); + + const passed = assertions.filter((a) => a.passed).length; + return { + score: assertions.length > 0 ? passed / assertions.length : 0, + assertions, + }; +}); diff --git a/packages/core/src/import/claude-parser.ts b/packages/core/src/import/claude-parser.ts new file mode 100644 index 000000000..03d3d2d19 --- /dev/null +++ b/packages/core/src/import/claude-parser.ts @@ -0,0 +1,323 @@ +/** + * Claude Code session JSONL parser. + * + * Reads a Claude Code session transcript (~/.claude/projects//.jsonl) + * and converts it to AgentV's Message[] format. + * + * Each line is a JSON object with: + * { type, message: { role, content }, sessionId, timestamp, uuid, requestId, ... 
} + * + * Supported event types: + * user → Message { role: 'user' } (also contains tool_result blocks) + * assistant → Message { role: 'assistant', toolCalls from tool_use content blocks } + * + * Skipped event types: progress, system, file-history-snapshot + * + * Key behaviors: + * - tool_use blocks in assistant events → ToolCall (pending output) + * - tool_result blocks in user events → matched to pending tool_use by tool_use_id + * - Usage is cumulative per requestId; only the last value per requestId is used + * - Streaming assistant events with the same requestId are deduplicated (keep latest) + * - Subagent events (isSidechain: true) are filtered out in v1 + * - Duration is from first↔last event timestamp (including skipped types) + * - cost_usd is null (Claude Code does not report per-session cost) + */ + +import type { Message, ToolCall } from '../evaluation/providers/types.js'; +import type { TranscriptEntry, TranscriptSource } from './types.js'; + +interface ClaudeEvent { + readonly type: string; + readonly requestId?: string; + readonly isSidechain?: boolean; + readonly message?: { + readonly role?: string; + readonly content?: string | readonly ClaudeContentBlock[]; + readonly usage?: ClaudeUsage; + readonly model?: string; + }; + readonly sessionId?: string; + readonly timestamp?: string; + readonly uuid?: string; + readonly cwd?: string; +} + +interface ClaudeContentBlock { + readonly type: string; + readonly text?: string; + readonly thinking?: string; + readonly name?: string; + readonly input?: unknown; + readonly id?: string; + readonly tool_use_id?: string; + readonly content?: string | readonly { readonly type: string; readonly text?: string }[]; +} + +interface ClaudeUsage { + readonly input_tokens?: number; + readonly output_tokens?: number; + readonly cache_read_input_tokens?: number; +} + +const SKIPPED_TYPES = new Set(['progress', 'system', 'file-history-snapshot']); + +export function parseClaudeSession(jsonl: string): TranscriptEntry { 
+ const messages: Message[] = []; + let sessionId = ''; + let projectPath: string | undefined; + let model: string | undefined; + let startTimestamp: string | undefined; + let endTimestamp: string | undefined; + + // Track usage per requestId — values are cumulative, so we only keep the last + const usageByRequestId = new Map(); + + // Track the last assistant message per requestId to deduplicate streaming updates + let lastAssistantRequestId: string | undefined; + let lastAssistantIdx = -1; + + // Track pending tool_use IDs for pairing with tool_result in user events + const pendingToolCalls = new Map(); + + const lines = jsonl.split('\n').filter((l) => l.trim().length > 0); + + for (const line of lines) { + let event: ClaudeEvent; + try { + event = JSON.parse(line) as ClaudeEvent; + } catch { + continue; + } + + if (!event.type) continue; + + // Track timestamps from ALL events (including skipped types) for accurate duration + if (event.timestamp) { + if (!startTimestamp) startTimestamp = event.timestamp; + endTimestamp = event.timestamp; + } + + // Skip non-message event types + if (SKIPPED_TYPES.has(event.type)) continue; + + // Skip subagent events (v1: only process main conversation) + if (event.isSidechain) continue; + + // Capture session metadata from first event + if (!sessionId && event.sessionId) { + sessionId = event.sessionId; + } + if (!projectPath && event.cwd) { + projectPath = event.cwd; + } + + switch (event.type) { + case 'user': { + const msg = event.message; + if (!msg) break; + + const contentArr = msg.content; + + // User events can contain both tool_result blocks (responses to tool_use) + // and text blocks. Process tool_results first, then extract text. 
+ if (Array.isArray(contentArr)) { + for (const block of contentArr as readonly ClaudeContentBlock[]) { + if (block.type === 'tool_result' && block.tool_use_id) { + const pending = pendingToolCalls.get(block.tool_use_id); + if (pending) { + const existingMsg = messages[pending.msgIdx]; + const existingCalls = [...(existingMsg.toolCalls ?? [])]; + existingCalls[pending.toolIdx] = { + ...existingCalls[pending.toolIdx], + output: extractToolResultContent(block.content), + }; + messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls }; + pendingToolCalls.delete(block.tool_use_id); + } + } + } + } + + // Extract text content for the user message + const text = extractTextContent(contentArr); + if (text !== undefined) { + messages.push({ role: 'user', content: text }); + } + break; + } + + case 'assistant': { + const msg = event.message; + if (!msg) break; + + // Capture model from first assistant message + if (!model && msg.model) { + model = msg.model; + } + + // Track usage (cumulative per requestId — last value wins) + if (msg.usage && event.requestId) { + usageByRequestId.set(event.requestId, msg.usage); + } + + // Parse content array for text and tool_use blocks + const { text, toolCalls } = extractAssistantContent(msg.content); + + // Deduplicate streaming assistant events with the same requestId + if ( + event.requestId && + event.requestId === lastAssistantRequestId && + lastAssistantIdx >= 0 + ) { + // Replace the previous partial message + messages[lastAssistantIdx] = { + role: 'assistant', + content: text || undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + }; + // Re-register tool calls for pairing + registerPendingToolCalls(toolCalls, lastAssistantIdx, pendingToolCalls); + } else { + // Only push if there's actual content or tool calls + if (text || toolCalls.length > 0) { + lastAssistantIdx = messages.length; + messages.push({ + role: 'assistant', + content: text || undefined, + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + }); + registerPendingToolCalls(toolCalls, lastAssistantIdx, pendingToolCalls); + } + } + lastAssistantRequestId = event.requestId; + break; + } + } + } + + // Compute final usage from last-seen value per requestId + let totalInputTokens = 0; + let totalOutputTokens = 0; + for (const usage of usageByRequestId.values()) { + totalInputTokens += Number(usage.input_tokens ?? 0); + totalOutputTokens += Number(usage.output_tokens ?? 0); + } + const hasUsage = usageByRequestId.size > 0; + + let durationMs: number | undefined; + if (startTimestamp && endTimestamp) { + durationMs = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime(); + } + + const source: TranscriptSource = { + provider: 'claude', + sessionId, + projectPath, + startedAt: startTimestamp, + model, + }; + + return { + messages, + source, + tokenUsage: hasUsage ? { input: totalInputTokens, output: totalOutputTokens } : undefined, + durationMs, + costUsd: null, + }; +} + +/** + * Register tool_use IDs from an assistant message for later pairing with tool_result. + */ +function registerPendingToolCalls( + toolCalls: ToolCall[], + msgIdx: number, + pending: Map, +): void { + for (let i = 0; i < toolCalls.length; i++) { + const id = toolCalls[i].id; + if (id) { + pending.set(id, { msgIdx, toolIdx: i }); + } + } +} + +/** + * Extract text content from a message's content field. + */ +function extractTextContent( + content: string | readonly ClaudeContentBlock[] | undefined, +): string | undefined { + if (content === undefined || content === null) return undefined; + if (typeof content === 'string') return content; + + const textParts: string[] = []; + for (const block of content) { + if (block.type === 'text' && block.text) { + textParts.push(block.text); + } + } + return textParts.length > 0 ? textParts.join('') : undefined; +} + +/** + * Extract text and tool_use calls from an assistant message's content array. + * Note: tool_result blocks appear in user events, not here. 
+ */ +function extractAssistantContent(content: string | readonly ClaudeContentBlock[] | undefined): { + text: string | undefined; + toolCalls: ToolCall[]; +} { + if (content === undefined || content === null) { + return { text: undefined, toolCalls: [] }; + } + if (typeof content === 'string') { + return { text: content, toolCalls: [] }; + } + + const textParts: string[] = []; + const toolCalls: ToolCall[] = []; + + for (const block of content) { + switch (block.type) { + case 'text': + if (block.text) textParts.push(block.text); + break; + + case 'tool_use': + if (block.name) { + toolCalls.push({ + tool: block.name, + input: block.input, + id: block.id, + }); + } + break; + + // Skip thinking blocks and other types + } + } + + return { + text: textParts.length > 0 ? textParts.join('') : undefined, + toolCalls, + }; +} + +/** + * Extract text from a tool_result content field. + */ +function extractToolResultContent( + content: string | readonly { readonly type: string; readonly text?: string }[] | undefined, +): string | undefined { + if (content === undefined || content === null) return undefined; + if (typeof content === 'string') return content; + + const parts: string[] = []; + for (const block of content) { + if (block.type === 'text' && block.text) { + parts.push(block.text); + } + } + return parts.length > 0 ? 
parts.join('') : undefined; +} diff --git a/packages/core/src/import/index.ts b/packages/core/src/import/index.ts new file mode 100644 index 000000000..7e695fd3b --- /dev/null +++ b/packages/core/src/import/index.ts @@ -0,0 +1,7 @@ +export { parseClaudeSession } from './claude-parser.js'; +export { + discoverClaudeSessions, + type ClaudeDiscoverOptions, + type ClaudeSession, +} from './session-discovery.js'; +export { readTranscriptFile, type TranscriptEntry, type TranscriptSource } from './types.js'; diff --git a/packages/core/src/import/session-discovery.ts b/packages/core/src/import/session-discovery.ts new file mode 100644 index 000000000..e03fadaf4 --- /dev/null +++ b/packages/core/src/import/session-discovery.ts @@ -0,0 +1,114 @@ +/** + * Claude Code session discovery. + * + * Scans ~/.claude/projects/ for session JSONL files. Claude Code stores + * sessions at: + * ~/.claude/projects//.jsonl + * + * Where is the absolute project path with `/` replaced + * by `-` and prefixed with `-` (e.g., `-home-user-myproject`). + * + * Sessions are returned sorted by modification time (most recent first). + */ + +import { readdir, stat } from 'node:fs/promises'; +import { homedir } from 'node:os'; +import path from 'node:path'; + +export interface ClaudeSession { + /** UUID of the session */ + readonly sessionId: string; + /** Full path to the JSONL file */ + readonly filePath: string; + /** Encoded project directory name */ + readonly projectDir: string; + /** Last modification time */ + readonly updatedAt: Date; +} + +export interface ClaudeDiscoverOptions { + /** Filter by session UUID (exact match). */ + readonly sessionId?: string; + /** Filter by project path (substring match against encoded dir name). */ + readonly projectPath?: string; + /** Maximum number of sessions to return (default: 10). */ + readonly limit?: number; + /** Override the default ~/.claude/projects directory. */ + readonly projectsDir?: string; + /** Return only the most recent session. 
*/ + readonly latest?: boolean; +} + +const DEFAULT_PROJECTS_DIR = () => path.join(homedir(), '.claude', 'projects'); + +/** + * Encode a filesystem path to Claude Code's project directory format. + * `/home/user/myproject` → `-home-user-myproject` + */ +function encodeProjectPath(projectPath: string): string { + return projectPath.replace(/\//g, '-'); +} + +export async function discoverClaudeSessions( + opts?: ClaudeDiscoverOptions, +): Promise { + const projectsDir = opts?.projectsDir ?? DEFAULT_PROJECTS_DIR(); + const limit = opts?.latest ? 1 : (opts?.limit ?? 10); + + let projectDirs: string[]; + try { + projectDirs = await readdir(projectsDir); + } catch { + return []; + } + + // Filter project directories if projectPath is specified + if (opts?.projectPath) { + const encoded = encodeProjectPath(opts.projectPath); + projectDirs = projectDirs.filter((dir) => dir === encoded || dir.includes(encoded)); + } + + const sessions: ClaudeSession[] = []; + + for (const projectDir of projectDirs) { + const dirPath = path.join(projectsDir, projectDir); + + let entries: string[]; + try { + entries = await readdir(dirPath); + } catch { + continue; + } + + for (const entry of entries) { + if (!entry.endsWith('.jsonl')) continue; + + const sessionId = entry.replace(/\.jsonl$/, ''); + + // Filter by session ID if specified + if (opts?.sessionId && sessionId !== opts.sessionId) continue; + + const filePath = path.join(dirPath, entry); + + let updatedAt: Date; + try { + const fileStat = await stat(filePath); + updatedAt = fileStat.mtime; + } catch { + updatedAt = new Date(0); + } + + sessions.push({ + sessionId, + filePath, + projectDir, + updatedAt, + }); + } + } + + // Sort by modification time, most recent first + sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime()); + + return sessions.slice(0, limit); +} diff --git a/packages/core/src/import/types.ts b/packages/core/src/import/types.ts new file mode 100644 index 000000000..5595dfd82 --- /dev/null +++ 
b/packages/core/src/import/types.ts @@ -0,0 +1,43 @@ +/** + * Core types for the transcript import pipeline. + * + * A TranscriptEntry represents a fully parsed agent session transcript: + * the ordered messages plus session metadata, token usage, and duration. + * + * A TranscriptSource describes where a transcript came from (provider, + * session ID, project path, etc.). + */ + +import { readFile } from 'node:fs/promises'; + +import type { Message, ProviderTokenUsage } from '../evaluation/providers/types.js'; + +/** + * A parsed transcript: ordered messages plus session metadata. + */ +export interface TranscriptEntry { + readonly messages: Message[]; + readonly source: TranscriptSource; + readonly tokenUsage?: ProviderTokenUsage; + readonly durationMs?: number; + readonly costUsd?: number | null; +} + +/** + * Metadata describing the origin of a transcript. + */ +export interface TranscriptSource { + readonly provider: string; + readonly sessionId: string; + readonly projectPath?: string; + readonly startedAt?: string; + readonly model?: string; +} + +/** + * Read a JSONL transcript file and return its raw text. + * Throws if the file does not exist or cannot be read. 
+ */ +export async function readTranscriptFile(filePath: string): Promise { + return readFile(filePath, 'utf8'); +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 852f73f66..37bb2e8a2 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -87,6 +87,9 @@ export { discoverGraders as discoverJudges, } from './evaluation/registry/grader-discovery.js'; +// Import pipeline +export * from './import/index.js'; + export type AgentKernel = { status: string; }; diff --git a/plugins/agentv-dev/skills/agentv-bench/references/migrating-from-skill-creator.md b/plugins/agentv-dev/skills/agentv-bench/references/migrating-from-skill-creator.md index 7c02b7fb7..2231efbf6 100644 --- a/plugins/agentv-dev/skills/agentv-bench/references/migrating-from-skill-creator.md +++ b/plugins/agentv-dev/skills/agentv-bench/references/migrating-from-skill-creator.md @@ -10,10 +10,8 @@ AgentV runs skill-creator's evals.json directly — no conversion required: # Run evals.json with AgentV agentv eval evals.json -# Or in agent mode (no API keys) -agentv prompt eval --list evals.json -agentv prompt eval --input evals.json --test-id 1 -agentv prompt eval --expected-output evals.json --test-id 1 +# Or run a single assertion offline (no API keys) +agentv eval assert --agent-output "..." --agent-input "..." 
``` AgentV automatically: @@ -22,18 +20,7 @@ AgentV automatically: - Converts `assertions` → LLM-grader evaluators - Resolves `files[]` paths relative to the evals.json directory -If you're using the `agentv-bench` skill, the bundled Bun scripts wrap these same commands and artifacts instead of inventing a second format: - -```bash -cd plugins/agentv-dev/skills/agentv-bench -bun install -bun scripts/run-eval.ts --eval-path ../../../../examples/features/agent-skills-evals/evals.json --dry-run -bun scripts/prompt-eval.ts --list ../../../../examples/features/agent-skills-evals/evals.json -bun scripts/convert-evals.ts --eval-path ../../../../examples/features/agent-skills-evals/evals.json --out /tmp/eval.yaml -bun scripts/generate-report.ts --artifacts .agentv/artifacts --out /tmp/agentv-review.html -``` - -These scripts still call `agentv` wherever possible. Code graders, grading, and artifact generation remain in AgentV core; the scripts just orchestrate and summarize the existing outputs. +If you're using the `agentv-bench` skill, it orchestrates these same AgentV commands. Code graders, grading, and artifact generation remain in AgentV core; the skill just orchestrates and summarizes the existing outputs. ## What You Gain diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md b/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md index 86c1438f0..5d9cc8331 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md +++ b/plugins/agentv-dev/skills/agentv-eval-writer/SKILL.md @@ -30,15 +30,10 @@ agentv convert evals.json # Run directly without converting (all commands accept evals.json) agentv eval evals.json -agentv prompt eval --list evals.json -agentv prompt eval --input evals.json --test-id 1 -agentv prompt eval --expected-output evals.json --test-id 1 ``` The converter maps `prompt` → `input`, `expected_output` → `expected_output`, `assertions` → `assertions` (`llm-grader`), and resolves `files[]` paths. 
The generated YAML includes TODO comments for AgentV features to add (workspace setup, code graders, rubrics, required gates). -If you're running the lifecycle through `agentv-bench`, use `agentv convert` and `agentv prompt eval` directly — the Python scripts in `agentv-bench/scripts/` orchestrate these same commands. - After converting, enhance the YAML with AgentV-specific capabilities shown below. ## From Chat Transcript @@ -540,10 +535,11 @@ agentv eval [--test-id ] [--target ] [--dry-run] [--thresh # Run with OTLP JSON file (importable by OTel backends) agentv eval --otel-file traces/eval.otlp.json -# Agent-orchestrated evals (no API keys needed) -agentv prompt eval --list # enumerate test IDs -agentv prompt eval --input --test-id # task input JSON (file paths, not embedded content) -agentv prompt eval --expected-output --test-id # expected output + grader criteria +# Run a single assertion in isolation (no API keys needed) +agentv eval assert --agent-output "..." --agent-input "..." + +# Import agent transcripts for offline grading +agentv import claude --discover latest # Re-run only execution errors from a previous output agentv eval --retry-errors