diff --git a/.gitignore b/.gitignore index 6e5f8cc59c..35f60f9b68 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ apps/web/src/components/__screenshots__ .vitest-* __screenshots__/ .tanstack +artifacts/ \ No newline at end of file diff --git a/README.md b/README.md index d7b2fccb8f..5df4d716bc 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,10 @@ We are very very early in this project. Expect bugs. We are not accepting contributions yet. +## Performance benchmarks + +See [docs/perf-benchmarks.md](./docs/perf-benchmarks.md) for the local perf harness, seeded scenarios, artifact format, and the commands for automated and manual benchmark runs. + ## If you REALLY want to contribute still.... read this first Read [CONTRIBUTING.md](./CONTRIBUTING.md) before opening an issue or PR. diff --git a/apps/server/integration/TestProviderAdapter.integration.ts b/apps/server/integration/TestProviderAdapter.integration.ts index 9c87d9821a..7a5c7972fc 100644 --- a/apps/server/integration/TestProviderAdapter.integration.ts +++ b/apps/server/integration/TestProviderAdapter.integration.ts @@ -488,7 +488,9 @@ export const makeTestProviderAdapterHarness = (options?: MakeTestProviderAdapter readThread, rollbackThread, stopAll, - streamEvents: Stream.fromQueue(runtimeEvents), + get streamEvents() { + return Stream.fromQueue(runtimeEvents); + }, }; const queueTurnResponse = ( diff --git a/apps/server/integration/perf/seedPerfState.test.ts b/apps/server/integration/perf/seedPerfState.test.ts new file mode 100644 index 0000000000..c50da3cc84 --- /dev/null +++ b/apps/server/integration/perf/seedPerfState.test.ts @@ -0,0 +1,53 @@ +import { readFile, rm } from "node:fs/promises"; +import { join } from "node:path"; + +import { afterEach, describe, expect, it } from "vitest"; + +import { PERF_CATALOG_IDS, getPerfSeedScenario } from "@t3tools/shared/perf/scenarioCatalog"; +import { seedPerfState } from "./seedPerfState.ts"; + +describe("seedPerfState", () => { + const 
runParentDirsToCleanup: string[] = []; + + afterEach(async () => { + await Promise.all( + runParentDirsToCleanup + .splice(0) + .map((runParentDir) => rm(runParentDir, { recursive: true, force: true })), + ); + }); + + it("seeds large thread fixtures through the real event store and projection pipeline", async () => { + const seeded = await seedPerfState("large_threads"); + runParentDirsToCleanup.push(seeded.runParentDir); + const scenario = getPerfSeedScenario("large_threads"); + + expect(seeded.snapshot.projects).toHaveLength(5); + expect(seeded.snapshot.threads).toHaveLength(30); + expect(seeded.baseDir).toBe(join(seeded.runParentDir, "base")); + expect(new Set(seeded.snapshot.threads.map((thread) => thread.projectId)).size).toBe(5); + + const heavyThread = seeded.snapshot.threads.find( + (thread) => thread.id === PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + ); + const heavyThreadScenario = scenario.threads.find( + (thread) => thread.id === PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + ); + expect(heavyThread?.messages).toHaveLength(2_000); + expect(heavyThreadScenario?.turnCount ?? Number.POSITIVE_INFINITY).toBeLessThan(100); + expect((heavyThread?.activities.length ?? 0) > 0).toBe(true); + expect((heavyThread?.proposedPlans.length ?? 0) > 0).toBe(true); + expect((heavyThread?.checkpoints.length ?? 0) >= 20).toBe(true); + expect((heavyThread?.checkpoints[0]?.files.length ?? 
0) >= 12).toBe(true); + }); + + it("enables assistant streaming in the burst base seed for websocket perf runs", async () => { + const seeded = await seedPerfState("burst_base"); + runParentDirsToCleanup.push(seeded.runParentDir); + + const rawSettings = await readFile(join(seeded.baseDir, "userdata/settings.json"), "utf8"); + expect(JSON.parse(rawSettings)).toMatchObject({ + enableAssistantStreaming: true, + }); + }); +}); diff --git a/apps/server/integration/perf/seedPerfState.ts b/apps/server/integration/perf/seedPerfState.ts new file mode 100644 index 0000000000..79a5b92bf8 --- /dev/null +++ b/apps/server/integration/perf/seedPerfState.ts @@ -0,0 +1,589 @@ +import { execFileSync } from "node:child_process"; +import { cp, mkdir, mkdtemp, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import * as NodeServices from "@effect/platform-node/NodeServices"; +import { + CheckpointRef, + CommandId, + EventId, + type OrchestrationEvent, + type OrchestrationReadModel, + type ProjectId, +} from "@t3tools/contracts"; +import { Effect, Layer, ManagedRuntime } from "effect"; + +import { + buildPerfAssistantMessageCountPlan, + getPerfSeedScenario, + perfEventId, + perfMessageIdForThread, + perfTurnIdForThread, + type PerfProjectScenario, + type PerfSeedScenario, + type PerfSeedScenarioId, + type PerfSeedThreadScenario, +} from "@t3tools/shared/perf/scenarioCatalog"; +import { ServerConfig } from "../../src/config.ts"; +import { OrchestrationProjectionPipelineLive } from "../../src/orchestration/Layers/ProjectionPipeline.ts"; +import { OrchestrationProjectionSnapshotQueryLive } from "../../src/orchestration/Layers/ProjectionSnapshotQuery.ts"; +import { OrchestrationProjectionPipeline } from "../../src/orchestration/Services/ProjectionPipeline.ts"; +import { ProjectionSnapshotQuery } from "../../src/orchestration/Services/ProjectionSnapshotQuery.ts"; +import { OrchestrationEventStoreLive } from 
"../../src/persistence/Layers/OrchestrationEventStore.ts"; +import { layerConfig as SqlitePersistenceLayerLive } from "../../src/persistence/Layers/Sqlite.ts"; +import { OrchestrationEventStore } from "../../src/persistence/Services/OrchestrationEventStore.ts"; +import { ServerSettingsService, ServerSettingsLive } from "../../src/serverSettings.ts"; + +export interface PerfSeededState { + readonly scenarioId: PerfSeedScenarioId; + readonly runParentDir: string; + readonly baseDir: string; + readonly workspaceRoot: string; + readonly snapshot: OrchestrationReadModel; +} + +const templateDirPromises = new Map>(); + +function runGit(cwd: string, args: ReadonlyArray) { + execFileSync("git", args, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf8", + }); +} + +async function initializeGitWorkspace(workspaceRoot: string): Promise { + await mkdir(workspaceRoot, { recursive: true }); + runGit(workspaceRoot, ["init", "--initial-branch=main"]); + runGit(workspaceRoot, ["config", "user.email", "perf@example.com"]); + runGit(workspaceRoot, ["config", "user.name", "Perf Fixture"]); + await writeFile( + join(workspaceRoot, "README.md"), + "# Performance Workspace\n\nSeeded fixture state for local perf regression tests.\n", + "utf8", + ); + runGit(workspaceRoot, ["add", "."]); + runGit(workspaceRoot, ["commit", "-m", "Initial perf workspace"]); +} + +function plusMs(baseTimeMs: number, offsetMs: number): string { + return new Date(baseTimeMs + offsetMs).toISOString(); +} + +function makeCommandId(prefix: string, threadId: string, turnIndex: number): CommandId { + return CommandId.makeUnsafe(`${prefix}:${threadId}:${turnIndex.toString().padStart(4, "0")}`); +} + +function buildProjectEvent( + project: PerfProjectScenario, + workspaceRoot: string, + createdAt: string, +): Omit { + return { + type: "project.created", + eventId: EventId.makeUnsafe(`perf-project-created:${String(project.id)}`), + aggregateKind: "project", + aggregateId: project.id, + occurredAt: 
createdAt, + commandId: CommandId.makeUnsafe(`perf-project-create:${String(project.id)}`), + causationEventId: null, + correlationId: CommandId.makeUnsafe(`perf-project-create:${String(project.id)}`), + metadata: {}, + payload: { + projectId: project.id, + title: project.title, + workspaceRoot, + defaultModelSelection: project.defaultModelSelection, + scripts: [], + createdAt, + updatedAt: createdAt, + }, + }; +} + +function buildThreadCreatedEvent( + thread: PerfSeedThreadScenario, + project: PerfProjectScenario, + createdAt: string, +): Omit { + return { + type: "thread.created", + eventId: EventId.makeUnsafe(`perf-thread-created:${String(thread.id)}`), + aggregateKind: "thread", + aggregateId: thread.id, + occurredAt: createdAt, + commandId: CommandId.makeUnsafe(`perf-thread-create:${String(thread.id)}`), + causationEventId: null, + correlationId: CommandId.makeUnsafe(`perf-thread-create:${String(thread.id)}`), + metadata: {}, + payload: { + threadId: thread.id, + projectId: thread.projectId, + title: thread.title, + modelSelection: project.defaultModelSelection, + runtimeMode: "full-access", + interactionMode: "default", + branch: null, + worktreePath: null, + createdAt, + updatedAt: createdAt, + }, + }; +} + +function buildUserMessageText(thread: PerfSeedThreadScenario, turnIndex: number): string { + const base = `${thread.title} request ${turnIndex}: review the current workspace state and explain the next change.`; + if (turnIndex % 11 === 0) { + return `${base}\n\nFocus on virtualization, batching, and cross-thread navigation latency.`; + } + if (turnIndex % 7 === 0) { + return `${base}\n\nSummarize CPU-sensitive paths and any websocket burst handling concerns.`; + } + return base; +} + +function buildAssistantMessageText( + thread: PerfSeedThreadScenario, + turnIndex: number, + assistantMessageIndex: number, + assistantMessageCount: number, +): string { + const phaseLabel = + assistantMessageIndex === 1 + ? 
"Opening" + : assistantMessageIndex === assistantMessageCount + ? "Settled" + : assistantMessageIndex % 5 === 0 + ? "Checkpoint" + : "Loop"; + const focusTopics = [ + "checked the visible timeline window and sidebar ordering", + "trimmed redundant projection work before the next render pass", + "reviewed websocket fan-out and message grouping pressure", + "verified checkpoint rows stay bounded while hidden threads move", + "kept the active route stable while background threads kept streaming", + ]; + const topic = focusTopics[(turnIndex + assistantMessageIndex) % focusTopics.length]!; + const threadBias = + thread.category === "heavy" + ? "The thread is still dense enough to stress virtualization." + : thread.category === "burst" + ? "This pass is still carrying live burst pressure." + : "Background churn is staying active without stealing focus."; + + return `${thread.title} ${phaseLabel.toLowerCase()} ${assistantMessageIndex}/${assistantMessageCount} for turn ${turnIndex}: ${topic}. ${threadBias}`; +} + +function buildProposedPlanMarkdown(thread: PerfSeedThreadScenario, turnIndex: number): string { + return [ + `## ${thread.title} plan ${turnIndex}`, + "", + "1. Measure the current thread switch path against a stable local budget.", + "2. Reduce avoidable render churn in the visible timeline window.", + "3. 
Validate websocket burst handling with real runtime events before tightening thresholds.", + ].join("\n"); +} + +function buildCheckpointFiles( + thread: PerfSeedThreadScenario, + threadOrdinal: number, + turnIndex: number, +): ReadonlyArray<{ + readonly path: string; + readonly kind: string; + readonly additions: number; + readonly deletions: number; +}> { + const nestedPathTemplates = [ + ["apps", "web", "src", "components", `thread-${threadOrdinal + 1}`, "TimelineVirtualizer.tsx"], + ["apps", "web", "src", "components", `thread-${threadOrdinal + 1}`, "ThreadSummaryPane.tsx"], + ["apps", "web", "src", "hooks", `thread-${threadOrdinal + 1}`, "useThreadViewport.ts"], + ["apps", "web", "src", "stores", `thread-${threadOrdinal + 1}`, "timelineStore.ts"], + [ + "apps", + "server", + "src", + "orchestration", + `thread-${threadOrdinal + 1}`, + "projectionPipeline.ts", + ], + ["apps", "server", "src", "provider", `thread-${threadOrdinal + 1}`, "runtimeBuffer.ts"], + ["packages", "shared", "src", "perf", `thread-${threadOrdinal + 1}`, "fixtureBuilders.ts"], + ["packages", "shared", "src", "perf", `thread-${threadOrdinal + 1}`, "scenarioCatalog.ts"], + ["packages", "contracts", "src", "orchestration", `thread-${threadOrdinal + 1}`, "schemas.ts"], + ["docs", "perf", `thread-${threadOrdinal + 1}`, "notes", "regression-findings.md"], + ["scripts", "perf", `thread-${threadOrdinal + 1}`, "capture-profile.ts"], + ["test", "perf", "fixtures", `thread-${threadOrdinal + 1}`, "workspace-state.json"], + ] as const; + const fileCount = thread.category === "heavy" ? 12 + (turnIndex % 7) : 7 + (turnIndex % 4); + + return Array.from({ length: fileCount }, (_, fileIndex) => { + const template = nestedPathTemplates[fileIndex % nestedPathTemplates.length]!; + const variant = Math.floor(fileIndex / nestedPathTemplates.length); + const baseSegments = [...template]; + const fileName = baseSegments.pop()!; + const variantFileName = + variant === 0 + ? 
fileName + : fileName.replace(/(\.[^.]*)$/, `-${(variant + 1).toString().padStart(2, "0")}$1`); + const path = [...baseSegments, variantFileName].join("/"); + const kind = + fileIndex % 9 === 0 + ? "deleted" + : fileIndex % 5 === 0 + ? "added" + : fileIndex % 4 === 0 + ? "renamed" + : "modified"; + + return { + path, + kind, + additions: kind === "deleted" ? 0 : 4 + ((turnIndex + fileIndex) % 11), + deletions: kind === "added" ? 0 : 1 + ((threadOrdinal + fileIndex + turnIndex) % 6), + }; + }); +} + +function buildThreadTurnEvents( + thread: PerfSeedThreadScenario, + threadOrdinal: number, + projectStartMs: number, +): ReadonlyArray> { + const events: Array> = []; + const threadStartMs = projectStartMs + threadOrdinal * 60_000; + const assistantMessageCountPlan = buildPerfAssistantMessageCountPlan(thread); + + for (let turnIndex = 1; turnIndex <= thread.turnCount; turnIndex += 1) { + const turnId = perfTurnIdForThread(thread, turnIndex); + const userMessageId = perfMessageIdForThread(thread, "user", turnIndex, 1); + const assistantMessageCount = assistantMessageCountPlan[turnIndex - 1] ?? 1; + const turnBaseMs = threadStartMs + turnIndex * 1_000; + const userOccurredAt = plusMs(turnBaseMs, 0); + const activityAssistantIndex = + thread.activityStride !== null && turnIndex % thread.activityStride === 0 + ? Math.max(2, Math.floor(assistantMessageCount * 0.35)) + : null; + const planAssistantIndex = + thread.planStride !== null && turnIndex % thread.planStride === 0 + ? 
Math.max(2, Math.floor(assistantMessageCount * 0.7)) + : null; + let lastAssistantMessageId = perfMessageIdForThread( + thread, + "assistant", + turnIndex, + assistantMessageCount, + ); + let turnEventOffset = 0; + const nextEventId = (prefix: string) => + perfEventId(prefix, thread.id, turnIndex * 100 + turnEventOffset++); + + events.push({ + type: "thread.message-sent", + eventId: nextEventId("perf-user-message"), + aggregateKind: "thread", + aggregateId: thread.id, + occurredAt: userOccurredAt, + commandId: makeCommandId("perf-user-message", String(thread.id), turnIndex), + causationEventId: null, + correlationId: makeCommandId("perf-turn", String(thread.id), turnIndex), + metadata: {}, + payload: { + threadId: thread.id, + messageId: userMessageId, + role: "user", + text: buildUserMessageText(thread, turnIndex), + attachments: [], + turnId, + streaming: false, + createdAt: userOccurredAt, + updatedAt: userOccurredAt, + }, + }); + + for ( + let assistantMessageIndex = 1; + assistantMessageIndex <= assistantMessageCount; + assistantMessageIndex += 1 + ) { + const assistantOccurredAt = plusMs(turnBaseMs, 120 + assistantMessageIndex * 40); + const assistantMessageId = perfMessageIdForThread( + thread, + "assistant", + turnIndex, + assistantMessageIndex, + ); + lastAssistantMessageId = assistantMessageId; + + events.push({ + type: "thread.message-sent", + eventId: nextEventId("perf-assistant-message"), + aggregateKind: "thread", + aggregateId: thread.id, + occurredAt: assistantOccurredAt, + commandId: makeCommandId("perf-assistant-message", String(thread.id), turnIndex), + causationEventId: null, + correlationId: makeCommandId("perf-turn", String(thread.id), turnIndex), + metadata: {}, + payload: { + threadId: thread.id, + messageId: assistantMessageId, + role: "assistant", + text: buildAssistantMessageText( + thread, + turnIndex, + assistantMessageIndex, + assistantMessageCount, + ), + attachments: [], + turnId, + streaming: false, + createdAt: assistantOccurredAt, 
+ updatedAt: assistantOccurredAt, + }, + }); + + if (activityAssistantIndex === assistantMessageIndex) { + const activityOccurredAt = plusMs(turnBaseMs, 132 + assistantMessageIndex * 40); + events.push({ + type: "thread.activity-appended", + eventId: nextEventId("perf-activity"), + aggregateKind: "thread", + aggregateId: thread.id, + occurredAt: activityOccurredAt, + commandId: makeCommandId("perf-activity", String(thread.id), turnIndex), + causationEventId: null, + correlationId: makeCommandId("perf-turn", String(thread.id), turnIndex), + metadata: {}, + payload: { + threadId: thread.id, + activity: { + id: perfEventId("perf-activity-row", thread.id, turnIndex), + tone: "tool", + kind: "tool.completed", + summary: `Synthetic command batch ${turnIndex}.${assistantMessageIndex}`, + payload: { + command: "perf-simulated", + batch: turnIndex, + loop: assistantMessageIndex, + threadCategory: thread.category, + }, + turnId, + createdAt: activityOccurredAt, + }, + }, + }); + } + + if (planAssistantIndex === assistantMessageIndex) { + const planOccurredAt = plusMs(turnBaseMs, 140 + assistantMessageIndex * 40); + events.push({ + type: "thread.proposed-plan-upserted", + eventId: nextEventId("perf-plan"), + aggregateKind: "thread", + aggregateId: thread.id, + occurredAt: planOccurredAt, + commandId: makeCommandId("perf-plan", String(thread.id), turnIndex), + causationEventId: null, + correlationId: makeCommandId("perf-turn", String(thread.id), turnIndex), + metadata: {}, + payload: { + threadId: thread.id, + proposedPlan: { + id: `perf-plan:${String(thread.id)}:${turnIndex.toString().padStart(4, "0")}`, + turnId, + planMarkdown: buildProposedPlanMarkdown(thread, turnIndex), + implementedAt: null, + implementationThreadId: null, + createdAt: planOccurredAt, + updatedAt: planOccurredAt, + }, + }, + }); + } + } + + if (thread.diffStride !== null && turnIndex % thread.diffStride === 0) { + const diffOccurredAt = plusMs(turnBaseMs, 180 + assistantMessageCount * 40); + 
events.push({ + type: "thread.turn-diff-completed", + eventId: nextEventId("perf-diff"), + aggregateKind: "thread", + aggregateId: thread.id, + occurredAt: diffOccurredAt, + commandId: makeCommandId("perf-diff", String(thread.id), turnIndex), + causationEventId: null, + correlationId: makeCommandId("perf-turn", String(thread.id), turnIndex), + metadata: {}, + payload: { + threadId: thread.id, + turnId, + checkpointTurnCount: turnIndex, + checkpointRef: CheckpointRef.makeUnsafe( + `refs/perf/${String(thread.id)}/${turnIndex.toString().padStart(4, "0")}`, + ), + status: "ready", + files: buildCheckpointFiles(thread, threadOrdinal, turnIndex), + assistantMessageId: lastAssistantMessageId, + completedAt: diffOccurredAt, + }, + }); + } + } + + return events; +} + +function buildScenarioEvents( + scenario: PerfSeedScenario, + workspaceRootsByProjectId: ReadonlyMap, +): ReadonlyArray> { + const projectStartMs = Date.parse("2026-03-01T12:00:00.000Z"); + const threadsByProjectId = new Map(); + for (const thread of scenario.threads) { + const existing = threadsByProjectId.get(thread.projectId) ?? []; + existing.push(thread); + threadsByProjectId.set(thread.projectId, existing); + } + + let globalThreadOrdinal = 0; + const events: Array> = []; + + for (const [projectOrdinal, project] of scenario.projects.entries()) { + const projectWorkspaceRoot = workspaceRootsByProjectId.get(project.id); + if (!projectWorkspaceRoot) { + throw new Error(`Missing workspace root for perf project '${String(project.id)}'.`); + } + const projectBaseMs = projectStartMs + projectOrdinal * 6 * 60_000; + const projectCreatedAt = plusMs(projectBaseMs, 0); + const projectThreads = threadsByProjectId.get(project.id) ?? 
[]; + events.push(buildProjectEvent(project, projectWorkspaceRoot, projectCreatedAt)); + + for (const [threadOrdinalWithinProject, thread] of projectThreads.entries()) { + const threadCreatedAt = plusMs(projectBaseMs, threadOrdinalWithinProject * 45_000 + 50); + const threadEvents = buildThreadTurnEvents(thread, globalThreadOrdinal, projectBaseMs); + globalThreadOrdinal += 1; + events.push(buildThreadCreatedEvent(thread, project, threadCreatedAt), ...threadEvents); + } + } + + return events; +} + +async function createTemplateDir(scenarioId: PerfSeedScenarioId): Promise { + const scenario = getPerfSeedScenario(scenarioId); + const baseDir = await mkdtemp(join(tmpdir(), `t3-perf-template-${scenarioId}-`)); + const primaryProject = scenario.projects[0]; + if (!primaryProject) { + throw new Error(`Perf scenario '${scenarioId}' has no projects.`); + } + const workspaceRoot = join(baseDir, primaryProject.workspaceDirectoryName); + const workspaceRootsByProjectId = new Map( + scenario.projects.map((project) => [project.id, join(baseDir, project.workspaceDirectoryName)]), + ); + + await Promise.all( + scenario.projects.map((project) => + initializeGitWorkspace(join(baseDir, project.workspaceDirectoryName)), + ), + ); + + const seedLayer = Layer.empty.pipe( + Layer.provideMerge(OrchestrationProjectionSnapshotQueryLive), + Layer.provideMerge(OrchestrationProjectionPipelineLive), + Layer.provideMerge(OrchestrationEventStoreLive), + Layer.provideMerge(ServerSettingsLive), + Layer.provideMerge(SqlitePersistenceLayerLive), + Layer.provideMerge(ServerConfig.layerTest(workspaceRoot, baseDir)), + Layer.provideMerge(NodeServices.layer), + ); + const runtime = ManagedRuntime.make(seedLayer); + + const snapshot = await runtime.runPromise( + Effect.gen(function* () { + const serverSettings = yield* ServerSettingsService; + const eventStore = yield* OrchestrationEventStore; + const projectionPipeline = yield* OrchestrationProjectionPipeline; + const snapshotQuery = yield* 
ProjectionSnapshotQuery; + + yield* serverSettings.updateSettings({ + enableAssistantStreaming: scenario.id === "burst_base", + }); + + for (const event of buildScenarioEvents(scenario, workspaceRootsByProjectId)) { + const storedEvent = yield* eventStore.append(event); + yield* projectionPipeline.projectEvent(storedEvent); + } + + return yield* snapshotQuery.getSnapshot(); + }), + ); + + const manifestPath = join(baseDir, "perf-seed-manifest.json"); + await writeFile( + manifestPath, + `${JSON.stringify( + { + scenarioId, + workspaceRoot, + snapshotSequence: snapshot.snapshotSequence, + projectCount: snapshot.projects.length, + threadCount: snapshot.threads.length, + }, + null, + 2, + )}\n`, + "utf8", + ); + + await runtime.dispose(); + return baseDir; +} + +async function getTemplateDir(scenarioId: PerfSeedScenarioId): Promise { + const existing = templateDirPromises.get(scenarioId); + if (existing) { + return existing; + } + const created = createTemplateDir(scenarioId); + templateDirPromises.set(scenarioId, created); + return created; +} + +export async function seedPerfState(scenarioId: PerfSeedScenarioId): Promise { + const scenario = getPerfSeedScenario(scenarioId); + const templateDir = await getTemplateDir(scenarioId); + const runParentDir = await mkdtemp(join(tmpdir(), `t3-perf-run-${scenarioId}-`)); + const baseDir = join(runParentDir, "base"); + await cp(templateDir, baseDir, { recursive: true, force: true }); + const primaryProject = scenario.projects[0]; + if (!primaryProject) { + throw new Error(`Perf scenario '${scenarioId}' has no projects.`); + } + const workspaceRoot = join(baseDir, primaryProject.workspaceDirectoryName); + + const snapshotLayer = Layer.empty.pipe( + Layer.provideMerge(OrchestrationProjectionSnapshotQueryLive), + Layer.provideMerge(ServerSettingsLive), + Layer.provideMerge(SqlitePersistenceLayerLive), + Layer.provideMerge(ServerConfig.layerTest(workspaceRoot, baseDir)), + Layer.provideMerge(NodeServices.layer), + ); + const 
runtime = ManagedRuntime.make(snapshotLayer); + const snapshot = await runtime.runPromise( + Effect.gen(function* () { + const snapshotQuery = yield* ProjectionSnapshotQuery; + return yield* snapshotQuery.getSnapshot(); + }), + ); + await runtime.dispose(); + + return { + scenarioId, + runParentDir, + baseDir, + workspaceRoot, + snapshot, + }; +} diff --git a/apps/server/integration/perf/serverLatency.perf.test.ts b/apps/server/integration/perf/serverLatency.perf.test.ts new file mode 100644 index 0000000000..1842dee813 --- /dev/null +++ b/apps/server/integration/perf/serverLatency.perf.test.ts @@ -0,0 +1,920 @@ +import { execFileSync } from "node:child_process"; +import { mkdir, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import { performance } from "node:perf_hooks"; + +import { + type ClientOrchestrationCommand, + CommandId, + MessageId, + type ModelSelection, + ORCHESTRATION_WS_METHODS, + type OrchestrationEvent, + type ProjectId, + type TerminalEvent, + ThreadId, + type ThreadId as ThreadIdType, + WS_METHODS, +} from "@t3tools/contracts"; +import { PERF_CATALOG_IDS } from "@t3tools/shared/perf/scenarioCatalog"; +import { afterEach, describe, expect, it } from "vitest"; + +import { + type ServerCommandLatencyMeasurement, + startServerPerfHarness, + summarizeCommandLatencyMeasurements, + summarizeRpcLatencySeries, + type ServerPerfHarness, + type ServerRpcLatencySeries, +} from "./serverPerfHarness.ts"; +import type { PerfLatencySample } from "@t3tools/shared/perf/artifact"; + +const CONTROL_PLANE_SAMPLE_COUNT = 4; +const GIT_RPC_SAMPLE_COUNT = 6; +const STREAM_THREAD_COUNT = 5; +const SPAM_THREAD_COUNT = 8; +const TERMINAL_SESSION_COUNT = 3; +const TERMINAL_OUTPUT_LINE_COUNT = 260; +const TERMINAL_OUTPUT_SLEEP_SECONDS = 0.04; +const GIT_BRANCH_COUNT = 240; +const GIT_UNTRACKED_FILE_COUNT = 160; +const DEFAULT_WAIT_TIMEOUT_MS = 45_000; + +interface SubscriptionCleanup { + readonly dispose: () => void; +} + +interface EventWaiter { + 
readonly description: string; + readonly select: (values: ReadonlyArray) => TResult | null; + readonly resolve: (value: TResult) => void; + readonly reject: (error: Error) => void; + readonly timer: ReturnType; +} + +class BufferedEventFeed implements SubscriptionCleanup { + private readonly values: TValue[] = []; + private readonly waiters = new Set>(); + + push(value: TValue) { + this.values.push(value); + for (const waiter of this.waiters) { + const match = waiter.select(this.values); + if (match === null) { + continue; + } + clearTimeout(waiter.timer); + this.waiters.delete(waiter); + waiter.resolve(match); + } + } + + waitFor( + description: string, + select: (values: ReadonlyArray) => TResult | null, + timeoutMs = DEFAULT_WAIT_TIMEOUT_MS, + ): Promise { + const immediateMatch = select(this.values); + if (immediateMatch !== null) { + return Promise.resolve(immediateMatch); + } + + return new Promise((resolve, reject) => { + const waiter: EventWaiter = { + description, + select, + resolve, + reject, + timer: setTimeout(() => { + this.waiters.delete(waiter as EventWaiter); + reject(new Error(`Timed out waiting for ${description} after ${timeoutMs}ms.`)); + }, timeoutMs), + }; + this.waiters.add(waiter as EventWaiter); + }); + } + + waitForEvent( + description: string, + predicate: (value: TValue) => boolean, + timeoutMs = DEFAULT_WAIT_TIMEOUT_MS, + ): Promise { + return this.waitFor(description, (values) => values.find(predicate) ?? null, timeoutMs); + } + + waitForCount( + description: string, + predicate: (value: TValue) => boolean, + count: number, + timeoutMs = DEFAULT_WAIT_TIMEOUT_MS, + ): Promise> { + return this.waitFor( + description, + (values) => { + const matches = values.filter(predicate); + return matches.length >= count ? 
matches.slice(0, count) : null; + }, + timeoutMs, + ); + } + + waitForDistinct( + description: string, + predicate: (value: TValue) => boolean, + selectKey: (value: TValue) => TKey, + count: number, + timeoutMs = DEFAULT_WAIT_TIMEOUT_MS, + ): Promise> { + return this.waitFor( + description, + (values) => { + const distinctMatches = new Map(); + for (const value of values) { + if (!predicate(value)) { + continue; + } + const key = selectKey(value); + if (!distinctMatches.has(key)) { + distinctMatches.set(key, value); + } + } + const matches = [...distinctMatches.values()]; + return matches.length >= count ? matches : null; + }, + timeoutMs, + ); + } + + dispose() { + for (const waiter of this.waiters) { + clearTimeout(waiter.timer); + waiter.reject(new Error(`Buffered feed disposed while waiting for ${waiter.description}.`)); + } + this.waiters.clear(); + } +} + +function sleep(delayMs: number): Promise { + return new Promise((resolve) => setTimeout(resolve, delayMs)); +} + +function nowIso(): string { + return new Date().toISOString(); +} + +function runGit(cwd: string, args: ReadonlyArray) { + execFileSync("git", args, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf8", + }); +} + +function makeControlThreadId(label: string): ThreadIdType { + return ThreadId.makeUnsafe(`perf-control-${label}-${crypto.randomUUID()}`); +} + +function makeCommandId(label: string) { + return CommandId.makeUnsafe(`perf-command:${label}:${crypto.randomUUID()}`); +} + +function makeMessageId(label: string) { + return MessageId.makeUnsafe(`perf-message:${label}:${crypto.randomUUID()}`); +} + +async function dispatchCommand(harness: ServerPerfHarness, command: ClientOrchestrationCommand) { + return await harness.rpc.request((client) => + client[ORCHESTRATION_WS_METHODS.dispatchCommand](command), + ); +} + +async function measureCommandLatency(input: { + readonly harness: ServerPerfHarness; + readonly orchestrationEvents: BufferedEventFeed; + readonly command: 
ClientOrchestrationCommand; + readonly expectedEventType: OrchestrationEvent["type"]; + readonly loadProfile: string; + readonly timeoutMs?: number; + readonly metadata?: Record; +}): Promise { + const startedAt = nowIso(); + const startedAtMs = performance.now(); + const result = await dispatchCommand(input.harness, input.command); + const ackAtMs = performance.now(); + const event = await input.orchestrationEvents.waitForEvent( + `${input.expectedEventType} for ${input.command.type}`, + (candidate) => + candidate.type === input.expectedEventType && + String(candidate.commandId) === String(input.command.commandId), + input.timeoutMs, + ); + const eventAtMs = performance.now(); + + return { + commandType: input.command.type, + loadProfile: input.loadProfile, + startedAt, + dispatchToAckMs: ackAtMs - startedAtMs, + dispatchToEventMs: eventAtMs - startedAtMs, + ackToEventMs: eventAtMs - ackAtMs, + resultSequence: result.sequence, + eventSequence: event.sequence, + ...(input.metadata ? { metadata: input.metadata } : {}), + }; +} + +async function createSupportThread(input: { + readonly harness: ServerPerfHarness; + readonly orchestrationEvents: BufferedEventFeed; + readonly projectId: ProjectId; + readonly modelSelection: ModelSelection; + readonly label: string; +}): Promise { + const threadId = makeControlThreadId(`load-${input.label}`); + const command = { + type: "thread.create", + commandId: makeCommandId(`load-thread-create-${input.label}`), + threadId, + projectId: input.projectId, + title: `Perf Load ${input.label}`, + modelSelection: input.modelSelection, + interactionMode: "default", + runtimeMode: "full-access", + branch: null, + worktreePath: null, + createdAt: nowIso(), + } as const satisfies ClientOrchestrationCommand; + + await dispatchCommand(input.harness, command); + await input.orchestrationEvents.waitForEvent( + `thread.created for load support thread ${input.label}`, + (event) => event.type === "thread.created" && event.payload.threadId === 
threadId, + ); + return threadId; +} + +async function captureCreateArchiveSamples(input: { + readonly harness: ServerPerfHarness; + readonly orchestrationEvents: BufferedEventFeed; + readonly loadProfile: string; + readonly sampleCount: number; + readonly projectId: ProjectId; + readonly modelSelection: ModelSelection; +}): Promise> { + const samples: ServerCommandLatencyMeasurement[] = []; + + for (let sampleIndex = 0; sampleIndex < input.sampleCount; sampleIndex += 1) { + const threadId = makeControlThreadId(`${input.loadProfile}-${sampleIndex + 1}`); + const createCommand = { + type: "thread.create", + commandId: makeCommandId(`thread-create-${input.loadProfile}-${sampleIndex + 1}`), + threadId, + projectId: input.projectId, + title: `Perf ${input.loadProfile} ${sampleIndex + 1}`, + modelSelection: input.modelSelection, + interactionMode: "default", + runtimeMode: "full-access", + branch: null, + worktreePath: null, + createdAt: nowIso(), + } as const satisfies ClientOrchestrationCommand; + + samples.push( + await measureCommandLatency({ + harness: input.harness, + orchestrationEvents: input.orchestrationEvents, + command: createCommand, + expectedEventType: "thread.created", + loadProfile: input.loadProfile, + metadata: { + sampleIndex: sampleIndex + 1, + threadId, + }, + }), + ); + + const archiveCommand = { + type: "thread.archive", + commandId: makeCommandId(`thread-archive-${input.loadProfile}-${sampleIndex + 1}`), + threadId, + } as const satisfies ClientOrchestrationCommand; + + samples.push( + await measureCommandLatency({ + harness: input.harness, + orchestrationEvents: input.orchestrationEvents, + command: archiveCommand, + expectedEventType: "thread.archived", + loadProfile: input.loadProfile, + metadata: { + sampleIndex: sampleIndex + 1, + threadId, + }, + }), + ); + } + + return samples; +} + +async function ensureStreamingThreads(input: { + readonly harness: ServerPerfHarness; + readonly orchestrationEvents: BufferedEventFeed; + readonly 
projectId: ProjectId; + readonly modelSelection: ModelSelection; + readonly count: number; +}): Promise> { + const threadIds: ThreadIdType[] = [ + PERF_CATALOG_IDS.burstBase.burstThreadId, + PERF_CATALOG_IDS.burstBase.navigationThreadId, + PERF_CATALOG_IDS.burstBase.fillerThreadId, + ]; + + while (threadIds.length < input.count) { + threadIds.push( + await createSupportThread({ + harness: input.harness, + orchestrationEvents: input.orchestrationEvents, + projectId: input.projectId, + modelSelection: input.modelSelection, + label: `stream-${threadIds.length + 1}`, + }), + ); + } + + return threadIds; +} + +async function startAssistantStreamingLoad(input: { + readonly harness: ServerPerfHarness; + readonly orchestrationEvents: BufferedEventFeed; + readonly threadIds: ReadonlyArray; + readonly label: string; +}): Promise { + await Promise.all( + input.threadIds.map((threadId, index) => + dispatchCommand(input.harness, { + type: "thread.turn.start", + commandId: makeCommandId(`${input.label}-turn-start-${index + 1}`), + threadId, + message: { + messageId: makeMessageId(`${input.label}-turn-${index + 1}`), + role: "user", + text: `Perf load ${input.label} ${index + 1}`, + attachments: [], + }, + runtimeMode: "full-access", + interactionMode: "default", + createdAt: nowIso(), + } as const satisfies ClientOrchestrationCommand), + ), + ); + + const threadIds = new Set(input.threadIds.map(String)); + await input.orchestrationEvents.waitForDistinct( + `assistant streaming output for ${input.label}`, + (event) => + event.type === "thread.message-sent" && + event.payload.role === "assistant" && + event.payload.streaming && + threadIds.has(String(event.payload.threadId)), + (event) => (event.type === "thread.message-sent" ? 
String(event.payload.threadId) : ""), + input.threadIds.length, + ); +} + +async function startCreateTurnSpamLoad(input: { + readonly harness: ServerPerfHarness; + readonly orchestrationEvents: BufferedEventFeed; + readonly projectId: ProjectId; + readonly modelSelection: ModelSelection; + readonly count: number; + readonly label: string; +}): Promise<{ + readonly threadIds: ReadonlyArray; + readonly done: Promise>; +}> { + const threadIds = Array.from({ length: input.count }, (_, index) => + makeControlThreadId(`${input.label}-${index + 1}`), + ); + const threadIdStrings = new Set(threadIds.map(String)); + + const done = Promise.all( + threadIds.map(async (threadId, index) => { + const createCommand = { + type: "thread.create", + commandId: makeCommandId(`${input.label}-create-${index + 1}`), + threadId, + projectId: input.projectId, + title: `Perf Spam ${index + 1}`, + modelSelection: input.modelSelection, + interactionMode: "default", + runtimeMode: "full-access", + branch: null, + worktreePath: null, + createdAt: nowIso(), + } as const satisfies ClientOrchestrationCommand; + await dispatchCommand(input.harness, createCommand); + await input.orchestrationEvents.waitForEvent( + `thread.created for ${input.label}-${index + 1}`, + (event) => event.type === "thread.created" && event.payload.threadId === threadId, + ); + await dispatchCommand(input.harness, { + type: "thread.turn.start", + commandId: makeCommandId(`${input.label}-turn-start-${index + 1}`), + threadId, + message: { + messageId: makeMessageId(`${input.label}-turn-${index + 1}`), + role: "user", + text: `Perf spam ${input.label} ${index + 1}`, + attachments: [], + }, + runtimeMode: "full-access", + interactionMode: "default", + createdAt: nowIso(), + } as const satisfies ClientOrchestrationCommand); + return threadId; + }), + ); + + await input.orchestrationEvents.waitForCount( + `thread.create spam ${input.label}`, + (event) => + event.type === "thread.created" && 
threadIdStrings.has(String(event.payload.threadId)), + Math.min(4, input.count), + ); + await input.orchestrationEvents.waitForDistinct( + `assistant spam output ${input.label}`, + (event) => + event.type === "thread.message-sent" && + event.payload.role === "assistant" && + event.payload.streaming && + threadIdStrings.has(String(event.payload.threadId)), + (event) => (event.type === "thread.message-sent" ? String(event.payload.threadId) : ""), + Math.min(4, input.count), + ); + + return { + threadIds, + done, + }; +} + +async function startTerminalLoad(input: { + readonly harness: ServerPerfHarness; + readonly terminalEvents: BufferedEventFeed; + readonly workspaceRoot: string; + readonly threadIds: ReadonlyArray; +}): Promise> { + const sessions = await Promise.all( + input.threadIds.slice(0, TERMINAL_SESSION_COUNT).map(async (threadId, index) => { + const terminalId = `perf-${index + 1}`; + await input.harness.rpc.request((client) => + client[WS_METHODS.terminalOpen]({ + threadId, + terminalId, + cwd: input.workspaceRoot, + cols: 120, + rows: 32, + }), + ); + + const command = [ + "i=1", + `while [ $i -le ${TERMINAL_OUTPUT_LINE_COUNT} ]`, + "do", + ` printf 'perf-terminal-${index + 1}-%03d\\n' "$i"`, + " i=$((i + 1))", + ` sleep ${TERMINAL_OUTPUT_SLEEP_SECONDS.toFixed(2)}`, + "done", + "", + ].join("\n"); + + await input.harness.rpc.request((client) => + client[WS_METHODS.terminalWrite]({ + threadId, + terminalId, + data: command, + }), + ); + + return { + threadId, + terminalId, + }; + }), + ); + + const terminalIds = new Set(sessions.map((session) => session.terminalId)); + await input.terminalEvents.waitForDistinct( + "terminal output load", + (event) => event.type === "output" && terminalIds.has(event.terminalId), + (event) => event.terminalId, + sessions.length, + 30_000, + ); + + return sessions; +} + +async function closeTerminalLoad( + harness: ServerPerfHarness, + sessions: ReadonlyArray<{ readonly threadId: ThreadIdType; readonly terminalId: string }>, 
+): Promise { + await Promise.all( + sessions.map((session) => + harness.rpc.request((client) => + client[WS_METHODS.terminalClose]({ + threadId: session.threadId, + terminalId: session.terminalId, + deleteHistory: true, + }), + ), + ), + ); +} + +async function seedGitPressure(workspaceRoot: string): Promise { + const untrackedDir = join(workspaceRoot, "perf-git-pressure"); + await mkdir(untrackedDir, { recursive: true }); + await Promise.all( + Array.from({ length: GIT_UNTRACKED_FILE_COUNT }, (_, index) => + writeFile( + join(untrackedDir, `untracked-${(index + 1).toString().padStart(3, "0")}.ts`), + `export const perfFile${index + 1} = ${index + 1};\n`, + "utf8", + ), + ), + ); + + for (let index = 0; index < GIT_BRANCH_COUNT; index += 1) { + runGit(workspaceRoot, ["branch", `perf/latency-${(index + 1).toString().padStart(4, "0")}`]); + } +} + +async function measureRpcLatencySeries(input: { + readonly harness: ServerPerfHarness; + readonly name: string; + readonly loadProfile: string; + readonly iterations: number; + readonly execute: () => Promise; +}): Promise { + const samples: PerfLatencySample[] = []; + + for (let iteration = 0; iteration < input.iterations; iteration += 1) { + const startedAt = nowIso(); + const startedAtMs = performance.now(); + await input.execute(); + const endedAtMs = performance.now(); + samples.push({ + name: `${input.name}-${iteration + 1}`, + durationMs: endedAtMs - startedAtMs, + startedAt, + endedAt: nowIso(), + metadata: { + iteration: iteration + 1, + loadProfile: input.loadProfile, + }, + }); + } + + return { + name: input.name, + loadProfile: input.loadProfile, + summary: summarizeRpcLatencySeries(samples), + samples, + }; +} + +function groupCommandSeries( + samples: ReadonlyArray, + loadProfile: string, +) { + const createSamples = samples.filter((sample) => sample.commandType === "thread.create"); + const archiveSamples = samples.filter((sample) => sample.commandType === "thread.archive"); + + return [ + { + name: 
"thread.create", + loadProfile, + summary: summarizeCommandLatencyMeasurements(createSamples), + samples: createSamples, + }, + { + name: "thread.archive", + loadProfile, + summary: summarizeCommandLatencyMeasurements(archiveSamples), + samples: archiveSamples, + }, + ] as const; +} + +describe("server perf latency", () => { + const disposables: Array = []; + + afterEach(() => { + for (const disposable of disposables.splice(0)) { + disposable.dispose(); + } + }); + + it("records idle and assistant-stream command latency at the websocket boundary", async () => { + let harness: ServerPerfHarness | null = null; + + try { + harness = await startServerPerfHarness({ + suite: "server-latency-critical-commands", + seedScenarioId: "burst_base", + providerScenarioId: "parallel_assistant_stream", + }); + const runStartedAt = nowIso(); + + const burstProject = harness.seededState.snapshot.projects.find( + (project) => project.id === PERF_CATALOG_IDS.burstBase.burstProjectId, + ); + expect(burstProject).toBeTruthy(); + const modelSelection = burstProject!.defaultModelSelection; + expect(modelSelection).toBeTruthy(); + + const orchestrationEvents = new BufferedEventFeed(); + disposables.push(orchestrationEvents); + disposables.push({ + dispose: harness.rpc.subscribe( + (client) => client[WS_METHODS.subscribeOrchestrationDomainEvents]({}), + (event) => orchestrationEvents.push(event), + ), + }); + + await sleep(250); + + const idleSamples = await captureCreateArchiveSamples({ + harness, + orchestrationEvents, + loadProfile: "idle", + sampleCount: CONTROL_PLANE_SAMPLE_COUNT, + projectId: burstProject!.id, + modelSelection: modelSelection!, + }); + + const streamingThreadIds = await ensureStreamingThreads({ + harness, + orchestrationEvents, + projectId: burstProject!.id, + modelSelection: modelSelection!, + count: STREAM_THREAD_COUNT, + }); + await startAssistantStreamingLoad({ + harness, + orchestrationEvents, + threadIds: streamingThreadIds, + label: "stream-control-plane", + }); 
+ + const streamingSamples = await captureCreateArchiveSamples({ + harness, + orchestrationEvents, + loadProfile: "assistant-stream-5x", + sampleCount: CONTROL_PLANE_SAMPLE_COUNT, + projectId: burstProject!.id, + modelSelection: modelSelection!, + }); + const spamLoad = await startCreateTurnSpamLoad({ + harness, + orchestrationEvents, + projectId: burstProject!.id, + modelSelection: modelSelection!, + count: SPAM_THREAD_COUNT, + label: "create-turn-spam-8x", + }); + const spamSamples = await captureCreateArchiveSamples({ + harness, + orchestrationEvents, + loadProfile: "create-turn-spam-8x", + sampleCount: CONTROL_PLANE_SAMPLE_COUNT, + projectId: burstProject!.id, + modelSelection: modelSelection!, + }); + await spamLoad.done; + + const commandLatency = [ + ...groupCommandSeries(idleSamples, "idle"), + ...groupCommandSeries(streamingSamples, "assistant-stream-5x"), + ...groupCommandSeries(spamSamples, "create-turn-spam-8x"), + ]; + const result = await harness.finishRun({ + artifactBasename: "control-plane-stream-baseline", + artifact: { + suite: "server-latency-critical-commands", + scenarioId: "burst_base_control_plane_stream_baseline", + startedAt: runStartedAt, + completedAt: nowIso(), + commandLatency, + rpcLatency: [], + metadata: { + sampleCountPerProfile: CONTROL_PLANE_SAMPLE_COUNT, + streamingThreadCount: STREAM_THREAD_COUNT, + spamThreadCount: SPAM_THREAD_COUNT, + }, + }, + }); + harness = null; + + expect(result.artifact.commandLatency).toHaveLength(6); + for (const series of result.artifact.commandLatency) { + expect(series.samples).toHaveLength(CONTROL_PLANE_SAMPLE_COUNT); + expect(series.summary.dispatchToEventMs.p50Ms).not.toBeNull(); + expect(series.summary.dispatchToEventMs.maxMs).toBeLessThan(30_000); + } + } finally { + await harness?.dispose(); + } + }, 180_000); + + it("records command and git rpc latency under terminal and mixed server load", async () => { + let harness: ServerPerfHarness | null = null; + let terminalSessions: ReadonlyArray<{ 
+ readonly threadId: ThreadIdType; + readonly terminalId: string; + }> = []; + + try { + harness = await startServerPerfHarness({ + suite: "server-latency-critical-commands", + seedScenarioId: "burst_base", + providerScenarioId: "parallel_assistant_stream", + }); + const runStartedAt = nowIso(); + + const burstProject = harness.seededState.snapshot.projects.find( + (project) => project.id === PERF_CATALOG_IDS.burstBase.burstProjectId, + ); + expect(burstProject).toBeTruthy(); + const modelSelection = burstProject!.defaultModelSelection; + expect(modelSelection).toBeTruthy(); + + const orchestrationEvents = new BufferedEventFeed(); + const terminalEvents = new BufferedEventFeed(); + disposables.push(orchestrationEvents, terminalEvents); + disposables.push({ + dispose: harness.rpc.subscribe( + (client) => client[WS_METHODS.subscribeOrchestrationDomainEvents]({}), + (event) => orchestrationEvents.push(event), + ), + }); + disposables.push({ + dispose: harness.rpc.subscribe( + (client) => client[WS_METHODS.subscribeTerminalEvents]({}), + (event) => terminalEvents.push(event), + ), + }); + + await sleep(250); + await seedGitPressure(harness.seededState.workspaceRoot); + + const gitLatencyIdle = await Promise.all([ + measureRpcLatencySeries({ + harness, + name: "git.status", + loadProfile: "idle-repo-pressure", + iterations: GIT_RPC_SAMPLE_COUNT, + execute: () => + harness!.rpc.request((client) => + client[WS_METHODS.gitStatus]({ + cwd: harness!.seededState.workspaceRoot, + }), + ), + }), + measureRpcLatencySeries({ + harness, + name: "git.listBranches", + loadProfile: "idle-repo-pressure", + iterations: GIT_RPC_SAMPLE_COUNT, + execute: () => + harness!.rpc.request((client) => + client[WS_METHODS.gitListBranches]({ + cwd: harness!.seededState.workspaceRoot, + }), + ), + }), + ]); + + const streamingThreadIds = await ensureStreamingThreads({ + harness, + orchestrationEvents, + projectId: burstProject!.id, + modelSelection: modelSelection!, + count: STREAM_THREAD_COUNT, + 
}); + + terminalSessions = await startTerminalLoad({ + harness, + terminalEvents, + workspaceRoot: harness.seededState.workspaceRoot, + threadIds: streamingThreadIds, + }); + + const terminalSamples = await captureCreateArchiveSamples({ + harness, + orchestrationEvents, + loadProfile: "terminal-output-3x", + sampleCount: CONTROL_PLANE_SAMPLE_COUNT, + projectId: burstProject!.id, + modelSelection: modelSelection!, + }); + + await sleep(6_500); + await startAssistantStreamingLoad({ + harness, + orchestrationEvents, + threadIds: streamingThreadIds, + label: "mixed-load", + }); + await terminalEvents.waitForCount( + "continued terminal output during mixed load", + (event) => + event.type === "output" && + terminalSessions.some((session) => session.terminalId === event.terminalId), + TERMINAL_SESSION_COUNT, + 30_000, + ); + + const mixedSamples = await captureCreateArchiveSamples({ + harness, + orchestrationEvents, + loadProfile: "mixed-stream-terminal-git", + sampleCount: CONTROL_PLANE_SAMPLE_COUNT, + projectId: burstProject!.id, + modelSelection: modelSelection!, + }); + + const gitLatencyMixed = await Promise.all([ + measureRpcLatencySeries({ + harness, + name: "git.status", + loadProfile: "mixed-stream-terminal-git", + iterations: GIT_RPC_SAMPLE_COUNT, + execute: () => + harness!.rpc.request((client) => + client[WS_METHODS.gitStatus]({ + cwd: harness!.seededState.workspaceRoot, + }), + ), + }), + measureRpcLatencySeries({ + harness, + name: "git.listBranches", + loadProfile: "mixed-stream-terminal-git", + iterations: GIT_RPC_SAMPLE_COUNT, + execute: () => + harness!.rpc.request((client) => + client[WS_METHODS.gitListBranches]({ + cwd: harness!.seededState.workspaceRoot, + }), + ), + }), + ]); + + await closeTerminalLoad(harness, terminalSessions); + terminalSessions = []; + + const result = await harness.finishRun({ + artifactBasename: "terminal-mixed-git-baseline", + artifact: { + suite: "server-latency-critical-commands", + scenarioId: 
"burst_base_terminal_mixed_git_baseline", + startedAt: runStartedAt, + completedAt: nowIso(), + commandLatency: [ + ...groupCommandSeries(terminalSamples, "terminal-output-3x"), + ...groupCommandSeries(mixedSamples, "mixed-stream-terminal-git"), + ], + rpcLatency: [...gitLatencyIdle, ...gitLatencyMixed], + metadata: { + sampleCountPerProfile: CONTROL_PLANE_SAMPLE_COUNT, + gitRpcSampleCount: GIT_RPC_SAMPLE_COUNT, + branchCount: GIT_BRANCH_COUNT, + untrackedFileCount: GIT_UNTRACKED_FILE_COUNT, + terminalSessionCount: TERMINAL_SESSION_COUNT, + }, + }, + }); + harness = null; + + expect(result.artifact.commandLatency).toHaveLength(4); + expect(result.artifact.rpcLatency).toHaveLength(4); + for (const series of result.artifact.commandLatency) { + expect(series.samples).toHaveLength(CONTROL_PLANE_SAMPLE_COUNT); + expect(series.summary.dispatchToEventMs.maxMs).toBeLessThan(30_000); + } + for (const series of result.artifact.rpcLatency) { + expect(series.samples).toHaveLength(GIT_RPC_SAMPLE_COUNT); + expect(series.summary.maxMs).toBeLessThan(30_000); + } + } finally { + if (harness && terminalSessions.length > 0) { + await closeTerminalLoad(harness, terminalSessions).catch(() => undefined); + } + await harness?.dispose(); + } + }, 180_000); +}); diff --git a/apps/server/integration/perf/serverPerfHarness.ts b/apps/server/integration/perf/serverPerfHarness.ts new file mode 100644 index 0000000000..1f3897162f --- /dev/null +++ b/apps/server/integration/perf/serverPerfHarness.ts @@ -0,0 +1,393 @@ +import { spawn, type ChildProcess } from "node:child_process"; +import { mkdir, rm, writeFile } from "node:fs/promises"; +import { createServer } from "node:net"; +import { join, resolve } from "node:path"; +import { performance } from "node:perf_hooks"; +import { fileURLToPath } from "node:url"; + +import * as NodeSocket from "@effect/platform-node/NodeSocket"; +import { + type OrchestrationEvent, + type TerminalEvent, + WS_METHODS, + WsRpcGroup, +} from "@t3tools/contracts"; 
+import { + Effect, + Exit, + Layer, + ManagedRuntime, + Scope, + Stream, + type Scope as ScopeService, +} from "effect"; +import { RpcClient, RpcSerialization } from "effect/unstable/rpc"; +import { + summarizeLatencySamples, + summarizeLatencyValues, + type PerfLatencySample, + type PerfLatencySummary, + writeJsonArtifact, +} from "@t3tools/shared/perf/artifact"; +import type { + PerfProviderScenarioId, + PerfSeedScenarioId, +} from "@t3tools/shared/perf/scenarioCatalog"; +import { seedPerfState, type PerfSeededState } from "./seedPerfState.ts"; + +const repoRoot = fileURLToPath(new URL("../../../../", import.meta.url)); +const PERF_ARTIFACT_DIR_ENV = "T3CODE_PERF_ARTIFACT_DIR"; +const PERF_PROVIDER_ENV = "T3CODE_PERF_PROVIDER"; +const PERF_SCENARIO_ENV = "T3CODE_PERF_SCENARIO"; +const AUTO_BOOTSTRAP_PROJECT_ENV = "T3CODE_AUTO_BOOTSTRAP_PROJECT_FROM_CWD"; + +const makeWsRpcClient = RpcClient.make(WsRpcGroup); +type WsRpcClient = + typeof makeWsRpcClient extends Effect.Effect ? Client : never; + +export interface ServerCommandLatencyMeasurement { + readonly commandType: string; + readonly loadProfile: string; + readonly startedAt: string; + readonly dispatchToAckMs: number; + readonly dispatchToEventMs: number; + readonly ackToEventMs: number; + readonly resultSequence: number; + readonly eventSequence: number; + readonly metadata?: Record; +} + +export interface ServerCommandLatencySummary { + readonly count: number; + readonly dispatchToAckMs: PerfLatencySummary; + readonly dispatchToEventMs: PerfLatencySummary; + readonly ackToEventMs: PerfLatencySummary; +} + +export interface ServerCommandLatencySeries { + readonly name: string; + readonly loadProfile: string; + readonly summary: ServerCommandLatencySummary; + readonly samples: ReadonlyArray; +} + +export interface ServerRpcLatencySeries { + readonly name: string; + readonly loadProfile: string; + readonly summary: PerfLatencySummary; + readonly samples: ReadonlyArray; +} + +export interface 
ServerPerfRunArtifact { + readonly suite: string; + readonly scenarioId: string; + readonly startedAt: string; + readonly completedAt: string; + readonly commandLatency: ReadonlyArray; + readonly rpcLatency: ReadonlyArray; + readonly metadata?: Record; +} + +interface StartServerPerfHarnessOptions { + readonly suite: string; + readonly seedScenarioId: PerfSeedScenarioId; + readonly providerScenarioId?: PerfProviderScenarioId; +} + +interface FinishServerPerfRunOptions { + readonly artifactBasename?: string; + readonly artifact: ServerPerfRunArtifact; +} + +export interface ServerPerfHarness { + readonly seededState: PerfSeededState; + readonly wsUrl: string; + readonly artifactDir: string; + readonly rpc: PerfWsRpcClient; + readonly finishRun: (options: FinishServerPerfRunOptions) => Promise<{ + readonly artifactPath: string; + readonly artifact: ServerPerfRunArtifact; + }>; + readonly dispose: () => Promise; +} + +const wsRpcProtocolLayer = (wsUrl: string) => + RpcClient.layerProtocolSocket().pipe( + Layer.provide(NodeSocket.layerWebSocket(wsUrl)), + Layer.provide(RpcSerialization.layerJson), + ); + +async function pickFreePort(): Promise { + return await new Promise((resolvePort, reject) => { + const server = createServer(); + server.on("error", reject); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") { + reject(new Error("Unable to resolve a free localhost port.")); + return; + } + const { port } = address; + server.close((closeError) => { + if (closeError) { + reject(closeError); + return; + } + resolvePort(port); + }); + }); + }); +} + +async function stopChildProcess(process: ChildProcess): Promise { + if (process.exitCode !== null) { + return; + } + + process.kill("SIGTERM"); + const exited = await new Promise((resolveExited) => { + const timer = setTimeout(() => resolveExited(false), 5_000); + process.once("exit", () => { + clearTimeout(timer); + resolveExited(true); + }); + }); + + 
if (!exited && process.exitCode === null) { + process.kill("SIGKILL"); + await new Promise((resolveExited) => { + process.once("exit", () => resolveExited()); + }); + } +} + +async function ensureArtifactDir(suite: string, scenarioId: string): Promise { + const baseArtifactDir = resolve( + process.env[PERF_ARTIFACT_DIR_ENV] ?? join(repoRoot, "artifacts/perf/server"), + ); + const runId = `${suite}-${scenarioId}-${Date.now().toString()}`; + const artifactDir = join(baseArtifactDir, runId); + await mkdir(artifactDir, { recursive: true }); + return artifactDir; +} + +async function writeServerLogs( + artifactDir: string, + stdout: string, + stderr: string, + basename: string, +): Promise { + await mkdir(artifactDir, { recursive: true }); + await Promise.all([ + writeFile(join(artifactDir, `${basename}.server.stdout.log`), stdout, "utf8"), + writeFile(join(artifactDir, `${basename}.server.stderr.log`), stderr, "utf8"), + ]); +} + +function buildPerfServerEnv( + baseEnv: NodeJS.ProcessEnv, + providerScenarioId?: PerfProviderScenarioId, +): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { + ...baseEnv, + [AUTO_BOOTSTRAP_PROJECT_ENV]: "false", + }; + + if (!providerScenarioId) { + delete env[PERF_PROVIDER_ENV]; + delete env[PERF_SCENARIO_ENV]; + return env; + } + + return { + ...env, + [PERF_PROVIDER_ENV]: "1", + [PERF_SCENARIO_ENV]: providerScenarioId, + }; +} + +export class PerfWsRpcClient { + private readonly runtime: ManagedRuntime.ManagedRuntime; + private readonly clientScope: Scope.Closeable; + private readonly clientPromise: Promise; + private disposed = false; + + constructor(private readonly wsUrl: string) { + this.runtime = ManagedRuntime.make(wsRpcProtocolLayer(wsUrl)); + this.clientScope = this.runtime.runSync(Scope.make()); + this.clientPromise = this.runtime.runPromise(Scope.provide(this.clientScope)(makeWsRpcClient)); + } + + async request( + execute: (client: WsRpcClient) => Effect.Effect, + ): Promise { + if (this.disposed) { + throw new 
Error(`WebSocket RPC client disposed for ${this.wsUrl}`); + } + + const client = await this.clientPromise; + return await this.runtime.runPromise(Effect.suspend(() => execute(client))); + } + + subscribe( + connect: (client: WsRpcClient) => Stream.Stream, + listener: (value: TValue) => void, + ): () => void { + if (this.disposed) { + return () => undefined; + } + + let active = true; + const cancel = this.runtime.runCallback( + Effect.promise(() => this.clientPromise).pipe( + Effect.flatMap((client) => + Stream.runForEach(connect(client), (value) => + Effect.sync(() => { + if (!active) { + return; + } + listener(value); + }), + ), + ), + ), + ); + + return () => { + active = false; + cancel(); + }; + } + + async dispose(): Promise { + if (this.disposed) { + return; + } + this.disposed = true; + await this.runtime.runPromise(Scope.close(this.clientScope, Exit.void)); + await this.runtime.dispose(); + } +} + +async function waitForRpcReady(wsUrl: string, process: ChildProcess): Promise { + const startedAtMs = performance.now(); + const timeoutMs = 45_000; + + while (performance.now() - startedAtMs < timeoutMs) { + if (process.exitCode !== null) { + throw new Error(`Perf server exited early with code ${process.exitCode}.`); + } + + const client = new PerfWsRpcClient(wsUrl); + try { + await client.request((rpcClient) => rpcClient[WS_METHODS.serverGetSettings]({})); + await client.dispose(); + return; + } catch { + await client.dispose().catch(() => undefined); + await new Promise((resolveDelay) => setTimeout(resolveDelay, 200)); + } + } + + throw new Error(`Timed out waiting for websocket readiness at ${wsUrl}.`); +} + +export function summarizeCommandLatencyMeasurements( + samples: ReadonlyArray, +): ServerCommandLatencySummary { + return { + count: samples.length, + dispatchToAckMs: summarizeLatencyValues(samples.map((sample) => sample.dispatchToAckMs)), + dispatchToEventMs: summarizeLatencyValues(samples.map((sample) => sample.dispatchToEventMs)), + ackToEventMs: 
summarizeLatencyValues(samples.map((sample) => sample.ackToEventMs)), + }; +} + +export function summarizeRpcLatencySeries( + samples: ReadonlyArray, +): PerfLatencySummary { + return summarizeLatencySamples(samples); +} + +export async function startServerPerfHarness( + options: StartServerPerfHarnessOptions, +): Promise { + const seededState = await seedPerfState(options.seedScenarioId); + const artifactDir = await ensureArtifactDir(options.suite, options.seedScenarioId); + const port = await pickFreePort(); + const wsUrl = `ws://127.0.0.1:${port}/ws`; + const env = buildPerfServerEnv(process.env, options.providerScenarioId); + + let stdoutBuffer = ""; + let stderrBuffer = ""; + let finished = false; + const serverProcess = spawn( + "bun", + [ + "run", + "apps/server/src/bin.ts", + "--mode", + "web", + "--host", + "127.0.0.1", + "--port", + `${port}`, + "--base-dir", + seededState.baseDir, + "--no-browser", + ], + { + cwd: repoRoot, + env, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + serverProcess.stdout?.on("data", (chunk) => { + stdoutBuffer += chunk.toString(); + }); + serverProcess.stderr?.on("data", (chunk) => { + stderrBuffer += chunk.toString(); + }); + + const cleanup = async () => { + if (finished) { + return; + } + finished = true; + await stopChildProcess(serverProcess); + await rm(seededState.runParentDir, { recursive: true, force: true }); + }; + + try { + await waitForRpcReady(wsUrl, serverProcess); + const rpc = new PerfWsRpcClient(wsUrl); + + return { + seededState, + wsUrl, + artifactDir, + rpc, + finishRun: async ({ artifactBasename, artifact }) => { + const basename = artifactBasename ?? 
`${artifact.suite}-${artifact.scenarioId}`; + await rpc.dispose(); + await cleanup(); + await writeServerLogs(artifactDir, stdoutBuffer, stderrBuffer, basename); + const artifactPath = join(artifactDir, `${basename}.json`); + await writeJsonArtifact(artifactPath, artifact); + return { + artifactPath, + artifact, + }; + }, + dispose: async () => { + await rpc.dispose().catch(() => undefined); + await cleanup(); + }, + }; + } catch (error) { + await cleanup(); + throw error; + } +} + +export type { OrchestrationEvent, ScopeService, TerminalEvent, WsRpcClient }; diff --git a/apps/server/package.json b/apps/server/package.json index e59c7c208c..a86666ce2c 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -20,7 +20,8 @@ "start": "node dist/bin.mjs", "prepare": "effect-language-service patch", "typecheck": "tsc --noEmit", - "test": "vitest run" + "test": "vitest run", + "test:perf": "vitest run integration/perf" }, "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.77", diff --git a/apps/server/scripts/seedPerfState.ts b/apps/server/scripts/seedPerfState.ts new file mode 100644 index 0000000000..891b0f1a9e --- /dev/null +++ b/apps/server/scripts/seedPerfState.ts @@ -0,0 +1,52 @@ +import { join } from "node:path"; + +import { seedPerfState } from "../integration/perf/seedPerfState.ts"; +import { getPerfSeedScenario } from "@t3tools/shared/perf/scenarioCatalog"; + +const PERF_SEED_JSON_START = "__T3_PERF_SEED_JSON_START__"; +const PERF_SEED_JSON_END = "__T3_PERF_SEED_JSON_END__"; +const scenarioId = process.argv[2]; + +if (scenarioId !== "large_threads" && scenarioId !== "burst_base") { + console.error(`Expected a perf seed scenario id, received '${scenarioId ?? 
""}'.`); + process.exit(1); +} + +const seeded = await seedPerfState(scenarioId); +const scenario = getPerfSeedScenario(scenarioId); +const scenarioProjectById = new Map(scenario.projects.map((project) => [project.id, project])); +const scenarioThreadById = new Map(scenario.threads.map((thread) => [thread.id, thread])); +const projectById = new Map(seeded.snapshot.projects.map((project) => [project.id, project])); +const payload = JSON.stringify( + { + scenarioId: seeded.scenarioId, + runParentDir: seeded.runParentDir, + baseDir: seeded.baseDir, + workspaceRoot: seeded.workspaceRoot, + projectTitle: seeded.snapshot.projects[0]?.title ?? null, + projectSummaries: seeded.snapshot.projects.map((project) => ({ + id: project.id, + title: project.title, + workspaceRoot: + scenarioProjectById.get(project.id)?.workspaceDirectoryName !== undefined + ? join(seeded.baseDir, scenarioProjectById.get(project.id)?.workspaceDirectoryName ?? "") + : project.workspaceRoot, + threadCount: seeded.snapshot.threads.filter((thread) => thread.projectId === project.id) + .length, + })), + threadSummaries: seeded.snapshot.threads.map((thread) => ({ + id: thread.id, + projectId: thread.projectId, + projectTitle: projectById.get(thread.projectId)?.title ?? null, + title: thread.title, + turnCount: scenarioThreadById.get(thread.id)?.turnCount ?? 
null, + messageCount: thread.messages.length, + activityCount: thread.activities.length, + proposedPlanCount: thread.proposedPlans.length, + checkpointCount: thread.checkpoints.length, + })), + }, + null, + 2, +); +process.stdout.write(`${PERF_SEED_JSON_START}\n${payload}\n${PERF_SEED_JSON_END}\n`); diff --git a/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts b/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts index 781a0025e2..655342fcff 100644 --- a/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts +++ b/apps/server/src/orchestration/Layers/CheckpointReactor.test.ts @@ -95,7 +95,9 @@ function createProviderServiceHarness( listSessions, getCapabilities: () => Effect.succeed({ sessionModelSwitch: "in-session" }), rollbackConversation, - streamEvents: Stream.fromPubSub(runtimeEventPubSub), + get streamEvents() { + return Stream.fromPubSub(runtimeEventPubSub); + }, }; const emit = (event: LegacyProviderRuntimeEvent): void => { diff --git a/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts b/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts index 506d6d2864..227315da87 100644 --- a/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderCommandReactor.test.ts @@ -207,7 +207,9 @@ describe("ProviderCommandReactor", () => { sessionModelSwitch: input?.sessionModelSwitch ?? 
"in-session", }), rollbackConversation: () => unsupported(), - streamEvents: Stream.fromPubSub(runtimeEventPubSub), + get streamEvents() { + return Stream.fromPubSub(runtimeEventPubSub); + }, }; const orchestrationLayer = OrchestrationEngineLive.pipe( diff --git a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts index 529eae2444..89e4061a19 100644 --- a/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts +++ b/apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts @@ -97,7 +97,9 @@ function createProviderServiceHarness() { listSessions: () => Effect.succeed([...runtimeSessions]), getCapabilities: () => Effect.succeed({ sessionModelSwitch: "in-session" }), rollbackConversation: () => unsupported(), - streamEvents: Stream.fromPubSub(runtimeEventPubSub), + get streamEvents() { + return Stream.fromPubSub(runtimeEventPubSub); + }, }; const setSession = (session: ProviderSession): void => { diff --git a/apps/server/src/orchestration/Layers/RuntimeReceiptBus.ts b/apps/server/src/orchestration/Layers/RuntimeReceiptBus.ts index 56c526c08b..e314b5df69 100644 --- a/apps/server/src/orchestration/Layers/RuntimeReceiptBus.ts +++ b/apps/server/src/orchestration/Layers/RuntimeReceiptBus.ts @@ -11,7 +11,9 @@ const makeRuntimeReceiptBus = Effect.gen(function* () { return { publish: (receipt) => PubSub.publish(pubSub, receipt).pipe(Effect.asVoid), - stream: Stream.fromPubSub(pubSub), + get stream() { + return Stream.fromPubSub(pubSub); + }, } satisfies RuntimeReceiptBusShape; }); diff --git a/apps/server/src/perf/PerfProviderAdapter.test.ts b/apps/server/src/perf/PerfProviderAdapter.test.ts new file mode 100644 index 0000000000..16c56f88e4 --- /dev/null +++ b/apps/server/src/perf/PerfProviderAdapter.test.ts @@ -0,0 +1,188 @@ +import { afterEach, describe, expect, it } from "vitest"; +import { Effect, Stream } from "effect"; +import { PERF_CATALOG_IDS } from 
"@t3tools/shared/perf/scenarioCatalog"; + +import { makePerfProviderAdapter } from "./PerfProviderAdapter.ts"; + +const PERF_SCENARIO_ENV = "T3CODE_PERF_SCENARIO"; +const STREAM_SAMPLE_EVENT_COUNT = 96; + +describe("PerfProviderAdapter", () => { + const previousScenarioEnv = process.env[PERF_SCENARIO_ENV]; + + afterEach(() => { + if (previousScenarioEnv === undefined) { + delete process.env[PERF_SCENARIO_ENV]; + return; + } + process.env[PERF_SCENARIO_ENV] = previousScenarioEnv; + }); + + it("emits canonical runtime events for the dense assistant stream scenario", async () => { + process.env[PERF_SCENARIO_ENV] = "dense_assistant_stream"; + const adapter = await Effect.runPromise(makePerfProviderAdapter); + const threadId = PERF_CATALOG_IDS.burstBase.burstThreadId; + + await Effect.runPromise( + adapter.startSession({ + threadId, + provider: "codex", + runtimeMode: "full-access", + }), + ); + + const firstEventsPromise = Effect.runPromise( + Stream.runCollect(Stream.take(adapter.streamEvents, STREAM_SAMPLE_EVENT_COUNT)), + ); + await Effect.runPromise( + adapter.sendTurn({ + threadId, + input: "exercise the dense perf scenario", + attachments: [], + }), + ); + + const firstEvents = Array.from(await firstEventsPromise); + expect(firstEvents.filter((event) => event.type === "turn.started")).toHaveLength(3); + expect(new Set(firstEvents.slice(0, 18).map((event) => String(event.threadId)))).toEqual( + new Set([ + String(threadId), + String(PERF_CATALOG_IDS.burstBase.navigationThreadId), + String(PERF_CATALOG_IDS.burstBase.fillerThreadId), + ]), + ); + expect( + firstEvents.some( + (event) => + event.threadId === PERF_CATALOG_IDS.burstBase.navigationThreadId && + event.type === "content.delta" && + event.payload.delta.includes("Navigation lane"), + ), + ).toBe(true); + expect( + firstEvents.some( + (event) => + event.threadId === threadId && + event.type === "item.completed" && + event.payload.itemType === "assistant_message", + ), + ).toBe(true); + expect( + 
firstEvents.some( + (event) => + event.threadId === PERF_CATALOG_IDS.burstBase.fillerThreadId && + event.type === "item.updated" && + event.payload.itemType === "command_execution", + ), + ).toBe(true); + + const firstBurstFollowupCompletionIndex = firstEvents.findIndex( + (event) => + event.threadId === threadId && + event.type === "item.completed" && + event.payload.itemType === "assistant_message" && + String(event.itemId ?? "").includes("followup"), + ); + const burstWorklogIdsBeforeFollowup = new Set( + firstEvents + .slice(0, firstBurstFollowupCompletionIndex) + .filter( + (event) => + event.threadId === threadId && + event.type === "item.started" && + event.payload.itemType === "command_execution", + ) + .map((event) => String(event.itemId)), + ); + const burstAssistantMessageLengths = firstEvents + .flatMap((event) => { + if ( + event.threadId !== threadId || + event.type !== "item.completed" || + event.payload.itemType !== "assistant_message" || + event.payload.detail === undefined + ) { + return []; + } + return [event.payload.detail.length]; + }) + .slice(0, 4); + + expect(firstBurstFollowupCompletionIndex).toBeGreaterThan(0); + expect(burstWorklogIdsBeforeFollowup.size).toBeGreaterThanOrEqual(3); + expect(new Set(burstAssistantMessageLengths).size).toBeGreaterThan(1); + }); + + it("assigns fresh runtime ids when the same burst thread is sent twice", async () => { + process.env[PERF_SCENARIO_ENV] = "dense_assistant_stream"; + const adapter = await Effect.runPromise(makePerfProviderAdapter); + const threadId = PERF_CATALOG_IDS.burstBase.burstThreadId; + + await Effect.runPromise( + adapter.startSession({ + threadId, + provider: "codex", + runtimeMode: "full-access", + }), + ); + + const firstEventsPromise = Effect.runPromise( + Stream.runCollect(Stream.take(adapter.streamEvents, STREAM_SAMPLE_EVENT_COUNT)), + ); + await Effect.runPromise( + adapter.sendTurn({ + threadId, + input: "first dense perf pass", + attachments: [], + }), + ); + + const firstEvents 
= Array.from(await firstEventsPromise); + + const secondEventsPromise = Effect.runPromise( + Stream.runCollect(Stream.take(adapter.streamEvents, STREAM_SAMPLE_EVENT_COUNT)), + ); + await Effect.runPromise( + adapter.sendTurn({ + threadId, + input: "second dense perf pass", + attachments: [], + }), + ); + + const secondEvents = Array.from(await secondEventsPromise); + + const firstNavigationTurnStarted = firstEvents.find( + (event) => + event.threadId === PERF_CATALOG_IDS.burstBase.navigationThreadId && + event.type === "turn.started", + ); + const secondNavigationTurnStarted = secondEvents.find( + (event) => + event.threadId === PERF_CATALOG_IDS.burstBase.navigationThreadId && + event.type === "turn.started", + ); + const firstBurstAssistantCompletion = firstEvents.find( + (event) => + event.threadId === threadId && + event.type === "item.completed" && + event.payload.itemType === "assistant_message", + ); + const secondBurstAssistantCompletion = secondEvents.find( + (event) => + event.threadId === threadId && + event.type === "item.completed" && + event.payload.itemType === "assistant_message", + ); + const secondRunNamespaceSuffix = `--perf-run-${String(threadId)}-0002`; + + expect(firstNavigationTurnStarted?.turnId).toBe(PERF_CATALOG_IDS.provider.navigationLiveTurnId); + expect(firstBurstAssistantCompletion?.itemId).toBeDefined(); + expect(secondNavigationTurnStarted?.turnId).toBeDefined(); + expect(secondBurstAssistantCompletion?.itemId).toBeDefined(); + expect(secondNavigationTurnStarted?.turnId).not.toBe(firstNavigationTurnStarted?.turnId); + expect(secondBurstAssistantCompletion?.itemId).not.toBe(firstBurstAssistantCompletion?.itemId); + expect(String(secondNavigationTurnStarted?.turnId)).toContain(secondRunNamespaceSuffix); + expect(String(secondBurstAssistantCompletion?.itemId)).toContain(secondRunNamespaceSuffix); + }); +}); diff --git a/apps/server/src/perf/PerfProviderAdapter.ts b/apps/server/src/perf/PerfProviderAdapter.ts new file mode 100644 index 
0000000000..bc28a929d7 --- /dev/null +++ b/apps/server/src/perf/PerfProviderAdapter.ts @@ -0,0 +1,456 @@ +import { + EventId, + RuntimeItemId, + RuntimeRequestId, + RuntimeSessionId, + TurnId, + type ApprovalRequestId, + type ProviderApprovalDecision, + type ProviderRuntimeEvent, + type ProviderSendTurnInput, + type ProviderSession, + type ProviderSessionStartInput, + type ProviderTurnStartResult, + type ProviderUserInputAnswers, + type ThreadId, +} from "@t3tools/contracts"; +import { Effect, Queue, Stream } from "effect"; + +import { + getPerfProviderScenario, + type PerfProviderScenario, + type TimedFixtureProviderRuntimeEvent, +} from "@t3tools/shared/perf/scenarioCatalog"; +import { + ProviderAdapterSessionNotFoundError, + ProviderAdapterValidationError, + type ProviderAdapterError, +} from "../provider/Errors.ts"; +import type { + ProviderAdapterShape, + ProviderThreadSnapshot, + ProviderThreadTurnSnapshot, +} from "../provider/Services/ProviderAdapter.ts"; +import { getPerfProviderScenarioId } from "./config.ts"; + +interface PerfSessionState { + session: ProviderSession; + snapshot: ProviderThreadSnapshot; + turnCount: number; + pendingTimers: Set>; +} + +function nowIso(): string { + return new Date().toISOString(); +} + +function sessionNotFound(threadId: ThreadId): ProviderAdapterSessionNotFoundError { + return new ProviderAdapterSessionNotFoundError({ + provider: "codex", + threadId: String(threadId), + }); +} + +function resolvePerfScenario(inputText: string | undefined): PerfProviderScenario { + const scenarioId = getPerfProviderScenarioId(); + if (scenarioId) { + return getPerfProviderScenario(scenarioId); + } + + const trimmedInput = inputText?.trim() || "perf request"; + return { + id: "dense_assistant_stream", + provider: "codex", + sentinelText: `PERF_STREAM_SENTINEL:fallback:${trimmedInput}`, + totalDurationMs: 48, + events: [ + { + delayMs: 0, + type: "turn.started", + payload: { + model: "gpt-5.4", + }, + }, + { + delayMs: 16, + type: 
"content.delta", + payload: { + streamKind: "assistant_text", + delta: `Perf fallback response for: ${trimmedInput}. `, + }, + }, + { + delayMs: 32, + type: "content.delta", + payload: { + streamKind: "assistant_text", + delta: `PERF_STREAM_SENTINEL:fallback:${trimmedInput}`, + }, + }, + { + delayMs: 48, + type: "turn.completed", + payload: { + state: "completed", + }, + }, + ], + }; +} + +function toIdleSession(session: ProviderSession, updatedAt: string): ProviderSession { + const { activeTurnId: _activeTurnId, ...rest } = session; + return { + ...rest, + status: "ready", + updatedAt, + }; +} + +function namespacePerfFixtureId( + baseId: string, + sourceThreadId: ThreadId, + runIndex: number, +): string { + return `${baseId}--perf-run-${String(sourceThreadId)}-${runIndex.toString().padStart(4, "0")}`; +} + +function buildRuntimeEvent(input: { + readonly template: TimedFixtureProviderRuntimeEvent; + readonly threadId: ThreadId; + readonly turnId: TurnId; + readonly startedAtMs: number; + readonly index: number; + readonly runIndex: number; +}): ProviderRuntimeEvent { + const eventThreadId = input.template.threadId ?? input.threadId; + const eventTurnId = + input.template.turnId === undefined + ? input.turnId + : input.runIndex === 1 + ? input.template.turnId + : TurnId.makeUnsafe( + namespacePerfFixtureId(String(input.template.turnId), input.threadId, input.runIndex), + ); + const createdAt = new Date(input.startedAtMs + (input.template.delayMs ?? 0)).toISOString(); + return { + type: input.template.type, + eventId: EventId.makeUnsafe( + `perf-runtime:${String(eventThreadId)}:${String(eventTurnId)}:${input.index.toString().padStart(4, "0")}`, + ), + provider: "codex", + threadId: eventThreadId, + turnId: eventTurnId, + createdAt, + ...(input.template.itemId + ? { + itemId: RuntimeItemId.makeUnsafe( + input.runIndex === 1 + ? 
input.template.itemId + : namespacePerfFixtureId(input.template.itemId, input.threadId, input.runIndex), + ), + } + : {}), + ...(input.template.requestId + ? { + requestId: RuntimeRequestId.makeUnsafe( + input.runIndex === 1 + ? input.template.requestId + : namespacePerfFixtureId(input.template.requestId, input.threadId, input.runIndex), + ), + } + : {}), + payload: input.template.payload, + } as ProviderRuntimeEvent; +} + +export const makePerfProviderAdapter = Effect.gen(function* () { + const runtimeEvents = yield* Queue.unbounded(); + const sessions = new Map(); + + const clearPendingTimers = (threadId: ThreadId) => + Effect.sync(() => { + const state = sessions.get(threadId); + if (!state) { + return; + } + for (const timer of state.pendingTimers) { + clearTimeout(timer); + } + state.pendingTimers.clear(); + }); + + const scheduleRuntimeEvent = (input: { + readonly state: PerfSessionState; + readonly event: ProviderRuntimeEvent; + readonly delayMs: number; + readonly onAfterEmit?: () => void; + }) => + Effect.sync(() => { + const timer = setTimeout(() => { + input.state.pendingTimers.delete(timer); + Effect.runFork( + Queue.offer(runtimeEvents, input.event).pipe( + Effect.tap(() => Effect.sync(() => input.onAfterEmit?.())), + Effect.asVoid, + ), + ); + }, input.delayMs); + input.state.pendingTimers.add(timer); + }); + + const startSession: ProviderAdapterShape["startSession"] = ( + input: ProviderSessionStartInput, + ) => + Effect.gen(function* () { + if (input.provider !== undefined && input.provider !== "codex") { + return yield* new ProviderAdapterValidationError({ + provider: "codex", + operation: "startSession", + issue: `Perf provider only supports codex sessions, received '${input.provider}'.`, + }); + } + + const createdAt = nowIso(); + const session: ProviderSession = { + provider: "codex", + status: "ready", + runtimeMode: input.runtimeMode, + threadId: input.threadId, + ...(input.cwd ? { cwd: input.cwd } : {}), + ...(input.modelSelection?.model ? 
{ model: input.modelSelection.model } : {}), + resumeCursor: + input.resumeCursor ?? + RuntimeSessionId.makeUnsafe(`perf-resume:${String(input.threadId)}:${Date.now()}`), + createdAt, + updatedAt: createdAt, + }; + + sessions.set(input.threadId, { + session, + snapshot: { + threadId: input.threadId, + turns: [], + }, + turnCount: 0, + pendingTimers: new Set(), + }); + + return session; + }); + + const sendTurn: ProviderAdapterShape["sendTurn"] = ( + input: ProviderSendTurnInput, + ) => + Effect.gen(function* () { + const state = sessions.get(input.threadId); + if (!state) { + return yield* Effect.fail(sessionNotFound(input.threadId)); + } + + yield* clearPendingTimers(input.threadId); + + state.turnCount += 1; + const turnId = TurnId.makeUnsafe( + `perf-turn:${String(input.threadId)}:${state.turnCount.toString().padStart(4, "0")}`, + ); + const scenario = resolvePerfScenario(input.input); + const startedAtMs = Date.now(); + const sentAt = new Date(startedAtMs).toISOString(); + + state.session = { + ...state.session, + status: "running", + activeTurnId: turnId, + updatedAt: sentAt, + }; + + const userTurnItem = { + type: "userMessage", + content: [{ type: "text", text: input.input ?? "" }], + } as const; + const nextTurn: ProviderThreadTurnSnapshot = { + id: turnId, + items: [userTurnItem], + }; + state.snapshot = { + threadId: state.snapshot.threadId, + turns: [...state.snapshot.turns, nextTurn], + }; + + let assistantText = ""; + const updateAssistantSnapshot = (completedAt: string) => { + state.session = toIdleSession(state.session, completedAt); + state.snapshot = { + threadId: state.snapshot.threadId, + turns: state.snapshot.turns.map((turn) => + turn.id === turnId + ? { + ...turn, + items: + assistantText.length > 0 + ? 
[...turn.items, { type: "agentMessage", text: assistantText }] + : turn.items, + } + : turn, + ), + }; + }; + + yield* Effect.forEach( + scenario.events, + (template, index) => { + const event = buildRuntimeEvent({ + template, + threadId: input.threadId, + turnId, + startedAtMs, + index, + runIndex: state.turnCount, + }); + const delayMs = template.delayMs ?? 0; + return scheduleRuntimeEvent({ + state, + event, + delayMs, + onAfterEmit: () => { + if ( + event.threadId === input.threadId && + event.type === "content.delta" && + event.payload.streamKind === "assistant_text" + ) { + assistantText += event.payload.delta; + } + if (event.threadId === input.threadId && event.type === "turn.completed") { + updateAssistantSnapshot(event.createdAt); + } + }, + }); + }, + { concurrency: 1 }, + ); + + return { + threadId: input.threadId, + turnId, + } satisfies ProviderTurnStartResult; + }); + + const interruptTurn: ProviderAdapterShape["interruptTurn"] = ( + threadId, + turnId, + ) => + Effect.gen(function* () { + const state = sessions.get(threadId); + if (!state) { + return yield* Effect.fail(sessionNotFound(threadId)); + } + yield* clearPendingTimers(threadId); + const interruptedTurnId = turnId ?? state.session.activeTurnId; + state.session = toIdleSession(state.session, nowIso()); + if (interruptedTurnId) { + yield* Queue.offer(runtimeEvents, { + type: "turn.completed", + eventId: EventId.makeUnsafe( + `perf-runtime:${String(threadId)}:${String(interruptedTurnId)}:interrupted`, + ), + provider: "codex", + threadId, + turnId: interruptedTurnId, + createdAt: nowIso(), + payload: { + state: "interrupted", + }, + } satisfies ProviderRuntimeEvent); + } + }); + + const respondToRequest: ProviderAdapterShape["respondToRequest"] = ( + threadId, + _requestId: ApprovalRequestId, + _decision: ProviderApprovalDecision, + ) => (sessions.has(threadId) ? 
Effect.void : Effect.fail(sessionNotFound(threadId))); + + const respondToUserInput: ProviderAdapterShape["respondToUserInput"] = ( + threadId, + _requestId, + _answers: ProviderUserInputAnswers, + ) => (sessions.has(threadId) ? Effect.void : Effect.fail(sessionNotFound(threadId))); + + const stopSession: ProviderAdapterShape["stopSession"] = (threadId) => + Effect.gen(function* () { + if (!sessions.has(threadId)) { + return yield* Effect.fail(sessionNotFound(threadId)); + } + yield* clearPendingTimers(threadId); + sessions.delete(threadId); + }); + + const listSessions: ProviderAdapterShape["listSessions"] = () => + Effect.sync(() => Array.from(sessions.values(), (state) => state.session)); + + const hasSession: ProviderAdapterShape["hasSession"] = (threadId) => + Effect.succeed(sessions.has(threadId)); + + const readThread: ProviderAdapterShape["readThread"] = (threadId) => + sessions.has(threadId) + ? Effect.succeed(sessions.get(threadId)!.snapshot) + : Effect.fail(sessionNotFound(threadId)); + + const rollbackThread: ProviderAdapterShape["rollbackThread"] = ( + threadId, + numTurns, + ) => + Effect.gen(function* () { + const state = sessions.get(threadId); + if (!state) { + return yield* Effect.fail(sessionNotFound(threadId)); + } + if (!Number.isInteger(numTurns) || numTurns < 0 || numTurns > state.snapshot.turns.length) { + return yield* new ProviderAdapterValidationError({ + provider: "codex", + operation: "rollbackThread", + issue: "numTurns must be an integer between 0 and the current turn count.", + }); + } + state.snapshot = { + threadId: state.snapshot.threadId, + turns: state.snapshot.turns.slice(0, state.snapshot.turns.length - numTurns), + }; + state.turnCount = state.snapshot.turns.length; + return state.snapshot; + }); + + const stopAll: ProviderAdapterShape["stopAll"] = () => + Effect.gen(function* () { + yield* Effect.forEach( + Array.from(sessions.keys()), + (threadId) => clearPendingTimers(threadId), + { + concurrency: "unbounded", + }, + ); + 
sessions.clear(); + }); + + return { + provider: "codex", + capabilities: { + sessionModelSwitch: "in-session", + }, + startSession, + sendTurn, + interruptTurn, + respondToRequest, + respondToUserInput, + stopSession, + listSessions, + hasSession, + readThread, + rollbackThread, + stopAll, + get streamEvents() { + return Stream.fromQueue(runtimeEvents); + }, + } satisfies ProviderAdapterShape; +}); diff --git a/apps/server/src/perf/PerfProviderLayers.ts b/apps/server/src/perf/PerfProviderLayers.ts new file mode 100644 index 0000000000..d26f480e00 --- /dev/null +++ b/apps/server/src/perf/PerfProviderLayers.ts @@ -0,0 +1,29 @@ +import { Effect, Layer } from "effect"; + +import { ProviderSessionRuntimeRepositoryLive } from "../persistence/Layers/ProviderSessionRuntime.ts"; +import { ProviderUnsupportedError } from "../provider/Errors.ts"; +import { makeProviderServiceLive } from "../provider/Layers/ProviderService.ts"; +import { ProviderSessionDirectoryLive } from "../provider/Layers/ProviderSessionDirectory.ts"; +import { ProviderAdapterRegistry } from "../provider/Services/ProviderAdapterRegistry.ts"; +import { makePerfProviderAdapter } from "./PerfProviderAdapter.ts"; + +export const PerfProviderLayerLive = Layer.unwrap( + Effect.gen(function* () { + const providerSessionDirectoryLayer = ProviderSessionDirectoryLive.pipe( + Layer.provide(ProviderSessionRuntimeRepositoryLive), + ); + const adapter = yield* makePerfProviderAdapter; + const adapterRegistryLayer = Layer.succeed(ProviderAdapterRegistry, { + getByProvider: (provider) => + provider === adapter.provider + ? 
Effect.succeed(adapter) + : Effect.fail(new ProviderUnsupportedError({ provider })), + listProviders: () => Effect.succeed([adapter.provider]), + } as typeof ProviderAdapterRegistry.Service); + + return makeProviderServiceLive().pipe( + Layer.provide(adapterRegistryLayer), + Layer.provide(providerSessionDirectoryLayer), + ); + }), +); diff --git a/apps/server/src/perf/PerfProviderRegistry.ts b/apps/server/src/perf/PerfProviderRegistry.ts new file mode 100644 index 0000000000..9d0f4243d9 --- /dev/null +++ b/apps/server/src/perf/PerfProviderRegistry.ts @@ -0,0 +1,160 @@ +import { + DEFAULT_MODEL_BY_PROVIDER, + type ServerProvider, + type ServerProviderModel, +} from "@t3tools/contracts"; +import { Effect, Equal, Layer, PubSub, Ref, Stream } from "effect"; + +import { getClaudeModelCapabilities } from "../provider/Layers/ClaudeProvider.ts"; +import { getCodexModelCapabilities } from "../provider/Layers/CodexProvider.ts"; +import { ServerSettingsService } from "../serverSettings.ts"; +import { + ProviderRegistry, + type ProviderRegistryShape, +} from "../provider/Services/ProviderRegistry.ts"; + +const makeProviderModel = (input: { + readonly slug: string; + readonly name: string; + readonly capabilities: ServerProviderModel["capabilities"]; +}): ServerProviderModel => ({ + slug: input.slug, + name: input.name, + isCustom: false, + capabilities: input.capabilities, +}); + +const CODEX_MODELS: ReadonlyArray = [ + makeProviderModel({ + slug: DEFAULT_MODEL_BY_PROVIDER.codex, + name: "GPT-5.4", + capabilities: getCodexModelCapabilities(DEFAULT_MODEL_BY_PROVIDER.codex), + }), +]; + +const CLAUDE_MODELS: ReadonlyArray = [ + makeProviderModel({ + slug: DEFAULT_MODEL_BY_PROVIDER.claudeAgent, + name: "Claude Sonnet 4.6", + capabilities: getClaudeModelCapabilities(DEFAULT_MODEL_BY_PROVIDER.claudeAgent), + }), +]; + +const makeProviderSnapshot = (input: { + readonly provider: ServerProvider["provider"]; + readonly enabled: boolean; + readonly checkedAt: string; +}): 
ServerProvider => { + if (input.provider === "codex") { + return { + provider: "codex", + enabled: input.enabled, + installed: true, + version: "perf-fixture", + status: input.enabled ? "ready" : "disabled", + auth: input.enabled + ? { + status: "authenticated", + type: "perf", + label: "Local perf harness", + } + : { + status: "unknown", + }, + checkedAt: input.checkedAt, + message: input.enabled ? "Perf fixture provider active." : "Disabled in T3 Code settings.", + models: CODEX_MODELS, + }; + } + + return { + provider: "claudeAgent", + enabled: input.enabled, + installed: false, + version: null, + status: input.enabled ? "warning" : "disabled", + auth: { + status: "unknown", + }, + checkedAt: input.checkedAt, + message: input.enabled + ? "Perf harness only stubs Codex runtime sessions." + : "Disabled in T3 Code settings.", + models: CLAUDE_MODELS, + }; +}; + +const loadPerfProviderSnapshots = Effect.fn("loadPerfProviderSnapshots")(function* (input: { + readonly serverSettings: ServerSettingsService["Service"]; +}) { + const settings = yield* input.serverSettings.getSettings; + const checkedAt = new Date().toISOString(); + return [ + makeProviderSnapshot({ + provider: "codex", + enabled: settings.providers.codex.enabled, + checkedAt, + }), + makeProviderSnapshot({ + provider: "claudeAgent", + enabled: settings.providers.claudeAgent.enabled, + checkedAt, + }), + ] as const; +}); + +const loadPerfProviderSnapshotsSafely = ( + serverSettings: ServerSettingsService["Service"], + fallback: ReadonlyArray, +) => + loadPerfProviderSnapshots({ serverSettings }).pipe( + Effect.tapError(Effect.logError), + Effect.orElseSucceed(() => fallback), + ); + +export const PerfProviderRegistryLive = Layer.effect( + ProviderRegistry, + Effect.gen(function* () { + const serverSettings = yield* ServerSettingsService; + const changesPubSub = yield* Effect.acquireRelease( + PubSub.unbounded>(), + PubSub.shutdown, + ); + const providersRef = yield* Ref.make>( + yield* 
loadPerfProviderSnapshotsSafely(serverSettings, [ + makeProviderSnapshot({ + provider: "codex", + enabled: true, + checkedAt: new Date().toISOString(), + }), + makeProviderSnapshot({ + provider: "claudeAgent", + enabled: false, + checkedAt: new Date().toISOString(), + }), + ]), + ); + + const refreshProviders = Effect.fn("refreshPerfProviders")(function* () { + const previous = yield* Ref.get(providersRef); + const next = yield* loadPerfProviderSnapshotsSafely(serverSettings, previous); + yield* Ref.set(providersRef, next); + if (!Equal.equals(previous, next)) { + yield* PubSub.publish(changesPubSub, next); + } + return next; + }); + + yield* Stream.runForEach(serverSettings.streamChanges, () => refreshProviders()).pipe( + Effect.forkScoped, + ); + + return { + getProviders: Ref.get(providersRef), + refresh: (_provider) => refreshProviders(), + get streamChanges() { + return Stream.fromPubSub(changesPubSub); + }, + } satisfies ProviderRegistryShape; + }), +); diff --git a/apps/server/src/perf/config.ts b/apps/server/src/perf/config.ts new file mode 100644 index 0000000000..2759fecf54 --- /dev/null +++ b/apps/server/src/perf/config.ts @@ -0,0 +1,18 @@ +import type { PerfProviderScenarioId } from "@t3tools/shared/perf/scenarioCatalog"; + +export const PERF_PROVIDER_ENV = "T3CODE_PERF_PROVIDER"; +export const PERF_SCENARIO_ENV = "T3CODE_PERF_SCENARIO"; +export const PERF_ARTIFACT_DIR_ENV = "T3CODE_PERF_ARTIFACT_DIR"; +export const PERF_HEADFUL_ENV = "T3CODE_PERF_HEADFUL"; + +export function isPerfProviderEnabled(): boolean { + return process.env[PERF_PROVIDER_ENV] === "1"; +} + +export function getPerfProviderScenarioId(): PerfProviderScenarioId | null { + const rawScenarioId = process.env[PERF_SCENARIO_ENV]?.trim(); + if (rawScenarioId === "dense_assistant_stream" || rawScenarioId === "parallel_assistant_stream") { + return rawScenarioId; + } + return null; +} diff --git a/apps/server/src/provider/Layers/ClaudeAdapter.ts 
b/apps/server/src/provider/Layers/ClaudeAdapter.ts index d99e2ad203..9f2eeb014e 100644 --- a/apps/server/src/provider/Layers/ClaudeAdapter.ts +++ b/apps/server/src/provider/Layers/ClaudeAdapter.ts @@ -3054,7 +3054,9 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* ( listSessions, hasSession, stopAll, - streamEvents: Stream.fromQueue(runtimeEventQueue), + get streamEvents() { + return Stream.fromQueue(runtimeEventQueue); + }, } satisfies ClaudeAdapterShape; }); diff --git a/apps/server/src/provider/Layers/CodexAdapter.ts b/apps/server/src/provider/Layers/CodexAdapter.ts index cee6bca6ed..8b9f3b59e7 100644 --- a/apps/server/src/provider/Layers/CodexAdapter.ts +++ b/apps/server/src/provider/Layers/CodexAdapter.ts @@ -1631,7 +1631,9 @@ const makeCodexAdapter = Effect.fn("makeCodexAdapter")(function* ( listSessions, hasSession, stopAll, - streamEvents: Stream.fromQueue(runtimeEventQueue), + get streamEvents() { + return Stream.fromQueue(runtimeEventQueue); + }, } satisfies CodexAdapterShape; }); diff --git a/apps/server/src/provider/Layers/ProviderRegistry.test.ts b/apps/server/src/provider/Layers/ProviderRegistry.test.ts index 116c008d67..ca27371b61 100644 --- a/apps/server/src/provider/Layers/ProviderRegistry.test.ts +++ b/apps/server/src/provider/Layers/ProviderRegistry.test.ts @@ -115,7 +115,9 @@ function makeMutableServerSettingsService( yield* PubSub.publish(changes, next); return next; }), - streamChanges: Stream.fromPubSub(changes), + get streamChanges() { + return Stream.fromPubSub(changes); + }, } satisfies ServerSettingsShape; }); } diff --git a/apps/server/src/provider/Layers/ProviderService.test.ts b/apps/server/src/provider/Layers/ProviderService.test.ts index 651a611649..115d8f8750 100644 --- a/apps/server/src/provider/Layers/ProviderService.test.ts +++ b/apps/server/src/provider/Layers/ProviderService.test.ts @@ -191,7 +191,9 @@ function makeFakeCodexAdapter(provider: ProviderKind = "codex") { readThread, rollbackThread, stopAll, - 
streamEvents: Stream.fromPubSub(runtimeEventPubSub), + get streamEvents() { + return Stream.fromPubSub(runtimeEventPubSub); + }, }; const emit = (event: LegacyProviderRuntimeEvent): void => { diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index 40a8eb09bc..aafa0b5e4d 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -42,6 +42,9 @@ import { ProjectFaviconResolverLive } from "./project/Layers/ProjectFaviconResol import { WorkspaceEntriesLive } from "./workspace/Layers/WorkspaceEntries"; import { WorkspaceFileSystemLive } from "./workspace/Layers/WorkspaceFileSystem"; import { WorkspacePathsLive } from "./workspace/Layers/WorkspacePaths"; +import { isPerfProviderEnabled } from "./perf/config"; +import { PerfProviderRegistryLive } from "./perf/PerfProviderRegistry"; +import { PerfProviderLayerLive } from "./perf/PerfProviderLayers"; const PtyAdapterLive = Layer.unwrap( Effect.gen(function* () { @@ -112,7 +115,7 @@ const CheckpointingLayerLive = Layer.empty.pipe( Layer.provideMerge(CheckpointStoreLive), ); -const ProviderLayerLive = Layer.unwrap( +const DefaultProviderLayerLive = Layer.unwrap( Effect.gen(function* () { const { providerEventLogPath } = yield* ServerConfig; const nativeEventLogger = yield* makeEventNdjsonLogger(providerEventLogPath, { @@ -141,6 +144,14 @@ const ProviderLayerLive = Layer.unwrap( }), ); +const SelectedProviderLayerLive = isPerfProviderEnabled() + ? PerfProviderLayerLive + : DefaultProviderLayerLive; + +const SelectedProviderRegistryLayerLive = isPerfProviderEnabled() + ? 
PerfProviderRegistryLive + : ProviderRegistryLive; + const PersistenceLayerLive = Layer.empty.pipe(Layer.provideMerge(SqlitePersistenceLayerLive)); const GitLayerLive = Layer.empty.pipe( @@ -172,12 +183,12 @@ const RuntimeServicesLive = Layer.empty.pipe( // Core Services Layer.provideMerge(CheckpointingLayerLive), Layer.provideMerge(OrchestrationLayerLive), - Layer.provideMerge(ProviderLayerLive), + Layer.provideMerge(SelectedProviderLayerLive), Layer.provideMerge(GitLayerLive), Layer.provideMerge(TerminalLayerLive), Layer.provideMerge(PersistenceLayerLive), Layer.provideMerge(KeybindingsLive), - Layer.provideMerge(ProviderRegistryLive), + Layer.provideMerge(SelectedProviderRegistryLayerLive), Layer.provideMerge(ServerSettingsLive), Layer.provideMerge(WorkspaceLayerLive), Layer.provideMerge(ProjectFaviconResolverLive), diff --git a/apps/web/package.json b/apps/web/package.json index 499943c3f0..7d563371af 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -11,7 +11,9 @@ "typecheck": "tsc --noEmit", "test": "vitest run --passWithNoTests", "test:browser": "vitest run --config vitest.browser.config.ts", - "test:browser:install": "playwright install --with-deps chromium" + "test:browser:install": "playwright install --with-deps chromium", + "test:perf": "vitest run --config vitest.perf.config.ts", + "test:perf:install": "playwright install --with-deps chromium" }, "dependencies": { "@base-ui/react": "^1.2.0", diff --git a/apps/web/src/components/ChatView.tsx b/apps/web/src/components/ChatView.tsx index 76133712d4..499371d337 100644 --- a/apps/web/src/components/ChatView.tsx +++ b/apps/web/src/components/ChatView.tsx @@ -3750,7 +3750,10 @@ export default function ChatView({ threadId }: ChatViewProps) { } return ( -
+
{/* Top bar */}
; + readonly threadSummaries: ReadonlyArray; +} + +interface StartPerfAppHarnessOptions { + readonly suite: string; + readonly seedScenarioId: PerfSeedScenarioId; + readonly providerScenarioId?: PerfProviderScenarioId; + readonly serverSampler?: PerfServerSampler; +} + +interface FinishPerfRunOptions { + readonly suite: string; + readonly scenarioId: string; + readonly thresholds: PerfThresholdProfile; + readonly metadata?: Record; + readonly actionSummary?: { + readonly threadSwitchActionPrefix?: string; + readonly burstActionName?: string; + }; + readonly artifactBasename?: string; +} + +export interface PerfAppHarness { + readonly seededState: PerfSeededState; + readonly page: Page; + readonly url: string; + readonly artifactDir: string; + readonly startAction: (name: string) => Promise; + readonly endAction: (name: string) => Promise; + readonly resetBrowserMetrics: () => Promise; + readonly sampleMountedRows: (label: string) => Promise; + readonly snapshotBrowserMetrics: () => Promise; + readonly finishRun: (options: FinishPerfRunOptions) => Promise<{ + readonly artifactPath: string; + readonly artifact: PerfRunArtifact; + readonly browserMetrics: BrowserPerfMetrics; + readonly serverMetrics: ReadonlyArray | null; + }>; +} + +async function pickFreePort(): Promise { + return await new Promise((resolvePort, reject) => { + const server = createServer(); + server.on("error", reject); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") { + reject(new Error("Unable to resolve a free localhost port.")); + return; + } + const { port } = address; + server.close((closeError) => { + if (closeError) { + reject(closeError); + return; + } + resolvePort(port); + }); + }); + }); +} + +async function waitForServerReady(url: string, process: ChildProcess): Promise { + const startedAt = Date.now(); + const timeoutMs = 45_000; + const requestTimeoutMs = 1_000; + + while (Date.now() - startedAt < timeoutMs) { 
+ if (process.exitCode !== null) { + throw new Error(`Perf server exited early with code ${process.exitCode}.`); + } + try { + const response = await fetch(url, { + redirect: "manual", + signal: AbortSignal.timeout(requestTimeoutMs), + }); + if (response.ok) { + return; + } + } catch { + // Ignore connection races while the server is still starting. + } + await new Promise((resolveDelay) => setTimeout(resolveDelay, 200)); + } + + throw new Error(`Timed out waiting for perf server readiness at ${url}.`); +} + +async function verifyBuiltArtifacts(): Promise { + await Promise.all([access(serverBinPath), access(serverClientIndexPath)]).catch(() => { + throw new Error( + `Built perf artifacts are missing. Expected ${serverBinPath} and ${serverClientIndexPath}. Run bun run test:perf:web or build the app first.`, + ); + }); +} + +async function stopChildProcess(process: ChildProcess): Promise { + if (process.exitCode !== null) { + return; + } + + process.kill("SIGTERM"); + const exited = await new Promise((resolveExited) => { + const timer = setTimeout(() => resolveExited(false), 5_000); + process.once("exit", () => { + clearTimeout(timer); + resolveExited(true); + }); + }); + + if (!exited && process.exitCode === null) { + process.kill("SIGKILL"); + await new Promise((resolveExited) => { + process.once("exit", () => resolveExited()); + }); + } +} + +async function ensureArtifactDir(suite: string, scenarioId: string): Promise { + const baseArtifactDir = resolve( + process.env[PERF_ARTIFACT_DIR_ENV] ?? 
join(repoRoot, "artifacts/perf"), + ); + const runId = `${suite}-${scenarioId}-${Date.now().toString()}`; + const artifactDir = join(baseArtifactDir, runId); + await mkdir(artifactDir, { recursive: true }); + return artifactDir; +} + +async function cleanupPerfRunDir(runParentDir: string): Promise { + await rm(runParentDir, { recursive: true, force: true }); +} + +async function writeServerLogs( + artifactDir: string, + stdout: string, + stderr: string, + basename: string, +): Promise { + await mkdir(artifactDir, { recursive: true }); + await Promise.all([ + writeFile(join(artifactDir, `${basename}.server.stdout.log`), stdout, "utf8"), + writeFile(join(artifactDir, `${basename}.server.stderr.log`), stderr, "utf8"), + ]); +} + +async function invokeBrowserCollector( + page: Page, + fn: (collectorName: string, ...args: ReadonlyArray) => T, + ...args: ReadonlyArray +): Promise { + return await page.evaluate( + ({ collectorName, args: serializedArgs, fnSource }) => { + const runtimeFn = new Function( + "collectorName", + "args", + `return (${fnSource})(collectorName, ...args);`, + ) as (collectorName: string, args: ReadonlyArray) => T; + return runtimeFn(collectorName, serializedArgs); + }, + { + collectorName: PERF_BROWSER_GLOBAL, + args, + fnSource: fn.toString(), + }, + ); +} + +function parsePerfSeededState(stdout: string): PerfSeededState { + const startIndex = stdout.lastIndexOf(PERF_SEED_JSON_START); + const endIndex = stdout.lastIndexOf(PERF_SEED_JSON_END); + + if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) { + const payload = stdout.slice(startIndex + PERF_SEED_JSON_START.length, endIndex).trim(); + return JSON.parse(payload) as PerfSeededState; + } + + return JSON.parse(stdout) as PerfSeededState; +} + +export async function startPerfAppHarness( + options: StartPerfAppHarnessOptions, +): Promise { + await verifyBuiltArtifacts(); + + const seededState = await (async () => { + const seedProcess = spawn( + "bun", + ["run", 
"apps/server/scripts/seedPerfState.ts", options.seedScenarioId], + { + cwd: repoRoot, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + let stdout = ""; + let stderr = ""; + seedProcess.stdout?.on("data", (chunk) => { + stdout += chunk.toString(); + }); + seedProcess.stderr?.on("data", (chunk) => { + stderr += chunk.toString(); + }); + const [exitCode] = (await once(seedProcess, "exit")) as [number | null]; + if (exitCode !== 0) { + throw new Error(`Perf seed command failed with code ${exitCode ?? "unknown"}.\n${stderr}`); + } + return parsePerfSeededState(stdout); + })(); + const artifactDir = await ensureArtifactDir(options.suite, options.seedScenarioId); + const port = await pickFreePort(); + const url = `http://127.0.0.1:${port}/`; + const env = buildPerfServerEnv(process.env, options.providerScenarioId); + + let stdoutBuffer = ""; + let stderrBuffer = ""; + const serverProcess = spawn( + process.execPath, + [ + serverBinPath, + "--mode", + "web", + "--host", + "127.0.0.1", + "--port", + `${port}`, + "--base-dir", + seededState.baseDir, + "--no-browser", + ], + { + cwd: repoRoot, + env, + stdio: ["ignore", "pipe", "pipe"], + }, + ); + serverProcess.stdout?.on("data", (chunk) => { + stdoutBuffer += chunk.toString(); + }); + serverProcess.stderr?.on("data", (chunk) => { + stderrBuffer += chunk.toString(); + }); + let browser: Browser | null = null; + let context: BrowserContext | null = null; + let page: Page | null = null; + + try { + await waitForServerReady(url, serverProcess); + + browser = await chromium.launch({ + headless: process.env[PERF_HEADFUL_ENV] !== "1", + }); + context = await browser.newContext({ + viewport: { width: 1440, height: 960 }, + }); + await context.addInitScript(installBrowserPerfCollector, "[data-timeline-row-kind]"); + page = await context.newPage(); + await page.goto(url, { waitUntil: "domcontentloaded" }); + + const firstProjectTitle = + seededState.projectSummaries[0]?.title ?? + seededState.projectTitle ?? 
+ getPerfSeedScenario(options.seedScenarioId).projects[0]?.title; + if (!firstProjectTitle) { + throw new Error(`Seed scenario '${options.seedScenarioId}' produced no projects.`); + } + await page.getByText(firstProjectTitle, { exact: true }).first().waitFor({ timeout: 45_000 }); + + const sampler = options.serverSampler ?? new NoopServerSampler(); + if (serverProcess.pid) { + await sampler.start({ pid: serverProcess.pid }); + } + const runStartedAt = new Date().toISOString(); + const readyPage = page; + if (!readyPage) { + throw new Error("Perf app harness did not initialize a browser page."); + } + + let finishPromise: + | Promise<{ + readonly artifactPath: string; + readonly artifact: PerfRunArtifact; + readonly browserMetrics: BrowserPerfMetrics; + readonly serverMetrics: ReadonlyArray | null; + }> + | undefined; + + const teardown = async () => { + await Promise.allSettled([ + context ? context.close() : Promise.resolve(), + browser ? browser.close() : Promise.resolve(), + ]); + await stopChildProcess(serverProcess); + await cleanupPerfRunDir(seededState.runParentDir); + }; + + return { + seededState, + page: readyPage, + url, + artifactDir, + startAction: (name) => + invokeBrowserCollector( + readyPage, + (collectorName, actionName) => { + (window as Window & Record)[collectorName]?.startAction( + actionName as string, + ); + }, + name, + ), + endAction: (name) => + invokeBrowserCollector( + readyPage, + (collectorName, actionName) => { + return ( + (window as Window & Record)[collectorName]?.endAction( + actionName as string, + ) ?? null + ); + }, + name, + ), + resetBrowserMetrics: () => + invokeBrowserCollector(readyPage, (collectorName) => { + (window as Window & Record)[collectorName]?.reset(); + }), + sampleMountedRows: (label) => + invokeBrowserCollector( + readyPage, + (collectorName, sampleLabel) => { + return ( + (window as Window & Record)[collectorName]?.sampleMountedRows( + sampleLabel as string, + ) ?? 
0 + ); + }, + label, + ), + snapshotBrowserMetrics: () => + invokeBrowserCollector(readyPage, (collectorName) => { + return ((window as Window & Record)[collectorName]?.snapshot() ?? { + actions: [], + longTasks: [], + rafGapsMs: [], + mountedRowSamples: [], + }) as BrowserPerfMetrics; + }), + finishRun: async (finishOptions) => { + if (finishPromise) { + return await finishPromise; + } + + finishPromise = (async () => { + const completedAt = new Date().toISOString(); + const browserMetrics: BrowserPerfMetrics = await (async () => { + try { + return await invokeBrowserCollector(readyPage, (collectorName) => { + return ((window as Window & Record)[collectorName]?.snapshot() ?? { + actions: [], + longTasks: [], + rafGapsMs: [], + mountedRowSamples: [], + }) as BrowserPerfMetrics; + }); + } catch { + return { + actions: [], + longTasks: [], + rafGapsMs: [], + mountedRowSamples: [], + } satisfies BrowserPerfMetrics; + } + })(); + const serverMetrics = await sampler.stop(); + await teardown(); + + const basename = + finishOptions.artifactBasename ?? `${finishOptions.suite}-${finishOptions.scenarioId}`; + await writeServerLogs(artifactDir, stdoutBuffer, stderrBuffer, basename); + const artifact: PerfRunArtifact = { + suite: finishOptions.suite, + scenarioId: finishOptions.scenarioId, + startedAt: runStartedAt, + completedAt, + thresholds: finishOptions.thresholds, + summary: summarizeBrowserPerfMetrics(browserMetrics, finishOptions.actionSummary), + browserMetrics, + serverMetrics, + ...(finishOptions.metadata ? { metadata: finishOptions.metadata } : {}), + }; + const artifactPath = join(artifactDir, `${basename}.json`); + await writePerfArtifact(artifactPath, artifact); + + return { + artifactPath, + artifact, + browserMetrics, + serverMetrics, + }; + })(); + + return await finishPromise; + }, + }; + } catch (error) { + await Promise.allSettled([ + context ? context.close() : Promise.resolve(), + browser ? 
browser.close() : Promise.resolve(), + ]); + await stopChildProcess(serverProcess); + await cleanupPerfRunDir(seededState.runParentDir); + throw error; + } +} diff --git a/apps/web/test/perf/pagePerfHelpers.ts b/apps/web/test/perf/pagePerfHelpers.ts new file mode 100644 index 0000000000..1a51126d3b --- /dev/null +++ b/apps/web/test/perf/pagePerfHelpers.ts @@ -0,0 +1,103 @@ +import type { Page } from "playwright"; + +import type { PerfAppHarness } from "./appHarness"; + +export async function ensureThreadRowVisible( + page: Page, + projectTitle: string, + threadId: string, +): Promise { + const threadRow = page.getByTestId(`thread-row-${threadId}`); + if (await threadRow.isVisible().catch(() => false)) { + return; + } + + const projectToggle = page.getByText(projectTitle, { exact: true }).first(); + await projectToggle.click(); + await threadRow.waitFor({ state: "visible", timeout: 20_000 }); +} + +export async function waitForThreadRoute( + page: Page, + input: { + readonly threadId: string; + readonly messageId?: string; + readonly extraSelector?: string; + }, +): Promise { + const path = `/${encodeURIComponent(input.threadId)}`; + const threadSelector = `[data-testid="chat-thread-${input.threadId}"]`; + await page.waitForFunction( + ({ expectedPath, threadSelector, messageSelector, extraSelector }) => { + const pathMatches = window.location.pathname === expectedPath; + if (!pathMatches) { + return false; + } + if (!document.querySelector(threadSelector)) { + return false; + } + + if (messageSelector && !document.querySelector(messageSelector)) { + return false; + } + if (extraSelector && !document.querySelector(extraSelector)) { + return false; + } + return true; + }, + { + expectedPath: path, + threadSelector, + messageSelector: input.messageId ? `[data-message-id="${input.messageId}"]` : null, + extraSelector: input.extraSelector ?? 
null, + }, + { timeout: 45_000 }, + ); +} + +export async function measureThreadSwitch( + harness: PerfAppHarness, + input: { + readonly actionName: string; + readonly projectTitle: string; + readonly threadId: string; + readonly messageId?: string; + readonly extraSelector?: string; + }, +): Promise { + await ensureThreadRowVisible(harness.page, input.projectTitle, input.threadId); + await harness.startAction(input.actionName); + await harness.page.getByTestId(`thread-row-${input.threadId}`).click(); + await waitForThreadRoute(harness.page, { + threadId: input.threadId, + ...(input.messageId ? { messageId: input.messageId } : {}), + ...(input.extraSelector ? { extraSelector: input.extraSelector } : {}), + }); + return await harness.endAction(input.actionName); +} + +export async function scrollTimelineTo(page: Page, position: "top" | "bottom"): Promise { + await page.evaluate(async (targetPosition) => { + const timelineRoot = document.querySelector('[data-timeline-root="true"]'); + const scrollContainer = timelineRoot?.parentElement; + if (!scrollContainer) { + throw new Error("Messages scroll container not found."); + } + + scrollContainer.scrollTo({ + top: targetPosition === "bottom" ? 
scrollContainer.scrollHeight : 0, + behavior: "auto", + }); + + await new Promise((resolve) => { + requestAnimationFrame(() => requestAnimationFrame(() => resolve())); + }); + }, position); +} + +export async function typeIntoComposerAndSend(page: Page, message: string): Promise { + const editor = page.getByTestId("composer-editor"); + await editor.click(); + await page.keyboard.type(message); + await page.getByRole("button", { name: "Send message" }).click(); +} diff --git a/apps/web/test/perf/serverEnv.ts b/apps/web/test/perf/serverEnv.ts new file mode 100644 index 0000000000..b6799cb94e --- /dev/null +++ b/apps/web/test/perf/serverEnv.ts @@ -0,0 +1,27 @@ +import type { PerfProviderScenarioId } from "@t3tools/shared/perf/scenarioCatalog"; + +export const PERF_PROVIDER_ENV = "T3CODE_PERF_PROVIDER"; +export const PERF_SCENARIO_ENV = "T3CODE_PERF_SCENARIO"; +const AUTO_BOOTSTRAP_PROJECT_ENV = "T3CODE_AUTO_BOOTSTRAP_PROJECT_FROM_CWD"; + +export function buildPerfServerEnv( + baseEnv: NodeJS.ProcessEnv, + providerScenarioId?: PerfProviderScenarioId, +): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { + ...baseEnv, + [AUTO_BOOTSTRAP_PROJECT_ENV]: "false", + }; + + if (!providerScenarioId) { + delete env[PERF_PROVIDER_ENV]; + delete env[PERF_SCENARIO_ENV]; + return env; + } + + return { + ...env, + [PERF_PROVIDER_ENV]: "1", + [PERF_SCENARIO_ENV]: providerScenarioId, + }; +} diff --git a/apps/web/test/perf/supportHelpers.test.ts b/apps/web/test/perf/supportHelpers.test.ts new file mode 100644 index 0000000000..00b574d601 --- /dev/null +++ b/apps/web/test/perf/supportHelpers.test.ts @@ -0,0 +1,76 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +import { percentile } from "../../../../test/perf/support/artifact"; +import { installBrowserPerfCollector } from "../../../../test/perf/support/browserMetrics"; +import { buildPerfServerEnv, PERF_PROVIDER_ENV, PERF_SCENARIO_ENV } from "./serverEnv"; + +describe("percentile", () => { + it("returns the 
minimum value for the zero percentile", () => { + expect(percentile([9, 3, 6], 0)).toBe(3); + }); +}); + +describe("installBrowserPerfCollector", () => { + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it("cancels the previous animation frame loop before reset starts a new one", () => { + let nextHandle = 1; + const callbacks = new Map(); + const requestAnimationFrame = vi.fn((callback: FrameRequestCallback) => { + const handle = nextHandle++; + callbacks.set(handle, callback); + return handle; + }); + const cancelAnimationFrame = vi.fn((handle: number) => { + callbacks.delete(handle); + }); + + vi.stubGlobal("window", { + requestAnimationFrame, + cancelAnimationFrame, + } as unknown as Window & typeof globalThis); + vi.stubGlobal("document", { + querySelectorAll: vi.fn(() => []), + } as unknown as Document); + vi.stubGlobal("PerformanceObserver", undefined); + + installBrowserPerfCollector(); + + const collector = window.__t3PerfCollector; + expect(collector).toBeDefined(); + expect(requestAnimationFrame).toHaveBeenCalledTimes(1); + + collector?.reset(); + expect(cancelAnimationFrame).toHaveBeenCalledWith(1); + expect(requestAnimationFrame).toHaveBeenCalledTimes(2); + + collector?.reset(); + expect(cancelAnimationFrame).toHaveBeenLastCalledWith(2); + expect(requestAnimationFrame).toHaveBeenCalledTimes(3); + }); +}); + +describe("buildPerfServerEnv", () => { + it("does not enable the perf provider when no live provider scenario is requested", () => { + const env = buildPerfServerEnv({ + [PERF_PROVIDER_ENV]: "1", + [PERF_SCENARIO_ENV]: "dense_assistant_stream", + KEEP_ME: "yes", + }); + + expect(env.T3CODE_AUTO_BOOTSTRAP_PROJECT_FROM_CWD).toBe("false"); + expect(env[PERF_PROVIDER_ENV]).toBeUndefined(); + expect(env[PERF_SCENARIO_ENV]).toBeUndefined(); + expect(env.KEEP_ME).toBe("yes"); + }); + + it("enables the perf provider only when a live provider scenario is requested", () => { + const env = buildPerfServerEnv({}, "dense_assistant_stream"); + + 
expect(env.T3CODE_AUTO_BOOTSTRAP_PROJECT_FROM_CWD).toBe("false"); + expect(env[PERF_PROVIDER_ENV]).toBe("1"); + expect(env[PERF_SCENARIO_ENV]).toBe("dense_assistant_stream"); + }); +}); diff --git a/apps/web/test/perf/virtualization.perf.test.ts b/apps/web/test/perf/virtualization.perf.test.ts new file mode 100644 index 0000000000..34b3e8fd88 --- /dev/null +++ b/apps/web/test/perf/virtualization.perf.test.ts @@ -0,0 +1,181 @@ +import { expect, test } from "vitest"; + +import { summarizeBrowserPerfMetrics } from "../../../../test/perf/support/artifact"; +import { PERF_CATALOG_IDS } from "@t3tools/shared/perf/scenarioCatalog"; +import { PERF_THRESHOLDS } from "../../../../test/perf/support/thresholds"; +import { startPerfAppHarness, type PerfAppHarness } from "./appHarness"; +import { + ensureThreadRowVisible, + measureThreadSwitch, + scrollTimelineTo, + waitForThreadRoute, +} from "./pagePerfHelpers"; + +test("virtualization stays bounded and heavy thread switches remain snappy", async () => { + const thresholds = PERF_THRESHOLDS.local; + let harness: PerfAppHarness | null = null; + let finished = false; + + try { + harness = await startPerfAppHarness({ + suite: "virtualization", + seedScenarioId: "large_threads", + }); + + const heavyAThreadSummary = harness.seededState.threadSummaries.find( + (thread) => thread.id === PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + ); + const heavyBThreadSummary = harness.seededState.threadSummaries.find( + (thread) => thread.id === PERF_CATALOG_IDS.largeThreads.heavyBThreadId, + ); + const heavyAProjectTitle = + heavyAThreadSummary?.projectTitle ?? PERF_CATALOG_IDS.largeThreads.heavyAProjectTitle; + const heavyBProjectTitle = + heavyBThreadSummary?.projectTitle ?? PERF_CATALOG_IDS.largeThreads.heavyBProjectTitle; + const heavyThreadMessageCount = heavyAThreadSummary?.messageCount ?? 
0; + expect(heavyAThreadSummary).toBeDefined(); + expect(heavyBThreadSummary).toBeDefined(); + expect(heavyThreadMessageCount).toBeGreaterThanOrEqual(2_000); + expect(heavyAThreadSummary?.turnCount ?? 0).toBeLessThan(100); + expect(heavyBThreadSummary?.turnCount ?? 0).toBeLessThan(100); + + await ensureThreadRowVisible( + harness.page, + heavyAProjectTitle, + PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + ); + await harness.page + .getByTestId(`thread-row-${PERF_CATALOG_IDS.largeThreads.heavyAThreadId}`) + .click(); + await waitForThreadRoute(harness.page, { + threadId: PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyATerminalMessageId, + }); + await scrollTimelineTo(harness.page, "bottom"); + + await ensureThreadRowVisible( + harness.page, + heavyBProjectTitle, + PERF_CATALOG_IDS.largeThreads.heavyBThreadId, + ); + await harness.page + .getByTestId(`thread-row-${PERF_CATALOG_IDS.largeThreads.heavyBThreadId}`) + .click(); + await waitForThreadRoute(harness.page, { + threadId: PERF_CATALOG_IDS.largeThreads.heavyBThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyBTerminalMessageId, + }); + await scrollTimelineTo(harness.page, "bottom"); + + await harness.resetBrowserMetrics(); + + await measureThreadSwitch(harness, { + actionName: "thread-switch-warmup-a", + projectTitle: heavyAProjectTitle, + threadId: PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyATerminalMessageId, + }); + + const mountedRows = await harness.sampleMountedRows("heavy-a-open"); + expect(mountedRows).toBeLessThanOrEqual(thresholds.maxMountedTimelineRows); + + const measuredTargets = [ + { + actionName: "thread-switch-1", + threadId: PERF_CATALOG_IDS.largeThreads.heavyBThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyBTerminalMessageId, + }, + { + actionName: "thread-switch-2", + threadId: PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + messageId: 
PERF_CATALOG_IDS.largeThreads.heavyATerminalMessageId, + }, + { + actionName: "thread-switch-3", + threadId: PERF_CATALOG_IDS.largeThreads.heavyBThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyBTerminalMessageId, + }, + { + actionName: "thread-switch-4", + threadId: PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyATerminalMessageId, + }, + { + actionName: "thread-switch-5", + threadId: PERF_CATALOG_IDS.largeThreads.heavyBThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyBTerminalMessageId, + }, + { + actionName: "thread-switch-6", + threadId: PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + messageId: PERF_CATALOG_IDS.largeThreads.heavyATerminalMessageId, + }, + ] as const; + + for (const target of measuredTargets) { + await measureThreadSwitch(harness, { + actionName: target.actionName, + projectTitle: + target.threadId === PERF_CATALOG_IDS.largeThreads.heavyAThreadId + ? heavyAProjectTitle + : heavyBProjectTitle, + threadId: target.threadId, + messageId: target.messageId, + }); + await harness.sampleMountedRows(`${target.actionName}-rows`); + } + + await scrollTimelineTo(harness.page, "bottom"); + await harness.sampleMountedRows("scroll-start"); + await scrollTimelineTo(harness.page, "top"); + await harness.sampleMountedRows("scroll-top"); + await scrollTimelineTo(harness.page, "bottom"); + await harness.sampleMountedRows("scroll-bottom"); + + const browserMetrics = await harness.snapshotBrowserMetrics(); + const summary = summarizeBrowserPerfMetrics(browserMetrics, { + threadSwitchActionPrefix: "thread-switch", + }); + + expect(summary.threadSwitchP50Ms).not.toBeNull(); + expect(summary.threadSwitchP95Ms).not.toBeNull(); + expect(summary.threadSwitchP50Ms ?? Number.POSITIVE_INFINITY).toBeLessThanOrEqual( + thresholds.threadSwitchP50Ms, + ); + expect(summary.threadSwitchP95Ms ?? 
Number.POSITIVE_INFINITY).toBeLessThanOrEqual( + thresholds.threadSwitchP95Ms, + ); + expect(summary.maxMountedTimelineRows).toBeLessThanOrEqual(thresholds.maxMountedTimelineRows); + expect(summary.maxLongTaskMs).toBeLessThanOrEqual(thresholds.maxLongTaskMs); + expect(summary.maxRafGapMs).toBeLessThanOrEqual(thresholds.maxRafGapMs); + + await harness.finishRun({ + suite: "virtualization", + scenarioId: "large_threads", + thresholds, + metadata: { + heavyThreadMessageCount, + }, + actionSummary: { + threadSwitchActionPrefix: "thread-switch", + }, + }); + finished = true; + } finally { + if (harness && !finished) { + await harness.finishRun({ + suite: "virtualization", + scenarioId: "large_threads", + thresholds, + metadata: { + heavyThreadMessageCount: harness.seededState.threadSummaries.find( + (thread) => thread.id === PERF_CATALOG_IDS.largeThreads.heavyAThreadId, + )?.messageCount, + }, + actionSummary: { + threadSwitchActionPrefix: "thread-switch", + }, + }); + } + } +}); diff --git a/apps/web/test/perf/websocket-application.perf.test.ts b/apps/web/test/perf/websocket-application.perf.test.ts new file mode 100644 index 0000000000..29b53fa0df --- /dev/null +++ b/apps/web/test/perf/websocket-application.perf.test.ts @@ -0,0 +1,140 @@ +import { expect, test } from "vitest"; + +import { summarizeBrowserPerfMetrics } from "../../../../test/perf/support/artifact"; +import { PERF_CATALOG_IDS, PERF_PROVIDER_SCENARIOS } from "@t3tools/shared/perf/scenarioCatalog"; +import { PERF_THRESHOLDS } from "../../../../test/perf/support/thresholds"; +import { startPerfAppHarness, type PerfAppHarness } from "./appHarness"; +import { + ensureThreadRowVisible, + measureThreadSwitch, + typeIntoComposerAndSend, + waitForThreadRoute, +} from "./pagePerfHelpers"; + +test("high-frequency websocket events stay responsive under real built-app flow", async () => { + const thresholds = PERF_THRESHOLDS.local; + let harness: PerfAppHarness | null = null; + let finished = false; + + try { + 
harness = await startPerfAppHarness({ + suite: "websocket-application", + seedScenarioId: "burst_base", + providerScenarioId: "dense_assistant_stream", + }); + + const projectTitle = harness.seededState.projectTitle ?? "Performance Workspace"; + const streamScenario = PERF_PROVIDER_SCENARIOS.dense_assistant_stream; + expect(streamScenario.totalDurationMs).toBeGreaterThanOrEqual(10_000); + + await ensureThreadRowVisible( + harness.page, + projectTitle, + PERF_CATALOG_IDS.burstBase.burstThreadId, + ); + await harness.page + .getByTestId(`thread-row-${PERF_CATALOG_IDS.burstBase.burstThreadId}`) + .click(); + await waitForThreadRoute(harness.page, { + threadId: PERF_CATALOG_IDS.burstBase.burstThreadId, + messageId: PERF_CATALOG_IDS.burstBase.burstTerminalMessageId, + }); + + await harness.resetBrowserMetrics(); + await harness.startAction("burst-completion"); + await typeIntoComposerAndSend(harness.page, "Run the dense websocket perf burst."); + + await harness.page.waitForTimeout(900); + + await measureThreadSwitch(harness, { + actionName: "thread-switch-burst-nav", + projectTitle, + threadId: PERF_CATALOG_IDS.burstBase.navigationThreadId, + }); + await harness.page.waitForFunction( + (messageNeedle) => document.body.textContent?.includes(messageNeedle as string) ?? false, + "Navigation lane", + { timeout: 10_000 }, + ); + const navigationTimelineText = await harness.page + .locator('[data-timeline-root="true"]') + .textContent(); + expect(navigationTimelineText ?? "").toContain("Navigation lane"); + + await measureThreadSwitch(harness, { + actionName: "thread-switch-burst-return", + projectTitle, + threadId: PERF_CATALOG_IDS.burstBase.burstThreadId, + }); + + await harness.page.waitForFunction( + (sentinelText) => document.body.textContent?.includes(sentinelText as string) ?? 
false, + streamScenario.sentinelText, + { timeout: streamScenario.totalDurationMs + 15_000 }, + ); + await harness.endAction("burst-completion"); + + const browserMetrics = await harness.snapshotBrowserMetrics(); + const summary = summarizeBrowserPerfMetrics(browserMetrics, { + threadSwitchActionPrefix: "thread-switch", + burstActionName: "burst-completion", + }); + + expect(summary.burstCompletionMs).not.toBeNull(); + expect(summary.burstCompletionMs ?? Number.POSITIVE_INFINITY).toBeLessThanOrEqual( + thresholds.burstCompletionMs, + ); + expect(summary.maxLongTaskMs).toBeLessThanOrEqual(thresholds.maxLongTaskMs); + expect(summary.longTasksOver50Ms).toBeLessThanOrEqual(thresholds.longTasksOver50MsMax); + expect(summary.maxRafGapMs).toBeLessThanOrEqual(thresholds.maxRafGapMs); + + const burstNavActions = browserMetrics.actions.filter((action) => + action.name.startsWith("thread-switch-burst"), + ); + expect(burstNavActions).toHaveLength(2); + for (const action of burstNavActions) { + expect(action.durationMs).toBeLessThanOrEqual(thresholds.threadSwitchP95Ms); + } + + await harness.finishRun({ + suite: "websocket-application", + scenarioId: "dense_assistant_stream", + thresholds, + metadata: { + burstSeedThreadId: PERF_CATALOG_IDS.burstBase.burstThreadId, + navigationThreadId: PERF_CATALOG_IDS.burstBase.navigationThreadId, + fillerThreadId: PERF_CATALOG_IDS.burstBase.fillerThreadId, + navigationLiveAssistantMessageId: + PERF_CATALOG_IDS.provider.navigationLiveAssistantMessageId, + burstLiveAssistantMessageId: PERF_CATALOG_IDS.provider.burstLiveAssistantMessageId, + sentinelText: streamScenario.sentinelText, + }, + actionSummary: { + threadSwitchActionPrefix: "thread-switch", + burstActionName: "burst-completion", + }, + }); + finished = true; + } finally { + if (harness && !finished) { + await harness.finishRun({ + suite: "websocket-application", + scenarioId: "dense_assistant_stream", + thresholds, + metadata: { + burstSeedThreadId: 
PERF_CATALOG_IDS.burstBase.burstThreadId, + navigationThreadId: PERF_CATALOG_IDS.burstBase.navigationThreadId, + fillerThreadId: PERF_CATALOG_IDS.burstBase.fillerThreadId, + navigationLiveAssistantMessageId: + PERF_CATALOG_IDS.provider.navigationLiveAssistantMessageId, + burstLiveAssistantMessageId: PERF_CATALOG_IDS.provider.burstLiveAssistantMessageId, + sentinelText: PERF_PROVIDER_SCENARIOS.dense_assistant_stream.sentinelText, + }, + actionSummary: { + threadSwitchActionPrefix: "thread-switch", + burstActionName: "burst-completion", + }, + }); + } + } +}); diff --git a/apps/web/tsconfig.json b/apps/web/tsconfig.json index 178f4bcbab..bd141a14a9 100644 --- a/apps/web/tsconfig.json +++ b/apps/web/tsconfig.json @@ -21,5 +21,14 @@ } ] }, - "include": ["src", "vite.config.ts", "test"] + "include": [ + "src", + "vite.config.ts", + "test", + "../../packages/shared/src/perf/artifact.ts", + "../../test/perf/support/artifact.ts", + "../../test/perf/support/browserMetrics.ts", + "../../test/perf/support/serverSampler.ts", + "../../test/perf/support/thresholds.ts" + ] } diff --git a/apps/web/vite.config.ts b/apps/web/vite.config.ts index 56b138d331..913e549eda 100644 --- a/apps/web/vite.config.ts +++ b/apps/web/vite.config.ts @@ -2,6 +2,7 @@ import tailwindcss from "@tailwindcss/vite"; import react, { reactCompilerPreset } from "@vitejs/plugin-react"; import babel from "@rolldown/plugin-babel"; import { tanstackRouter } from "@tanstack/router-plugin/vite"; +import { configDefaults } from "vitest/config"; import { defineConfig } from "vite"; import pkg from "./package.json" with { type: "json" }; @@ -56,4 +57,7 @@ export default defineConfig({ emptyOutDir: true, sourcemap: buildSourcemap, }, + test: { + exclude: [...configDefaults.exclude, "test/perf/**/*.perf.test.ts"], + }, }); diff --git a/apps/web/vitest.perf.config.ts b/apps/web/vitest.perf.config.ts new file mode 100644 index 0000000000..79230041a9 --- /dev/null +++ b/apps/web/vitest.perf.config.ts @@ -0,0 +1,12 @@ 
+import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + include: ["test/perf/**/*.perf.test.ts"], + environment: "node", + fileParallelism: false, + maxConcurrency: 1, + testTimeout: 180_000, + hookTimeout: 180_000, + }, +}); diff --git a/docs/perf-benchmarks.md b/docs/perf-benchmarks.md new file mode 100644 index 0000000000..1de3aa9179 --- /dev/null +++ b/docs/perf-benchmarks.md @@ -0,0 +1,267 @@ +# Performance Benchmarks + +This repository has a local performance regression harness for the built web app and built server. It is intentionally separate from the normal unit and browser test suites. + +The benchmark is meant to answer two questions: + +- Does large-thread rendering and thread-to-thread navigation stay snappy? +- Does high-frequency websocket event application stay responsive under a realistic built-app flow? + +## What it runs + +The perf suite does not use Vite dev mode. Each run does this: + +1. Seeds a real server base dir with deterministic fixture data using the normal event store and projection pipeline. +2. Starts the built `t3` server from `apps/server/dist/bin.mjs`. +3. Serves the built web bundle from `apps/server/dist/client`. +4. Launches Chromium with Playwright. +5. Installs an in-page collector for action timings, long tasks, `requestAnimationFrame` gaps, and mounted timeline row counts. +6. Writes a JSON artifact plus server stdout and stderr logs under `artifacts/perf/`. + +The current implementation is browser-focused, but the harness already reserves a server sampling hook so later server-only CPU and memory benchmarks can share the same scenarios and artifact format. + +## Main pieces + +- `packages/shared/src/perf/scenarioCatalog.ts` + - Shared scenario catalog for both seeded state and live provider traffic. +- `apps/server/integration/perf/seedPerfState.ts` + - Creates the temporary sqlite-backed base dir using real orchestration events and projections. 
+- `apps/server/src/perf/PerfProviderAdapter.ts` + - Perf-only mock provider that emits paced runtime events over the normal provider -> websocket -> client path. +- `apps/web/test/perf/appHarness.ts` + - Starts the built app, launches Chromium, installs the browser collector, and writes artifacts. +- `test/perf/support/browserMetrics.ts` + - Collects action durations, long tasks, `requestAnimationFrame` gaps, and mounted row samples inside the page. +- `test/perf/support/artifact.ts` + - Defines the JSON artifact format and summary math. +- `test/perf/support/thresholds.ts` + - Local budgets that currently gate the suite. +- `test/perf/support/serverSampler.ts` + - Extension point for future CPU and memory sampling. The current implementation is `NoopServerSampler`, so `serverMetrics` is `null` today. + +## Scenarios + +### `large_threads` + +Seeded app state used by the virtualization benchmark. + +- 5 projects +- 30 threads total +- 2 heavy threads +- 1 burst thread +- 27 light filler threads spread across all projects + +The two heavy threads currently seed: + +- `84` and `96` turns +- `2,000` messages +- periodic worklog/activity rows +- periodic proposed-plan rows +- frequent checkpoint rows with larger changed-file trees + +This keeps the thread count and project list closer to a real workspace mix: many projects, many sidebar rows, and heavy conversations that stay under `100` turns while still retaining thousands of messages because each turn fans out into multiple assistant messages. The heavy timelines also include the non-message rows that tend to stress grouping, virtualization, and diff-tree rendering. + +### `burst_base` + +Seeded app state used by the websocket benchmark. + +- 1 burst target thread +- 1 navigation thread +- 1 filler thread + +This smaller seed keeps the focus on live websocket application while still covering cross-thread background activity. + +### `dense_assistant_stream` + +Live provider scenario used by the websocket benchmark. 
+ +- runs for about `10` seconds +- spans `24` cycles +- updates `3` lanes at once: `burst`, `navigation`, and `filler` +- interleaves assistant messages with multiple worklog/tool lifecycles + +Each cycle emits, per lane: + +1. an assistant intro message +2. three worklog/tool command lifecycles with file payloads +3. an assistant followup message + +The message fragments vary in length on purpose so the live stream does not render as uniformly sized chunks. + +## What each benchmark asserts + +### Virtualization benchmark + +File: `apps/web/test/perf/virtualization.perf.test.ts` + +This benchmark: + +- opens the large-thread seed on the built app +- navigates between the two heavy threads through the real sidebar +- measures a warmup switch and then six measured thread switches +- samples mounted timeline row counts +- scrolls bottom -> top -> bottom to catch jank + +It currently fails if the local budgets in `test/perf/support/thresholds.ts` are exceeded: + +- max mounted timeline rows: `140` +- thread switch p50: `250ms` +- thread switch p95: `500ms` +- max long task: `120ms` +- max `requestAnimationFrame` gap: `120ms` + +### Websocket benchmark + +File: `apps/web/test/perf/websocket-application.perf.test.ts` + +This benchmark: + +- starts the built app with the perf provider enabled +- opens the burst thread +- sends one real composer message +- lets the mock provider emit the live multi-thread websocket burst +- switches to another thread during the burst and back again +- waits for the sentinel text that marks the end of the scenario + +It currently fails if these budgets are exceeded: + +- burst completion: `14,000ms` +- max long task: `120ms` +- long tasks over `50ms`: `2` +- max `requestAnimationFrame` gap: `120ms` +- burst-time thread switches: `500ms` max each + +## Commands + +### One-time browser setup + +Install Chromium for the perf suite: + +```bash +cd apps/web +bun run test:perf:install +``` + +### Run the full automated benchmark suite + +From 
the repo root: + +```bash +bun run test:perf:web +``` + +That command builds `@t3tools/web`, builds `t3`, and then runs the dedicated perf Vitest config. + +### Re-run the perf suite without rebuilding + +If the built artifacts already exist and you have not changed the built app since the last build: + +```bash +cd apps/web +bun run test:perf +``` + +### Watch the automated run in a live browser + +```bash +T3CODE_PERF_HEADFUL=1 bun run test:perf:web +``` + +If you already have fresh build artifacts: + +```bash +cd apps/web +T3CODE_PERF_HEADFUL=1 bun run test:perf +``` + +### Open the seeded app manually for exploration + +Large-thread virtualization state: + +```bash +bun run perf:open:build -- --scenario large_threads --open +``` + +Websocket burst state: + +```bash +bun run perf:open:build -- --scenario burst_base --provider dense_assistant_stream --open +``` + +If the app is already built, use the faster commands: + +```bash +bun run perf:open -- --scenario large_threads --open +bun run perf:open -- --scenario burst_base --provider dense_assistant_stream --open +``` + +For the websocket scenario, open the burst thread and send one message to start the live stream. 
+
+### Inspect the seeded topology directly
+
+The seed script prints the generated project and thread summaries as JSON:
+
+```bash
+bun run apps/server/scripts/seedPerfState.ts large_threads
+bun run apps/server/scripts/seedPerfState.ts burst_base
+```
+
+## Artifacts
+
+By default each run writes to:
+
+```text
+artifacts/perf/<benchmark>-<scenario>-<timestamp>/
+```
+
+Each run currently includes:
+
+- `<benchmark>-<scenario>.json`
+- `<benchmark>-<scenario>.server.stdout.log`
+- `<benchmark>-<scenario>.server.stderr.log`
+
+The JSON artifact contains:
+
+- run metadata
+- threshold profile
+- summary metrics
+- raw browser metrics
+- `serverMetrics`, reserved for later server sampling
+
+Current summary fields:
+
+- `maxMountedTimelineRows`
+- `threadSwitchP50Ms`
+- `threadSwitchP95Ms`
+- `maxLongTaskMs`
+- `longTasksOver50Ms`
+- `maxRafGapMs`
+- `burstCompletionMs`
+
+To change the artifact output directory for one run:
+
+```bash
+T3CODE_PERF_ARTIFACT_DIR=/tmp/t3-perf bun run test:perf:web
+```
+
+## Internal env vars
+
+These are the perf-specific env vars in the current harness:
+
+- `T3CODE_PERF_HEADFUL=1`
+  - Launch Chromium headed instead of headless.
+- `T3CODE_PERF_ARTIFACT_DIR=/path/to/output`
+  - Override the artifact directory.
+- `T3CODE_PERF_PROVIDER=1`
+  - Enables the perf provider path on the server.
+- `T3CODE_PERF_SCENARIO=dense_assistant_stream`
+  - Selects the live perf provider scenario.
+
+In normal usage, the automated harness and `perf:open` script set the provider env vars for you.
+
+## Notes and limitations
+
+- This is a local benchmark suite. It is not wired into CI yet.
+- The normal `apps/web` test suite excludes `test/perf/**/*.perf.test.ts`. Perf tests only run through `apps/web/vitest.perf.config.ts`.
+- The budgets are intentionally conservative first-pass tripwires, not tuned production SLOs.
+- The current harness measures browser responsiveness only. Server sampling is an explicit extension point, not implemented metrics yet.
diff --git a/package.json b/package.json
index a26a359c03..afa745d082 100644
--- a/package.json
+++ b/package.json
@@ -39,6 +39,10 @@
     "typecheck": "turbo run typecheck",
     "lint": "oxlint --report-unused-disable-directives",
     "test": "turbo run test",
+    "test:perf:server": "cd apps/server && bun run test:perf",
+    "test:perf:web": "turbo run build --filter=@t3tools/web && turbo run build --filter=t3 && cd apps/web && bun run test:perf",
+    "perf:open": "node scripts/open-perf-app.ts",
+    "perf:open:build": "turbo run build --filter=@t3tools/web && turbo run build --filter=t3 && node scripts/open-perf-app.ts",
     "test:desktop-smoke": "turbo run smoke-test --filter=@t3tools/desktop",
     "fmt": "oxfmt",
     "fmt:check": "oxfmt --check",
diff --git a/packages/shared/package.json b/packages/shared/package.json
index b35d23ef15..5e3f05e4ba 100644
--- a/packages/shared/package.json
+++ b/packages/shared/package.json
@@ -43,6 +43,14 @@
     "./String": {
       "types": "./src/String.ts",
       "import": "./src/String.ts"
+    },
+    "./perf/scenarioCatalog": {
+      "types": "./src/perf/scenarioCatalog.ts",
+      "import": "./src/perf/scenarioCatalog.ts"
+    },
+    "./perf/artifact": {
+      "types": "./src/perf/artifact.ts",
+      "import": "./src/perf/artifact.ts"
     }
   },
   "scripts": {
diff --git a/packages/shared/src/perf/artifact.ts b/packages/shared/src/perf/artifact.ts
new file mode 100644
index 0000000000..1ba886153e
--- /dev/null
+++ b/packages/shared/src/perf/artifact.ts
@@ -0,0 +1,73 @@
+import { mkdir, writeFile } from "node:fs/promises";
+import { dirname, resolve } from "node:path";
+
+export interface PerfLatencySample {
+  readonly name: string;
+  readonly durationMs: number;
+  readonly startedAt: string;
+  readonly endedAt: string;
+  readonly metadata?: Record<string, unknown>;
+}
+
+export interface PerfLatencySummary {
+  readonly count: number;
+  readonly minMs: number | null;
+  readonly avgMs: number | null;
+  readonly p50Ms: number | null;
+  readonly p95Ms: number | null;
+  readonly maxMs: number | null;
+}
+
+export function 
percentile(values: ReadonlyArray<number>, target: number): number | null {
+  if (values.length === 0) {
+    return null;
+  }
+  const sorted = values.toSorted((left, right) => left - right);
+  const clampedTarget = Math.min(Math.max(target, 0), 1);
+  const index = Math.min(
+    sorted.length - 1,
+    Math.max(0, Math.ceil(sorted.length * clampedTarget) - 1),
+  );
+  return sorted[index] ?? null;
+}
+
+export function summarizeLatencyValues(values: ReadonlyArray<number>): PerfLatencySummary {
+  if (values.length === 0) {
+    return {
+      count: 0,
+      minMs: null,
+      avgMs: null,
+      p50Ms: null,
+      p95Ms: null,
+      maxMs: null,
+    };
+  }
+
+  const minMs = values.reduce((currentMin, value) => Math.min(currentMin, value), values[0]!);
+  const maxMs = values.reduce((currentMax, value) => Math.max(currentMax, value), values[0]!);
+  const avgMs = values.reduce((sum, value) => sum + value, 0) / values.length;
+
+  return {
+    count: values.length,
+    minMs,
+    avgMs,
+    p50Ms: percentile(values, 0.5),
+    p95Ms: percentile(values, 0.95),
+    maxMs,
+  };
+}
+
+export function summarizeLatencySamples(
+  samples: ReadonlyArray<PerfLatencySample>,
+): PerfLatencySummary {
+  return summarizeLatencyValues(samples.map((sample) => sample.durationMs));
+}
+
+export async function writeJsonArtifact<TArtifact>(
+  outputPath: string,
+  artifact: TArtifact,
+): Promise<void> {
+  const resolvedOutputPath = resolve(outputPath);
+  await mkdir(dirname(resolvedOutputPath), { recursive: true });
+  await writeFile(`${resolvedOutputPath}`, `${JSON.stringify(artifact, null, 2)}\n`, "utf8");
+}
diff --git a/packages/shared/src/perf/scenarioCatalog.ts b/packages/shared/src/perf/scenarioCatalog.ts
new file mode 100644
index 0000000000..1c0acfc6ac
--- /dev/null
+++ b/packages/shared/src/perf/scenarioCatalog.ts
@@ -0,0 +1,1221 @@
+import {
+  DEFAULT_MODEL_BY_PROVIDER,
+  EventId,
+  MessageId,
+  ProjectId,
+  ThreadId,
+  TurnId,
+  type ModelSelection,
+  type ProviderKind,
+  type ProviderRuntimeEvent,
+} from "@t3tools/contracts";
+
+export type PerfSeedScenarioId = "large_threads" 
| "burst_base"; +export type PerfProviderScenarioId = "dense_assistant_stream" | "parallel_assistant_stream"; +export type PerfScenarioId = PerfSeedScenarioId | PerfProviderScenarioId; + +export interface PerfProjectScenario { + readonly id: ProjectId; + readonly title: string; + readonly workspaceDirectoryName: string; + readonly defaultModelSelection: ModelSelection; +} + +export interface PerfSeedThreadScenario { + readonly id: ThreadId; + readonly projectId: ProjectId; + readonly title: string; + readonly category: "heavy" | "burst" | "light"; + readonly turnCount: number; + readonly messageCount: number; + readonly anchorMessageId: MessageId; + readonly terminalMessageId: MessageId; + readonly planStride: number | null; + readonly activityStride: number | null; + readonly diffStride: number | null; +} + +export interface PerfSeedScenario { + readonly id: PerfSeedScenarioId; + readonly projects: ReadonlyArray; + readonly threads: ReadonlyArray; +} + +export interface TimedFixtureProviderRuntimeEvent { + readonly delayMs?: number; + readonly threadId?: ThreadId; + readonly turnId?: TurnId; + readonly type: ProviderRuntimeEvent["type"]; + readonly itemId?: string; + readonly requestId?: string; + readonly payload: unknown; +} + +export interface PerfProviderScenario { + readonly id: PerfProviderScenarioId; + readonly provider: ProviderKind; + readonly sentinelText: string; + readonly totalDurationMs: number; + readonly events: ReadonlyArray; +} + +const PERF_MODEL_SELECTION: ModelSelection = { + provider: "codex", + model: DEFAULT_MODEL_BY_PROVIDER.codex, +}; + +const makeProjectId = (slug: string) => ProjectId.makeUnsafe(`perf-project-${slug}`); +const makeProject = (slug: string, title: string): PerfProjectScenario => ({ + id: makeProjectId(slug), + title, + workspaceDirectoryName: `perf-workspace-${slug}`, + defaultModelSelection: PERF_MODEL_SELECTION, +}); + +const makeThreadId = (slug: string) => ThreadId.makeUnsafe(`perf-thread-${slug}`); +const makeTurnId 
= (threadSlug: string, index: number) =>
+  TurnId.makeUnsafe(`perf-turn-${threadSlug}-${index.toString().padStart(4, "0")}`);
+const makeMessageId = (
+  threadSlug: string,
+  role: "user" | "assistant",
+  turnIndex: number,
+  messageIndex = 1,
+) =>
+  MessageId.makeUnsafe(
+    `perf-message-${threadSlug}-${role}-${turnIndex.toString().padStart(4, "0")}-${messageIndex.toString().padStart(2, "0")}`,
+  );
+const makeLiveTurnId = (slug: string) => TurnId.makeUnsafe(`perf-live-turn-${slug}`);
+const makeLiveAssistantItemId = (
+  laneKey: string,
+  cycleIndex: number,
+  segment: "intro" | "followup",
+) => `perf-assistant-${laneKey}-${cycleIndex.toString().padStart(2, "0")}-${segment}`;
+const makeLiveAssistantMessageId = (itemId: string) => MessageId.makeUnsafe(`assistant:${itemId}`);
+const threadSlugFromId = (threadId: ThreadId) => threadId.replace("perf-thread-", "");
+
+function threadSeedValue(threadSlug: string): number {
+  return Array.from(threadSlug).reduce(
+    (sum, character, index) => sum + character.charCodeAt(0) * (index + 1),
+    0,
+  );
+}
+
+function buildAssistantMessageCountPlan(
+  input: Pick<PerfSeedThreadScenario, "category" | "turnCount" | "messageCount"> & {
+    readonly threadSlug: string;
+  },
+): ReadonlyArray<number> {
+  const assistantMessageCount = input.messageCount - input.turnCount;
+  if (assistantMessageCount < input.turnCount) {
+    throw new Error(
+      `Perf thread '${input.threadSlug}' must retain at least one assistant message per turn.`,
+    );
+  }
+
+  const averageAssistantMessages = Math.floor(assistantMessageCount / input.turnCount);
+  const minPerTurn = Math.max(
+    1,
+    averageAssistantMessages -
+      (input.category === "heavy" ? 4 : input.category === "burst" ? 2 : 1),
+  );
+  const residualAssistantMessages = assistantMessageCount - minPerTurn * input.turnCount;
+  const threadSeed = threadSeedValue(input.threadSlug);
+  const weights = Array.from({ length: input.turnCount }, (_, index) => {
+    const burstBias = input.category === "heavy" && index % 9 === 0 ? 3 : index % 6 === 0 ? 
1 : 0;
+    return 1 + ((threadSeed + (index + 1) * 11) % 7) + burstBias;
+  });
+  const weightTotal = weights.reduce((sum, weight) => sum + weight, 0);
+  const quotas = weights.map((weight) => (residualAssistantMessages * weight) / weightTotal);
+  const counts = quotas.map((quota) => minPerTurn + Math.floor(quota));
+  let remainingMessages = assistantMessageCount - counts.reduce((sum, count) => sum + count, 0);
+
+  if (remainingMessages > 0) {
+    const residualOrder = quotas
+      .map((quota, index) => ({
+        index,
+        remainder: quota - Math.floor(quota),
+      }))
+      .toSorted((left, right) => right.remainder - left.remainder || right.index - left.index);
+
+    for (let index = 0; index < residualOrder.length && remainingMessages > 0; index += 1) {
+      const targetIndex = residualOrder[index]?.index;
+      if (targetIndex === undefined) {
+        break;
+      }
+      counts[targetIndex] = (counts[targetIndex] ?? minPerTurn) + 1;
+      remainingMessages -= 1;
+    }
+  }
+
+  return counts;
+}
+
+export function buildPerfAssistantMessageCountPlan(
+  thread: Pick<PerfSeedThreadScenario, "id" | "category" | "turnCount" | "messageCount">,
+): ReadonlyArray<number> {
+  return buildAssistantMessageCountPlan({
+    threadSlug: threadSlugFromId(thread.id),
+    category: thread.category,
+    turnCount: thread.turnCount,
+    messageCount: thread.messageCount,
+  });
+}
+
+function makeSeedThreadScenario(input: {
+  readonly slug: string;
+  readonly projectId: ProjectId;
+  readonly title: string;
+  readonly category: PerfSeedThreadScenario["category"];
+  readonly turnCount: number;
+  readonly messageCount: number;
+  readonly planStride: number | null;
+  readonly activityStride: number | null;
+  readonly diffStride: number | null;
+}): PerfSeedThreadScenario {
+  const assistantMessageCountPlan = buildAssistantMessageCountPlan({
+    threadSlug: input.slug,
+    category: input.category,
+    turnCount: input.turnCount,
+    messageCount: input.messageCount,
+  });
+
+  return {
+    id: makeThreadId(input.slug),
+    projectId: input.projectId,
+    title: input.title,
+    category: input.category,
+    turnCount: input.turnCount,
+    
messageCount: input.messageCount,
+    anchorMessageId: makeMessageId(input.slug, "user", 1, 1),
+    terminalMessageId: makeMessageId(
+      input.slug,
+      "assistant",
+      input.turnCount,
+      assistantMessageCountPlan.at(-1) ?? 1,
+    ),
+    planStride: input.planStride,
+    activityStride: input.activityStride,
+    diffStride: input.diffStride,
+  };
+}
+
+const PERF_PROJECTS = {
+  inbox: makeProject("inbox", "Inbox Refactor Workspace"),
+  desktop: makeProject("desktop", "Desktop Release Workspace"),
+  runtime: makeProject("runtime", "Runtime Orchestration Workspace"),
+  marketing: makeProject("marketing", "Marketing Site Workspace"),
+  ops: makeProject("ops", "Ops Automation Workspace"),
+  burstBase: makeProject("burst-base", "Burst Harness Workspace"),
+} as const;
+
+const PERF_PROVIDER_LIVE_TURNS = {
+  navigation: makeLiveTurnId("navigation"),
+  filler: makeLiveTurnId("filler"),
+} as const;
+
+const LARGE_THREAD_DEFINITIONS = {
+  heavyA: makeSeedThreadScenario({
+    slug: "heavy-a",
+    projectId: PERF_PROJECTS.inbox.id,
+    title: "Inbox Search Regression",
+    category: "heavy",
+    turnCount: 84,
+    messageCount: 2_000,
+    planStride: 11,
+    activityStride: 4,
+    diffStride: 3,
+  }),
+  heavyB: makeSeedThreadScenario({
+    slug: "heavy-b",
+    projectId: PERF_PROJECTS.desktop.id,
+    title: "Desktop Update Rollout",
+    category: "heavy",
+    turnCount: 96,
+    messageCount: 2_000,
+    planStride: 12,
+    activityStride: 5,
+    diffStride: 4,
+  }),
+  burst: makeSeedThreadScenario({
+    slug: "large-burst",
+    projectId: PERF_PROJECTS.runtime.id,
+    title: "Runtime Burst Coordination",
+    category: "burst",
+    turnCount: 48,
+    messageCount: 640,
+    planStride: 12,
+    activityStride: 4,
+    diffStride: 5,
+  }),
+} as const satisfies Record<string, PerfSeedThreadScenario>;
+
+const LARGE_THREAD_LIGHT_LAYOUT = [
+  {
+    project: PERF_PROJECTS.inbox,
+    label: "Inbox",
+    count: 5,
+  },
+  {
+    project: PERF_PROJECTS.desktop,
+    label: "Desktop",
+    count: 6,
+  },
+  {
+    project: PERF_PROJECTS.runtime,
+    label: "Runtime",
+    count: 5,
+  },
+  {
+    project: 
PERF_PROJECTS.marketing,
+    label: "Marketing",
+    count: 6,
+  },
+  {
+    project: PERF_PROJECTS.ops,
+    label: "Ops",
+    count: 5,
+  },
+] as const;
+
+const LARGE_THREAD_LIGHT_THREADS: ReadonlyArray<PerfSeedThreadScenario> =
+  LARGE_THREAD_LIGHT_LAYOUT.flatMap((layout, projectIndex) =>
+    Array.from({ length: layout.count }, (_, localIndex) => {
+      const globalIndex =
+        LARGE_THREAD_LIGHT_LAYOUT.slice(0, projectIndex).reduce(
+          (sum, entry) => sum + entry.count,
+          0,
+        ) + localIndex;
+      const threadNumber = localIndex + 1;
+      const turnCount = 18 + ((globalIndex * 7 + projectIndex * 5 + localIndex * 3) % 8) * 7;
+      const messageDensity = 4 + ((globalIndex + projectIndex + localIndex) % 5);
+      const messageCount = Math.min(
+        900,
+        turnCount * messageDensity + 48 + ((globalIndex + projectIndex) % 5) * 18,
+      );
+
+      return makeSeedThreadScenario({
+        slug: `${layout.label.toLowerCase()}-light-${threadNumber.toString().padStart(2, "0")}`,
+        projectId: layout.project.id,
+        title: `${layout.label} Thread ${threadNumber}`,
+        category: "light",
+        turnCount,
+        messageCount,
+        planStride: globalIndex % 4 === 0 ? 14 + ((globalIndex + localIndex) % 5) : null,
+        activityStride: 5 + ((globalIndex + localIndex) % 4),
+        diffStride: globalIndex % 3 === 0 ? 
6 + ((projectIndex + localIndex) % 4) : null,
+      });
+    }),
+  );
+
+const BURST_BASE_THREAD_DEFINITIONS = {
+  burst: makeSeedThreadScenario({
+    slug: "burst",
+    projectId: PERF_PROJECTS.burstBase.id,
+    title: "Burst Target Thread",
+    category: "burst",
+    turnCount: 36,
+    messageCount: 220,
+    planStride: 12,
+    activityStride: 4,
+    diffStride: 6,
+  }),
+  navigation: makeSeedThreadScenario({
+    slug: "burst-navigation",
+    projectId: PERF_PROJECTS.burstBase.id,
+    title: "Burst Navigation Thread",
+    category: "light",
+    turnCount: 28,
+    messageCount: 112,
+    planStride: null,
+    activityStride: 5,
+    diffStride: null,
+  }),
+  filler: makeSeedThreadScenario({
+    slug: "burst-filler",
+    projectId: PERF_PROJECTS.burstBase.id,
+    title: "Burst Filler Thread",
+    category: "light",
+    turnCount: 24,
+    messageCount: 96,
+    planStride: null,
+    activityStride: 6,
+    diffStride: null,
+  }),
+} as const satisfies Record<string, PerfSeedThreadScenario>;
+
+const BURST_NAVIGATION_THREAD = BURST_BASE_THREAD_DEFINITIONS.navigation;
+const BURST_FILLER_THREAD = BURST_BASE_THREAD_DEFINITIONS.filler;
+
+export const PERF_SEED_SCENARIOS = {
+  large_threads: {
+    id: "large_threads",
+    projects: [
+      PERF_PROJECTS.inbox,
+      PERF_PROJECTS.desktop,
+      PERF_PROJECTS.runtime,
+      PERF_PROJECTS.marketing,
+      PERF_PROJECTS.ops,
+    ],
+    threads: [
+      LARGE_THREAD_DEFINITIONS.heavyA,
+      LARGE_THREAD_DEFINITIONS.heavyB,
+      LARGE_THREAD_DEFINITIONS.burst,
+      ...LARGE_THREAD_LIGHT_THREADS,
+    ],
+  },
+  burst_base: {
+    id: "burst_base",
+    projects: [PERF_PROJECTS.burstBase],
+    threads: [BURST_BASE_THREAD_DEFINITIONS.burst, BURST_NAVIGATION_THREAD, BURST_FILLER_THREAD],
+  },
+} as const satisfies Record<PerfSeedScenarioId, PerfSeedScenario>;
+
+const DENSE_ASSISTANT_STREAM_SENTINEL = "PERF_STREAM_SENTINEL:dense_assistant_stream:completed";
+const DENSE_ASSISTANT_STREAM_CYCLE_COUNT = 24;
+const DENSE_ASSISTANT_STREAM_CYCLE_INTERVAL_MS = 520;
+const DENSE_ASSISTANT_STREAM_LANE_STAGGER_MS = 12;
+const DENSE_ASSISTANT_STREAM_TURN_COMPLETION_GAP_MS = 36;
+const 
DENSE_ASSISTANT_STREAM_WORKLOG_ITEMS_PER_CYCLE = 3;
+const DENSE_ASSISTANT_STREAM_MESSAGE_FRAGMENT_GAP_MS = 24;
+const DENSE_ASSISTANT_STREAM_MESSAGE_COMPLETION_GAP_MS = 28;
+const DENSE_ASSISTANT_STREAM_WORKLOG_STARTED_GAP_MS = 20;
+const DENSE_ASSISTANT_STREAM_WORKLOG_UPDATED_GAP_MS = 24;
+const DENSE_ASSISTANT_STREAM_WORKLOG_COMPLETED_GAP_MS = 28;
+const DENSE_ASSISTANT_STREAM_WORKLOG_GROUP_GAP_MS = 12;
+const PARALLEL_ASSISTANT_STREAM_SENTINEL =
+  "PERF_STREAM_SENTINEL:parallel_assistant_stream:completed";
+const PARALLEL_ASSISTANT_STREAM_FRAGMENT_COUNT = 240;
+const PARALLEL_ASSISTANT_STREAM_FRAGMENT_GAP_MS = 24;
+const PARALLEL_ASSISTANT_STREAM_COMPLETION_GAP_MS = 48;
+
+type DenseAssistantStreamLaneKey = "burst" | "navigation" | "filler";
+
+interface DenseAssistantStreamLane {
+  readonly key: DenseAssistantStreamLaneKey;
+  readonly title: string;
+  readonly threadId?: ThreadId;
+  readonly turnId?: TurnId;
+}
+
+const DENSE_ASSISTANT_STREAM_LANES: ReadonlyArray<DenseAssistantStreamLane> = [
+  {
+    key: "burst",
+    title: "Burst thread",
+  },
+  {
+    key: "navigation",
+    title: "Navigation thread",
+    threadId: BURST_NAVIGATION_THREAD.id,
+    turnId: PERF_PROVIDER_LIVE_TURNS.navigation,
+  },
+  {
+    key: "filler",
+    title: "Filler thread",
+    threadId: BURST_FILLER_THREAD.id,
+    turnId: PERF_PROVIDER_LIVE_TURNS.filler,
+  },
+];
+
+type DenseAssistantSegment = ReadonlyArray<string>;
+
+interface DenseAssistantToolSpec {
+  readonly title: string;
+  readonly detail: string;
+  readonly command: ReadonlyArray<string>;
+  readonly files: ReadonlyArray<string>;
+}
+
+type DenseAssistantSegmentStage = "intro" | "followup";
+
+function denseAssistantLaneSeed(laneKey: DenseAssistantStreamLaneKey): number {
+  switch (laneKey) {
+    case "burst":
+      return 3;
+    case "navigation":
+      return 7;
+    case "filler":
+      return 11;
+  }
+}
+
+function buildDenseAssistantSegmentVariation(
+  laneKey: DenseAssistantStreamLaneKey,
+  cycleIndex: number,
+  stage: DenseAssistantSegmentStage,
+  baseSegment: readonly [string, string],
+): 
DenseAssistantSegment { + const seed = denseAssistantLaneSeed(laneKey) + cycleIndex * 5 + (stage === "followup" ? 2 : 0); + const fragments: string[] = [baseSegment[0]]; + + if (laneKey === "burst") { + if (seed % 2 === 0) { + fragments.push( + "I am keeping the reducer hot path narrow so the live burst still feels incremental. ", + ); + } + if (seed % 5 === 0) { + fragments.push( + "The queue still has enough buffered work to show whether the main thread starts to fan out. ", + ); + } + } else if (laneKey === "navigation") { + if (seed % 2 !== 0) { + fragments.push( + "This pass is deliberately touching selection state, unread counts, and sidebar summaries at the same time. ", + ); + } + if (seed % 4 === 0) { + fragments.push( + "I want route updates to land while the burst thread keeps painting without forcing a reset. ", + ); + } + } else { + if (seed % 3 !== 0) { + fragments.push( + "Hidden threads are still taking background work here so the test is not accidentally single-threaded. ", + ); + } + if (seed % 5 === 1) { + fragments.push( + "The background file tree is large enough that badge and projection churn should stay visible in the worklog. ", + ); + } + } + + if (stage === "followup" && seed % 3 === 0) { + fragments.push( + "I am holding the rest of the queue steady until this narrower update settles cleanly. 
", + ); + } + + fragments.push(baseSegment[1]); + return fragments; +} + +function joinDenseAssistantSegment(segment: DenseAssistantSegment): string { + return segment.join("").trim(); +} + +function mutateDenseAssistantFilePath(filePath: string, salt: number): string { + return filePath.replace(/\.(tsx?)$/, `.${(salt % 4) + 1}.$1`); +} + +function selectDenseAssistantFiles( + baseFiles: ReadonlyArray, + cycleIndex: number, + toolIndex: number, + baseCount: number, +): ReadonlyArray { + const count = Math.min(baseFiles.length, baseCount + ((cycleIndex + toolIndex) % 3)); + const startIndex = (cycleIndex * 2 + toolIndex * 3) % baseFiles.length; + return Array.from({ length: count }, (_, index) => { + const baseFile = baseFiles[(startIndex + index) % baseFiles.length]!; + return index % 2 === 0 + ? baseFile + : mutateDenseAssistantFilePath(baseFile, cycleIndex + toolIndex + index); + }); +} + +function buildDenseAssistantIntroSegment( + laneKey: DenseAssistantStreamLaneKey, + cycleIndex: number, +): DenseAssistantSegment { + const pass = cycleIndex + 1; + + if (laneKey === "burst") { + switch (cycleIndex % 4) { + case 0: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Reviewing websocket burst slice ${pass} and checking the render queue. `, + "I am about to patch the hottest reducer path before the next flush. ", + ]); + case 1: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Re-reading the active thread around viewport checkpoint ${pass}. `, + "I want the next command to touch the rows that are actually visible. ", + ]); + case 2: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Inspecting the event fan-out for burst batch ${pass}. `, + "The next command should trim duplicate projections before they hit React. 
", + ]); + default: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Checking the optimistic state that landed after websocket batch ${pass}. `, + "I am lining up another targeted update instead of doing a full recompute. ", + ]); + } + } + + if (laneKey === "navigation") { + switch (cycleIndex % 3) { + case 0: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Navigation lane is reconciling sidebar counts for pass ${pass}. `, + "I am checking whether the selected thread can stay interactive during the burst. ", + ]); + case 1: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Navigation lane is refreshing route state for pass ${pass}. `, + "The next command is scoped to sidebar metadata and unread markers. ", + ]); + default: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Navigation lane is merging background thread summaries for pass ${pass}. `, + "I am verifying that thread switches stay cheap while other turns keep moving. ", + ]); + } + } + + switch (cycleIndex % 3) { + case 0: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Filler lane is compacting idle thread state for pass ${pass}. `, + "I am keeping background reconciliation active so the burst is not single-threaded. ", + ]); + case 1: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Filler lane is reconciling file tree badges for pass ${pass}. `, + "The next command updates background state without stealing focus from the active lane. ", + ]); + default: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "intro", [ + `Filler lane is sweeping deferred projections for pass ${pass}. `, + "I am checking that hidden threads can absorb more websocket traffic without stalling. 
", + ]); + } +} + +function buildDenseAssistantFollowupSegment( + laneKey: DenseAssistantStreamLaneKey, + cycleIndex: number, + cycleCount: number, +): DenseAssistantSegment { + const pass = cycleIndex + 1; + const isLastCycle = cycleIndex === cycleCount - 1; + + if (laneKey === "burst") { + if (isLastCycle) { + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + "Applied the last reducer update and finished the visible-thread verification pass. ", + `Streaming workload drained cleanly. ${DENSE_ASSISTANT_STREAM_SENTINEL}`, + ]); + } + switch (cycleIndex % 4) { + case 0: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Queued the reducer patch for burst slice ${pass}. `, + "The viewport stayed responsive, so I am moving straight to the next live diff. ", + ]); + case 1: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Patched the visible rows touched by burst slice ${pass}. `, + "I can keep streaming without forcing a full timeline rebuild. ", + ]); + case 2: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Folded the duplicate projection work produced by burst slice ${pass}. `, + "The next command will keep pressure on the event path instead of idling. ", + ]); + default: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Settled the optimistic state from burst slice ${pass}. `, + "I am continuing with another narrow update while background threads keep progressing. ", + ]); + } + } + + if (laneKey === "navigation") { + if (isLastCycle) { + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + "Navigation lane finished its background reconciliation pass. ", + "Thread switching stayed live while the burst completed. 
", + ]); + } + switch (cycleIndex % 3) { + case 0: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Navigation lane merged sidebar counters for pass ${pass}. `, + "Selection state still looks stable under concurrent updates. ", + ]); + case 1: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Navigation lane applied the route metadata refresh for pass ${pass}. `, + "Unread state is still moving without forcing a navigation reset. ", + ]); + default: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Navigation lane committed background thread summaries for pass ${pass}. `, + "The sidebar stayed interactive while the active turn kept streaming. ", + ]); + } + } + + if (isLastCycle) { + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + "Filler lane finished compacting deferred background work. ", + "Hidden threads stayed caught up through the end of the websocket burst. ", + ]); + } + switch (cycleIndex % 3) { + case 0: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Filler lane compacted idle thread state for pass ${pass}. `, + "Background threads are still accepting updates without starving the visible thread. ", + ]); + case 1: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Filler lane refreshed file tree badges for pass ${pass}. `, + "The background projection load stayed incremental instead of spiking. ", + ]); + default: + return buildDenseAssistantSegmentVariation(laneKey, cycleIndex, "followup", [ + `Filler lane drained deferred projections for pass ${pass}. `, + "There is still enough background traffic here to catch cross-thread regressions. 
", + ]); + } +} + +function buildDenseAssistantToolSpec( + laneKey: DenseAssistantStreamLaneKey, + cycleIndex: number, + toolIndex: number, +): DenseAssistantToolSpec { + const pass = cycleIndex + 1; + + if (laneKey === "burst") { + const files = selectDenseAssistantFiles( + [ + "apps/web/src/store.ts", + "apps/web/src/session-logic.ts", + "apps/web/src/components/chat/MessagesTimeline.tsx", + "apps/web/src/components/ChatView.tsx", + "apps/web/src/lib/providerReactQuery.ts", + "apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts", + "apps/server/src/wsServer.ts", + "packages/shared/src/perf/scenarioCatalog.ts", + "apps/web/src/components/sidebar/ThreadList.tsx", + "apps/server/src/orchestration/Layers/ProjectionPipeline.ts", + ], + cycleIndex, + toolIndex, + 5, + ); + + switch (toolIndex) { + case 0: + return { + title: `Burst thread scan ${pass}`, + detail: `Scanned reducer fan-out and live queue pressure for websocket batch ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=burst", + `--batch=${pass}`, + "--step=scan", + "--touch=queue,projection", + ], + files, + }; + case 1: + return { + title: `Burst thread patch ${pass}`, + detail: `Patched the visible reducer path and timeline projection for websocket batch ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=burst", + `--batch=${pass}`, + "--step=patch", + "--touch=render,store", + ], + files, + }; + default: + return { + title: `Burst thread verify ${pass}`, + detail: `Verified viewport stability and render cadence after websocket batch ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=burst", + `--batch=${pass}`, + "--step=verify", + "--touch=viewport,metrics", + ], + files, + }; + } + } + + if (laneKey === "navigation") { + const files = selectDenseAssistantFiles( + [ + "apps/web/src/components/sidebar/ThreadList.tsx", + "apps/web/src/components/sidebar/ProjectSidebar.tsx", + "apps/web/src/routes/threadRoute.ts", + "apps/web/src/store.ts", + 
"apps/web/src/session-logic.ts", + "apps/server/src/wsServer.ts", + "apps/web/src/components/ChatView.tsx", + "apps/web/src/lib/providerReactQuery.ts", + ], + cycleIndex, + toolIndex, + 4, + ); + + switch (toolIndex) { + case 0: + return { + title: `Navigation thread sync ${pass}`, + detail: `Synced route metadata and selected-thread state for navigation pass ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=navigation", + `--batch=${pass}`, + "--step=sync", + "--touch=route,selection", + ], + files, + }; + case 1: + return { + title: `Navigation thread merge ${pass}`, + detail: `Merged sidebar counters, unread markers, and project summaries for pass ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=navigation", + `--batch=${pass}`, + "--step=merge", + "--touch=sidebar,unread", + ], + files, + }; + default: + return { + title: `Navigation thread settle ${pass}`, + detail: `Settled thread-list focus state and background summaries for navigation pass ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=navigation", + `--batch=${pass}`, + "--step=settle", + "--touch=focus,summary", + ], + files, + }; + } + } + + const files = selectDenseAssistantFiles( + [ + "apps/web/src/store.ts", + "apps/web/src/components/chat/MessagesTimeline.tsx", + "apps/web/src/components/ChatView.tsx", + "apps/server/src/orchestration/Layers/ProjectionPipeline.ts", + "apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts", + "packages/shared/src/perf/scenarioCatalog.ts", + "apps/web/src/components/sidebar/ThreadList.tsx", + "apps/server/src/wsServer.ts", + ], + cycleIndex, + toolIndex, + 4, + ); + + switch (toolIndex) { + case 0: + return { + title: `Filler thread compact ${pass}`, + detail: `Compacted deferred background projections and idle thread state for pass ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=filler", + `--batch=${pass}`, + "--step=compact", + "--touch=background,projection", + ], + files, + }; + case 1: 
+ return { + title: `Filler thread refresh ${pass}`, + detail: `Refreshed background file tree badges and hidden-thread summaries for pass ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=filler", + `--batch=${pass}`, + "--step=refresh", + "--touch=file-tree,badges", + ], + files, + }; + default: + return { + title: `Filler thread drain ${pass}`, + detail: `Drained deferred background churn without stealing focus from the visible thread on pass ${pass}.`, + command: [ + "bun", + "x", + "perf-loop", + "--lane=filler", + `--batch=${pass}`, + "--step=drain", + "--touch=background,queue", + ], + files, + }; + } +} + +function buildDenseAssistantToolPayload( + toolSpec: DenseAssistantToolSpec, + cycleIndex: number, + toolIndex: number, +) { + const files = toolSpec.files.map((filePath, fileIndex) => ({ + path: filePath, + status: + (fileIndex + cycleIndex + toolIndex) % 4 === 0 + ? "modified" + : (fileIndex + cycleIndex + toolIndex) % 4 === 1 + ? "added" + : (fileIndex + cycleIndex + toolIndex) % 4 === 2 + ? "deleted" + : "renamed", + additions: 8 + fileIndex * 3 + toolIndex * 2, + deletions: 2 + fileIndex + (cycleIndex % 3), + })); + + return { + command: toolSpec.command, + item: { + command: toolSpec.command, + input: { + command: toolSpec.command, + }, + result: { + command: toolSpec.command, + exitCode: 0, + files, + }, + }, + files, + operations: files.map((file) => ({ + type: file.status, + path: file.path, + })), + }; +} + +function buildLaneScope(lane: DenseAssistantStreamLane) { + return { + ...(lane.threadId ? { threadId: lane.threadId } : {}), + ...(lane.turnId ? 
{ turnId: lane.turnId } : {}), + } as const; +} + +function buildDenseAssistantStreamScenario(): PerfProviderScenario { + const events: TimedFixtureProviderRuntimeEvent[] = []; + const cycleCount = DENSE_ASSISTANT_STREAM_CYCLE_COUNT; + const finalCycleStartMs = DENSE_ASSISTANT_STREAM_CYCLE_INTERVAL_MS * (cycleCount - 1); + let maxCycleEventOffsetMs = 0; + + DENSE_ASSISTANT_STREAM_LANES.forEach((lane, laneIndex) => { + events.push({ + delayMs: laneIndex, + ...buildLaneScope(lane), + type: "turn.started", + payload: { + model: DEFAULT_MODEL_BY_PROVIDER.codex, + }, + }); + }); + + for (let cycleIndex = 0; cycleIndex < cycleCount; cycleIndex += 1) { + const cycleOffsetMs = cycleIndex * DENSE_ASSISTANT_STREAM_CYCLE_INTERVAL_MS; + + DENSE_ASSISTANT_STREAM_LANES.forEach((lane, laneIndex) => { + const laneScope = buildLaneScope(lane); + const laneOffsetMs = laneIndex * DENSE_ASSISTANT_STREAM_LANE_STAGGER_MS; + const introItemId = makeLiveAssistantItemId(lane.key, cycleIndex, "intro"); + const followupItemId = makeLiveAssistantItemId(lane.key, cycleIndex, "followup"); + const introSegment = buildDenseAssistantIntroSegment(lane.key, cycleIndex); + const followupSegment = buildDenseAssistantFollowupSegment(lane.key, cycleIndex, cycleCount); + let laneEventOffsetMs = 4; + + for (const fragment of introSegment) { + events.push({ + delayMs: cycleOffsetMs + laneOffsetMs + laneEventOffsetMs, + ...laneScope, + type: "content.delta", + itemId: introItemId, + payload: { + streamKind: "assistant_text", + delta: fragment, + }, + }); + laneEventOffsetMs += DENSE_ASSISTANT_STREAM_MESSAGE_FRAGMENT_GAP_MS; + } + + events.push({ + delayMs: cycleOffsetMs + laneOffsetMs + laneEventOffsetMs, + ...laneScope, + type: "item.completed", + itemId: introItemId, + payload: { + itemType: "assistant_message", + status: "completed", + detail: joinDenseAssistantSegment(introSegment), + }, + }); + laneEventOffsetMs += DENSE_ASSISTANT_STREAM_MESSAGE_COMPLETION_GAP_MS; + + for ( + let toolIndex = 0; + 
toolIndex < DENSE_ASSISTANT_STREAM_WORKLOG_ITEMS_PER_CYCLE; + toolIndex += 1 + ) { + const toolItemId = `perf-command-${lane.key}-${cycleIndex.toString().padStart(3, "0")}-${toolIndex.toString().padStart(2, "0")}`; + const toolSpec = buildDenseAssistantToolSpec(lane.key, cycleIndex, toolIndex); + const toolPayload = buildDenseAssistantToolPayload(toolSpec, cycleIndex, toolIndex); + + events.push({ + delayMs: cycleOffsetMs + laneOffsetMs + laneEventOffsetMs, + ...laneScope, + type: "item.started", + itemId: toolItemId, + payload: { + itemType: "command_execution", + title: toolSpec.title, + detail: toolSpec.detail, + }, + }); + laneEventOffsetMs += DENSE_ASSISTANT_STREAM_WORKLOG_STARTED_GAP_MS; + + events.push({ + delayMs: cycleOffsetMs + laneOffsetMs + laneEventOffsetMs, + ...laneScope, + type: "item.updated", + itemId: toolItemId, + payload: { + itemType: "command_execution", + status: "inProgress", + title: toolSpec.title, + detail: toolSpec.detail, + data: toolPayload, + }, + }); + laneEventOffsetMs += DENSE_ASSISTANT_STREAM_WORKLOG_UPDATED_GAP_MS; + + events.push({ + delayMs: cycleOffsetMs + laneOffsetMs + laneEventOffsetMs, + ...laneScope, + type: "item.completed", + itemId: toolItemId, + payload: { + itemType: "command_execution", + status: "completed", + title: toolSpec.title, + detail: toolSpec.detail, + data: toolPayload, + }, + }); + laneEventOffsetMs += DENSE_ASSISTANT_STREAM_WORKLOG_COMPLETED_GAP_MS; + laneEventOffsetMs += DENSE_ASSISTANT_STREAM_WORKLOG_GROUP_GAP_MS; + } + + for (const fragment of followupSegment) { + events.push({ + delayMs: cycleOffsetMs + laneOffsetMs + laneEventOffsetMs, + ...laneScope, + type: "content.delta", + itemId: followupItemId, + payload: { + streamKind: "assistant_text", + delta: fragment, + }, + }); + laneEventOffsetMs += DENSE_ASSISTANT_STREAM_MESSAGE_FRAGMENT_GAP_MS; + } + + const followupCompletionDelayMs = cycleOffsetMs + laneOffsetMs + laneEventOffsetMs; + events.push({ + delayMs: followupCompletionDelayMs, + 
...laneScope, + type: "item.completed", + itemId: followupItemId, + payload: { + itemType: "assistant_message", + status: "completed", + detail: joinDenseAssistantSegment(followupSegment), + }, + }); + maxCycleEventOffsetMs = Math.max(maxCycleEventOffsetMs, laneOffsetMs + laneEventOffsetMs); + }); + } + + const finalLaneRunStartMs = finalCycleStartMs + maxCycleEventOffsetMs; + const totalDurationMs = + finalLaneRunStartMs + + (DENSE_ASSISTANT_STREAM_LANES.length - 1) * DENSE_ASSISTANT_STREAM_TURN_COMPLETION_GAP_MS; + + DENSE_ASSISTANT_STREAM_LANES.forEach((lane, laneIndex) => { + events.push({ + delayMs: finalLaneRunStartMs + laneIndex * DENSE_ASSISTANT_STREAM_TURN_COMPLETION_GAP_MS, + ...buildLaneScope(lane), + type: "turn.completed", + payload: { + state: "completed", + }, + }); + }); + + return { + id: "dense_assistant_stream", + provider: "codex", + sentinelText: DENSE_ASSISTANT_STREAM_SENTINEL, + totalDurationMs, + events, + }; +} + +function buildParallelAssistantStreamScenario(): PerfProviderScenario { + const events: Array = [ + { + delayMs: 0, + type: "turn.started", + payload: { + model: "gpt-5.4", + }, + }, + ]; + + let delayMs = 0; + for ( + let fragmentIndex = 0; + fragmentIndex < PARALLEL_ASSISTANT_STREAM_FRAGMENT_COUNT; + fragmentIndex += 1 + ) { + delayMs += PARALLEL_ASSISTANT_STREAM_FRAGMENT_GAP_MS; + const isFinalFragment = fragmentIndex === PARALLEL_ASSISTANT_STREAM_FRAGMENT_COUNT - 1; + const cycleLabel = fragmentIndex.toString().padStart(3, "0"); + const delta = isFinalFragment + ? `parallel-cycle-${cycleLabel} ${PARALLEL_ASSISTANT_STREAM_SENTINEL}` + : `parallel-cycle-${cycleLabel} keeping provider ingestion and projection busy. 
`; + + events.push({ + delayMs, + type: "content.delta", + payload: { + streamKind: "assistant_text", + delta, + }, + }); + } + + delayMs += PARALLEL_ASSISTANT_STREAM_COMPLETION_GAP_MS; + events.push({ + delayMs, + type: "turn.completed", + payload: { + state: "completed", + }, + }); + + return { + id: "parallel_assistant_stream", + provider: "codex", + sentinelText: PARALLEL_ASSISTANT_STREAM_SENTINEL, + totalDurationMs: delayMs, + events, + }; +} + +export const PERF_PROVIDER_SCENARIOS = { + dense_assistant_stream: buildDenseAssistantStreamScenario(), + parallel_assistant_stream: buildParallelAssistantStreamScenario(), +} as const satisfies Record; + +export const PERF_CATALOG_IDS = { + projectId: PERF_PROJECTS.inbox.id, + largeThreads: { + heavyAThreadId: LARGE_THREAD_DEFINITIONS.heavyA.id, + heavyBThreadId: LARGE_THREAD_DEFINITIONS.heavyB.id, + heavyAProjectId: LARGE_THREAD_DEFINITIONS.heavyA.projectId, + heavyBProjectId: LARGE_THREAD_DEFINITIONS.heavyB.projectId, + heavyAProjectTitle: PERF_PROJECTS.inbox.title, + heavyBProjectTitle: PERF_PROJECTS.desktop.title, + heavyAAnchorMessageId: LARGE_THREAD_DEFINITIONS.heavyA.anchorMessageId, + heavyBAnchorMessageId: LARGE_THREAD_DEFINITIONS.heavyB.anchorMessageId, + heavyATerminalMessageId: LARGE_THREAD_DEFINITIONS.heavyA.terminalMessageId, + heavyBTerminalMessageId: LARGE_THREAD_DEFINITIONS.heavyB.terminalMessageId, + }, + burstBase: { + burstProjectId: PERF_PROJECTS.burstBase.id, + burstProjectTitle: PERF_PROJECTS.burstBase.title, + burstThreadId: BURST_BASE_THREAD_DEFINITIONS.burst.id, + burstAnchorMessageId: BURST_BASE_THREAD_DEFINITIONS.burst.anchorMessageId, + burstTerminalMessageId: BURST_BASE_THREAD_DEFINITIONS.burst.terminalMessageId, + navigationThreadId: BURST_NAVIGATION_THREAD.id, + navigationAnchorMessageId: BURST_NAVIGATION_THREAD.anchorMessageId, + navigationTerminalMessageId: BURST_NAVIGATION_THREAD.terminalMessageId, + fillerThreadId: BURST_FILLER_THREAD.id, + }, + provider: { + 
denseAssistantStreamSentinel: DENSE_ASSISTANT_STREAM_SENTINEL, + parallelAssistantStreamSentinel: PARALLEL_ASSISTANT_STREAM_SENTINEL, + navigationLiveTurnId: PERF_PROVIDER_LIVE_TURNS.navigation, + fillerLiveTurnId: PERF_PROVIDER_LIVE_TURNS.filler, + navigationLiveAssistantMessageId: makeLiveAssistantMessageId( + makeLiveAssistantItemId("navigation", 1, "followup"), + ), + burstLiveAssistantMessageId: makeLiveAssistantMessageId( + makeLiveAssistantItemId("burst", 2, "intro"), + ), + fillerLiveAssistantMessageId: makeLiveAssistantMessageId( + makeLiveAssistantItemId("filler", 1, "followup"), + ), + }, +} as const; + +export function getPerfSeedScenario(scenarioId: PerfSeedScenarioId): PerfSeedScenario { + return PERF_SEED_SCENARIOS[scenarioId]; +} + +export function getPerfProviderScenario(scenarioId: PerfProviderScenarioId): PerfProviderScenario { + return PERF_PROVIDER_SCENARIOS[scenarioId]; +} + +export function perfTurnIdForThread(thread: PerfSeedThreadScenario, turnIndex: number): TurnId { + const threadSlug = threadSlugFromId(thread.id); + return makeTurnId(threadSlug, turnIndex); +} + +export function perfMessageIdForThread( + thread: PerfSeedThreadScenario, + role: "user" | "assistant", + turnIndex: number, + messageIndex = 1, +): MessageId { + const threadSlug = threadSlugFromId(thread.id); + return makeMessageId(threadSlug, role, turnIndex, messageIndex); +} + +export function perfEventId(prefix: string, threadId: ThreadId, index: number) { + return EventId.makeUnsafe(`${prefix}:${threadId}:${index.toString().padStart(4, "0")}`); +} diff --git a/scripts/open-perf-app.ts b/scripts/open-perf-app.ts new file mode 100644 index 0000000000..ddb77629ce --- /dev/null +++ b/scripts/open-perf-app.ts @@ -0,0 +1,402 @@ +import { spawn, type ChildProcess } from "node:child_process"; +import { once } from "node:events"; +import { access, rm } from "node:fs/promises"; +import { createServer } from "node:net"; +import { resolve } from "node:path"; +import { fileURLToPath } 
from "node:url"; + +const repoRoot = fileURLToPath(new URL("../", import.meta.url)); +const serverBinPath = resolve(repoRoot, "apps/server/dist/bin.mjs"); +const serverClientIndexPath = resolve(repoRoot, "apps/server/dist/client/index.html"); +const PERF_PROVIDER_ENV = "T3CODE_PERF_PROVIDER"; +const PERF_SCENARIO_ENV = "T3CODE_PERF_SCENARIO"; +const PERF_SEED_JSON_START = "__T3_PERF_SEED_JSON_START__"; +const PERF_SEED_JSON_END = "__T3_PERF_SEED_JSON_END__"; + +type PerfSeedScenarioId = "large_threads" | "burst_base"; +type PerfProviderScenarioId = "dense_assistant_stream"; + +interface PerfSeedThreadSummary { + readonly id: string; + readonly projectId: string; + readonly projectTitle: string | null; + readonly title: string; + readonly turnCount: number | null; + readonly messageCount: number; + readonly activityCount: number; + readonly proposedPlanCount: number; + readonly checkpointCount: number; +} + +interface PerfSeedProjectSummary { + readonly id: string; + readonly title: string; + readonly workspaceRoot: string; + readonly threadCount: number; +} + +interface PerfSeededState { + readonly scenarioId: PerfSeedScenarioId; + readonly runParentDir: string; + readonly baseDir: string; + readonly workspaceRoot: string; + readonly projectTitle: string | null; + readonly projectSummaries: ReadonlyArray; + readonly threadSummaries: ReadonlyArray; +} + +interface CliOptions { + readonly scenarioId: PerfSeedScenarioId; + readonly providerScenarioId: PerfProviderScenarioId | null; + readonly host: string; + readonly port: number; + readonly openBrowser: boolean; +} + +function printHelp(): void { + process.stdout.write( + [ + "Usage: bun run perf:open -- [options]", + "", + "Options:", + " --scenario Seed scenario to launch (default: large_threads)", + " --provider Enable perf provider burst mode", + " --host Host to bind (default: 127.0.0.1)", + " --port Port to bind (default: random free port)", + " --open Open the URL in your default browser", + " --help Show this 
help", + "", + "Examples:", + " bun run perf:open -- --scenario large_threads --open", + " bun run perf:open -- --scenario burst_base --provider dense_assistant_stream --open", + "", + "Notes:", + " - This launches the built app, not Vite dev mode.", + " - Build artifacts must already exist. Run `bun run test:perf:web` once, or build `@t3tools/web` and `t3` manually.", + " - With `--provider dense_assistant_stream`, open the burst thread and send one message to trigger the live multi-thread websocket burst.", + "", + ].join("\n"), + ); +} + +function parseArgs(argv: ReadonlyArray): CliOptions { + let scenarioId: PerfSeedScenarioId = "large_threads"; + let providerScenarioId: PerfProviderScenarioId | null = null; + let host = "127.0.0.1"; + let port = 0; + let openBrowser = false; + + for (let index = 0; index < argv.length; index += 1) { + const argument = argv[index]; + switch (argument) { + case "--scenario": { + const next = argv[index + 1]; + if (next !== "large_threads" && next !== "burst_base") { + throw new Error( + `Expected a valid perf seed scenario after --scenario, received '${next ?? ""}'.`, + ); + } + scenarioId = next; + index += 1; + break; + } + case "--provider": { + const next = argv[index + 1]; + if (next !== "dense_assistant_stream") { + throw new Error( + `Expected a valid perf provider scenario after --provider, received '${next ?? ""}'.`, + ); + } + providerScenarioId = next; + index += 1; + break; + } + case "--host": { + const next = argv[index + 1]; + if (!next) { + throw new Error("Expected a host value after --host."); + } + host = next; + index += 1; + break; + } + case "--port": { + const next = argv[index + 1]; + const parsed = next ? Number.parseInt(next, 10) : Number.NaN; + if (!Number.isInteger(parsed) || parsed < 0 || parsed > 65535) { + throw new Error(`Expected a valid port after --port, received '${next ?? 
""}'.`); + } + port = parsed; + index += 1; + break; + } + case "--open": + openBrowser = true; + break; + case "--help": + case "-h": + printHelp(); + process.exit(0); + default: + throw new Error(`Unknown argument '${argument}'. Use --help for usage.`); + } + } + + return { + scenarioId, + providerScenarioId, + host, + port, + openBrowser, + }; +} + +async function pickFreePort(): Promise { + return await new Promise((resolvePort, reject) => { + const server = createServer(); + server.on("error", reject); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") { + reject(new Error("Unable to resolve a free localhost port.")); + return; + } + server.close((closeError) => { + if (closeError) { + reject(closeError); + return; + } + resolvePort(address.port); + }); + }); + }); +} + +async function verifyBuiltArtifacts(): Promise { + await Promise.all([access(serverBinPath), access(serverClientIndexPath)]).catch(() => { + throw new Error( + `Built perf artifacts are missing. Expected ${serverBinPath} and ${serverClientIndexPath}. 
Run bun run test:perf:web or build the app first.`, + ); + }); +} + +function parsePerfSeededState(stdout: string): PerfSeededState { + const startIndex = stdout.lastIndexOf(PERF_SEED_JSON_START); + const endIndex = stdout.lastIndexOf(PERF_SEED_JSON_END); + + if (startIndex === -1 || endIndex === -1 || endIndex <= startIndex) { + throw new Error(`Perf seed command did not emit the expected JSON markers.\n${stdout}`); + } + + const payload = stdout.slice(startIndex + PERF_SEED_JSON_START.length, endIndex).trim(); + return JSON.parse(payload) as PerfSeededState; +} + +async function seedPerfState(scenarioId: PerfSeedScenarioId): Promise { + const seedProcess = spawn("bun", ["run", "apps/server/scripts/seedPerfState.ts", scenarioId], { + cwd: repoRoot, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + seedProcess.stdout?.on("data", (chunk) => { + stdout += chunk.toString(); + }); + seedProcess.stderr?.on("data", (chunk) => { + stderr += chunk.toString(); + }); + + const [exitCode] = (await once(seedProcess, "exit")) as [number | null]; + if (exitCode !== 0) { + throw new Error(`Perf seed command failed with code ${exitCode ?? "unknown"}.\n${stderr}`); + } + + return parsePerfSeededState(stdout); +} + +async function waitForServerReady(url: string, process: ChildProcess): Promise { + const startedAt = Date.now(); + const timeoutMs = 45_000; + const requestTimeoutMs = 1_000; + + while (Date.now() - startedAt < timeoutMs) { + if (process.exitCode !== null) { + throw new Error(`Perf server exited early with code ${process.exitCode}.`); + } + try { + const response = await fetch(url, { + redirect: "manual", + signal: AbortSignal.timeout(requestTimeoutMs), + }); + if (response.ok) { + return; + } + } catch { + // Ignore connection races during startup. 
+ } + await new Promise((resolveDelay) => setTimeout(resolveDelay, 200)); + } + + throw new Error(`Timed out waiting for perf server readiness at ${url}.`); +} + +async function stopChildProcess(process: ChildProcess): Promise { + if (process.exitCode !== null) { + return; + } + + process.kill("SIGTERM"); + const exited = await new Promise((resolveExited) => { + const timer = setTimeout(() => resolveExited(false), 5_000); + process.once("exit", () => { + clearTimeout(timer); + resolveExited(true); + }); + }); + + if (!exited && process.exitCode === null) { + process.kill("SIGKILL"); + await new Promise((resolveExited) => { + process.once("exit", () => resolveExited()); + }); + } +} + +async function cleanupPerfRunDir(runParentDir: string): Promise { + await rm(runParentDir, { recursive: true, force: true }); +} + +function openUrl(url: string): void { + const command: [string, ...string[]] = + process.platform === "darwin" + ? ["open", url] + : process.platform === "win32" + ? ["cmd", "/c", "start", "", url] + : ["xdg-open", url]; + const child = spawn(command[0], command.slice(1), { + detached: true, + stdio: "ignore", + }); + child.unref(); +} + +function printSeedSummary( + seededState: PerfSeededState, + url: string, + providerScenarioId: string | null, +): void { + process.stdout.write(`\nPerf app ready at ${url}\n`); + process.stdout.write(`Scenario: ${seededState.scenarioId}\n`); + process.stdout.write(`Base dir: ${seededState.baseDir}\n`); + process.stdout.write(`Primary workspace: ${seededState.workspaceRoot}\n`); + process.stdout.write("Projects:\n"); + for (const project of seededState.projectSummaries) { + process.stdout.write( + ` - ${project.title} (${project.id}): ${project.threadCount} threads, ${project.workspaceRoot}\n`, + ); + } + + process.stdout.write("Threads:\n"); + for (const thread of seededState.threadSummaries.toSorted( + (left, right) => + right.messageCount - left.messageCount || left.title.localeCompare(right.title), + )) { + 
process.stdout.write( + ` - ${thread.projectTitle ?? ""} / ${thread.title} (${thread.id}): ${thread.turnCount ?? "?"} turns, ${thread.messageCount} messages, ${thread.activityCount} worklog rows, ${thread.proposedPlanCount} plans, ${thread.checkpointCount} checkpoints\n`, + ); + } + + if (providerScenarioId !== null) { + process.stdout.write("\nLive burst mode is enabled.\n"); + process.stdout.write( + "Open the burst thread and send one message to trigger the multi-thread websocket burst.\n", + ); + } + + process.stdout.write("\nPress Ctrl+C to stop the server.\n\n"); +} + +async function main(): Promise { + const options = parseArgs(process.argv.slice(2)); + await verifyBuiltArtifacts(); + const seededState = await seedPerfState(options.scenarioId); + const port = options.port === 0 ? await pickFreePort() : options.port; + + const serverProcess = spawn( + "node", + [ + serverBinPath, + "--mode", + "web", + "--host", + options.host, + "--port", + port.toString(), + "--base-dir", + seededState.baseDir, + "--no-browser", + ], + { + cwd: repoRoot, + env: { + ...process.env, + ...(options.providerScenarioId + ? { + [PERF_PROVIDER_ENV]: "1", + [PERF_SCENARIO_ENV]: options.providerScenarioId, + } + : {}), + }, + stdio: ["ignore", "inherit", "inherit"], + }, + ); + + let shuttingDown = false; + const shutdown = async (signal: NodeJS.Signals) => { + if (shuttingDown) { + return; + } + shuttingDown = true; + process.stdout.write(`\nReceived ${signal}. 
Stopping perf app...\n`); + await stopChildProcess(serverProcess); + await cleanupPerfRunDir(seededState.runParentDir); + process.exit(0); + }; + + process.on("SIGINT", () => { + void shutdown("SIGINT"); + }); + process.on("SIGTERM", () => { + void shutdown("SIGTERM"); + }); + + try { + const url = `http://${options.host}:${port.toString()}`; + await waitForServerReady(url, serverProcess); + printSeedSummary(seededState, url, options.providerScenarioId); + + if (options.openBrowser) { + openUrl(url); + } + + const [exitCode] = (await once(serverProcess, "exit")) as [number | null]; + if (!shuttingDown) { + await cleanupPerfRunDir(seededState.runParentDir); + process.exit(exitCode ?? 0); + } + } catch (error) { + await stopChildProcess(serverProcess); + await cleanupPerfRunDir(seededState.runParentDir); + throw error; + } +} + +try { + await main(); +} catch (error) { + const message = error instanceof Error ? error.message : String(error); + process.stderr.write(`${message}\n`); + process.exit(1); +} diff --git a/test/perf/support/artifact.ts b/test/perf/support/artifact.ts new file mode 100644 index 0000000000..6e821044e0 --- /dev/null +++ b/test/perf/support/artifact.ts @@ -0,0 +1,119 @@ +import { + percentile, + summarizeLatencySamples, + summarizeLatencyValues, + type PerfLatencySample, + type PerfLatencySummary, + writeJsonArtifact, +} from "../../../packages/shared/src/perf/artifact"; +import type { PerfThresholdProfile } from "./thresholds.ts"; + +export interface PerfActionDuration { + readonly name: string; + readonly durationMs: number; + readonly startedAtMs: number; + readonly endedAtMs: number; +} + +export interface PerfLongTaskSample { + readonly startTimeMs: number; + readonly durationMs: number; + readonly name: string; +} + +export interface PerfMountedRowSample { + readonly label: string; + readonly count: number; + readonly capturedAtMs: number; +} + +export interface BrowserPerfMetrics { + readonly actions: ReadonlyArray; + readonly longTasks: 
ReadonlyArray; + readonly rafGapsMs: ReadonlyArray; + readonly mountedRowSamples: ReadonlyArray; +} + +export interface PerfServerMetricSample { + readonly capturedAt: string; + readonly cpuUserMicros?: number; + readonly cpuSystemMicros?: number; + readonly rssBytes?: number; + readonly heapTotalBytes?: number; + readonly heapUsedBytes?: number; + readonly externalBytes?: number; + readonly arrayBuffersBytes?: number; +} + +export interface PerfArtifactSummary { + readonly maxMountedTimelineRows: number; + readonly threadSwitchP50Ms: number | null; + readonly threadSwitchP95Ms: number | null; + readonly maxLongTaskMs: number; + readonly longTasksOver50Ms: number; + readonly maxRafGapMs: number; + readonly burstCompletionMs: number | null; +} + +export type { PerfLatencySample, PerfLatencySummary }; +export { percentile, summarizeLatencySamples, summarizeLatencyValues, writeJsonArtifact }; + +export interface PerfRunArtifact { + readonly suite: string; + readonly scenarioId: string; + readonly startedAt: string; + readonly completedAt: string; + readonly thresholds: PerfThresholdProfile; + readonly summary: PerfArtifactSummary; + readonly browserMetrics: BrowserPerfMetrics; + readonly serverMetrics: ReadonlyArray | null; + readonly metadata?: Record; +} + +export function summarizeBrowserPerfMetrics( + browserMetrics: BrowserPerfMetrics, + options?: { + readonly threadSwitchActionPrefix?: string; + readonly burstActionName?: string; + }, +): PerfArtifactSummary { + const threadSwitchActions = browserMetrics.actions + .filter((action) => + options?.threadSwitchActionPrefix + ? action.name.startsWith(options.threadSwitchActionPrefix) + : action.name.startsWith("thread-switch"), + ) + .map((action) => action.durationMs); + const burstCompletionAction = browserMetrics.actions.find( + (action) => action.name === (options?.burstActionName ?? 
"burst-completion"), + ); + const maxMountedTimelineRows = browserMetrics.mountedRowSamples.reduce( + (maxCount, sample) => Math.max(maxCount, sample.count), + 0, + ); + const maxLongTaskMs = browserMetrics.longTasks.reduce( + (maxDuration, sample) => Math.max(maxDuration, sample.durationMs), + 0, + ); + const longTasksOver50Ms = browserMetrics.longTasks.filter( + (sample) => sample.durationMs > 50, + ).length; + const maxRafGapMs = browserMetrics.rafGapsMs.reduce((maxGap, gap) => Math.max(maxGap, gap), 0); + + return { + maxMountedTimelineRows, + threadSwitchP50Ms: percentile(threadSwitchActions, 0.5), + threadSwitchP95Ms: percentile(threadSwitchActions, 0.95), + maxLongTaskMs, + longTasksOver50Ms, + maxRafGapMs, + burstCompletionMs: burstCompletionAction?.durationMs ?? null, + }; +} + +export async function writePerfArtifact( + outputPath: string, + artifact: PerfRunArtifact, +): Promise { + await writeJsonArtifact(outputPath, artifact); +} diff --git a/test/perf/support/browserMetrics.ts b/test/perf/support/browserMetrics.ts new file mode 100644 index 0000000000..e309c95ced --- /dev/null +++ b/test/perf/support/browserMetrics.ts @@ -0,0 +1,111 @@ +import type { BrowserPerfMetrics } from "./artifact.ts"; + +export const PERF_BROWSER_GLOBAL = "__t3PerfCollector"; +const DEFAULT_TIMELINE_ROW_SELECTOR = "[data-timeline-row-kind]"; + +export interface BrowserPerfCollector { + readonly startAction: (name: string) => void; + readonly endAction: (name: string) => number | null; + readonly sampleMountedRows: (label: string) => number; + readonly snapshot: () => BrowserPerfMetrics; + readonly reset: () => void; +} + +declare global { + interface Window { + __t3PerfCollector?: BrowserPerfCollector; + } +} + +export function installBrowserPerfCollector( + timelineRowSelector = DEFAULT_TIMELINE_ROW_SELECTOR, +): void { + if (typeof window === "undefined" || window.__t3PerfCollector) { + return; + } + + const actionStarts = new Map(); + const actions: Array = []; + const 
longTasks: Array = []; + const rafGapsMs: number[] = []; + const mountedRowSamples: Array = []; + let previousAnimationFrameTs = 0; + let rafHandle = 0; + + const animationFrameLoop = (timestampMs: number) => { + if (previousAnimationFrameTs > 0) { + rafGapsMs.push(timestampMs - previousAnimationFrameTs); + } + previousAnimationFrameTs = timestampMs; + rafHandle = window.requestAnimationFrame(animationFrameLoop); + }; + rafHandle = window.requestAnimationFrame(animationFrameLoop); + + if (typeof PerformanceObserver !== "undefined") { + try { + const longTaskObserver = new PerformanceObserver((list: PerformanceObserverEntryList) => { + for (const entry of list.getEntries()) { + longTasks.push({ + name: entry.name, + startTimeMs: entry.startTime, + durationMs: entry.duration, + }); + } + }); + longTaskObserver.observe({ entryTypes: ["longtask"] } as PerformanceObserverInit); + } catch { + // Ignore browsers or runtimes that do not expose long-task observation. + } + } + + const readMountedRows = () => document.querySelectorAll(timelineRowSelector).length; + + window.__t3PerfCollector = { + startAction(name: string) { + actionStarts.set(name, performance.now()); + }, + endAction(name: string) { + const startedAtMs = actionStarts.get(name); + if (startedAtMs === undefined) { + return null; + } + const endedAtMs = performance.now(); + const durationMs = endedAtMs - startedAtMs; + actions.push({ + name, + durationMs, + startedAtMs, + endedAtMs, + }); + actionStarts.delete(name); + return durationMs; + }, + sampleMountedRows(label: string) { + const count = readMountedRows(); + mountedRowSamples.push({ + label, + count, + capturedAtMs: performance.now(), + }); + return count; + }, + snapshot() { + return { + actions: [...actions], + longTasks: [...longTasks], + rafGapsMs: [...rafGapsMs], + mountedRowSamples: [...mountedRowSamples], + }; + }, + reset() { + actionStarts.clear(); + actions.length = 0; + longTasks.length = 0; + rafGapsMs.length = 0; + mountedRowSamples.length 
= 0; + previousAnimationFrameTs = 0; + window.cancelAnimationFrame(rafHandle); + rafHandle = window.requestAnimationFrame(animationFrameLoop); + }, + }; +} diff --git a/test/perf/support/scenarioCatalog.ts b/test/perf/support/scenarioCatalog.ts new file mode 100644 index 0000000000..8e237e92e2 --- /dev/null +++ b/test/perf/support/scenarioCatalog.ts @@ -0,0 +1 @@ +export * from "../../../packages/shared/src/perf/scenarioCatalog"; diff --git a/test/perf/support/serverSampler.ts b/test/perf/support/serverSampler.ts new file mode 100644 index 0000000000..265b10061e --- /dev/null +++ b/test/perf/support/serverSampler.ts @@ -0,0 +1,14 @@ +import type { PerfServerMetricSample } from "./artifact.ts"; + +export interface PerfServerSampler { + readonly start: (input: { readonly pid: number }) => Promise; + readonly stop: () => Promise | null>; +} + +export class NoopServerSampler implements PerfServerSampler { + async start(_input: { readonly pid: number }): Promise {} + + async stop(): Promise { + return null; + } +} diff --git a/test/perf/support/thresholds.ts b/test/perf/support/thresholds.ts new file mode 100644 index 0000000000..173f9c66b4 --- /dev/null +++ b/test/perf/support/thresholds.ts @@ -0,0 +1,23 @@ +export interface PerfThresholdProfile { + readonly maxMountedTimelineRows: number; + readonly threadSwitchP50Ms: number; + readonly threadSwitchP95Ms: number; + readonly maxLongTaskMs: number; + readonly maxRafGapMs: number; + readonly burstCompletionMs: number; + readonly longTasksOver50MsMax: number; +} + +export const PERF_THRESHOLDS = { + local: { + maxMountedTimelineRows: 140, + threadSwitchP50Ms: 250, + threadSwitchP95Ms: 500, + maxLongTaskMs: 120, + maxRafGapMs: 120, + burstCompletionMs: 14_000, + longTasksOver50MsMax: 2, + }, +} as const satisfies Record; + +export type PerfThresholdProfileName = keyof typeof PERF_THRESHOLDS;