diff --git a/js/dev/server.test.ts b/js/dev/server.test.ts new file mode 100644 index 000000000..e7b4d8e3e --- /dev/null +++ b/js/dev/server.test.ts @@ -0,0 +1,78 @@ +import { describe, expect, test, vi } from "vitest"; +import { type BraintrustState } from "../src/logger"; +import { _exportsForTestingOnly } from "./server"; + +describe("run eval dataset selector helpers", () => { + const state = {} as BraintrustState; + + test("maps project dataset refs into initDataset args", async () => { + await expect( + _exportsForTestingOnly.buildRunEvalDatasetInitArgs(state, { + project_name: "test-project", + dataset_name: "test-dataset", + dataset_environment: "production", + _internal_btql: { limit: 10 }, + }), + ).resolves.toEqual({ + state, + project: "test-project", + dataset: "test-dataset", + environment: "production", + _internal_btql: { limit: 10 }, + }); + }); + + test("maps dataset id refs into initDataset args", async () => { + const lookupDatasetById = vi.fn().mockResolvedValue({ + projectId: "project-id-123", + dataset: "resolved-dataset", + }); + + await expect( + _exportsForTestingOnly.buildRunEvalDatasetInitArgs( + state, + { + dataset_id: "dataset-id-123", + dataset_snapshot_name: "release-candidate", + }, + lookupDatasetById, + ), + ).resolves.toEqual({ + state, + projectId: "project-id-123", + dataset: "resolved-dataset", + snapshotName: "release-candidate", + }); + expect(lookupDatasetById).toHaveBeenCalledWith({ + state, + datasetId: "dataset-id-123", + }); + }); + + test("prefers dataset_version over other dataset selectors", () => { + expect( + _exportsForTestingOnly.getRunEvalDatasetSelector({ + project_name: "test-project", + dataset_name: "test-dataset", + dataset_version: "123", + dataset_snapshot_name: "release-candidate", + dataset_environment: "production", + }), + ).toEqual({ + version: "123", + }); + }); + + test("prefers dataset_snapshot_name over dataset_environment", () => { + expect( + _exportsForTestingOnly.getRunEvalDatasetSelector({ + project_name: "test-project", + dataset_name: "test-dataset", + dataset_snapshot_name: "release-candidate", + dataset_environment: "production", + }), + ).toEqual({ + snapshotName: "release-candidate", + }); + }); +}); diff --git a/js/dev/server.ts b/js/dev/server.ts index aee357ad5..186985c54 100644 --- a/js/dev/server.ts +++ b/js/dev/server.ts @@ -305,32 +305,102 @@ const asyncHandler = Promise.resolve(fn(req, res, next)).catch(next); }; -async function getDataset( +type RunEvalDatasetSelector = + | { + version: string; + environment?: never; + snapshotName?: never; + } + | { + version?: never; + environment: string; + snapshotName?: never; + } + | { + version?: never; + environment?: never; + snapshotName: string; + } + | { + version?: never; + environment?: never; + snapshotName?: never; + }; + +type RunEvalDatasetReference = + | Extract + | Extract; + +type RunEvalDatasetInitArgs = { + state: BraintrustState; + dataset: string; + _internal_btql?: Record; +} & ( + | { project: string; projectId?: never } + | { project?: never; projectId: string } +) & + RunEvalDatasetSelector; + +function getRunEvalDatasetSelector( + data: RunEvalDatasetReference, +): RunEvalDatasetSelector { + if (data.dataset_version != null) { + return { version: data.dataset_version }; + } + if (data.dataset_snapshot_name != null) { + return { snapshotName: data.dataset_snapshot_name }; + } + if (data.dataset_environment != null) { + return { environment: data.dataset_environment }; + } + + return {}; +} + +async function buildRunEvalDatasetInitArgs( state: BraintrustState, - data: RunEvalRequest["data"], -): Promise> { + data: RunEvalDatasetReference, + lookupDatasetById: typeof getDatasetById = getDatasetById, +): Promise { + const commonArgs = { + state, + ...(data._internal_btql != null + ? { _internal_btql: data._internal_btql } + : {}), + ...getRunEvalDatasetSelector(data), + }; + if ("project_name" in data) { - return initDataset({ - state, + const args = { + ...commonArgs, project: data.project_name, dataset: data.dataset_name, - _internal_btql: data._internal_btql ?? undefined, - }); - } else if ("dataset_id" in data) { - const datasetInfo = await getDatasetById({ - state, - datasetId: data.dataset_id, - }); - return initDataset({ - state, - projectId: datasetInfo.projectId, - dataset: datasetInfo.dataset, - _internal_btql: data._internal_btql ?? undefined, - }); - } else { + } satisfies RunEvalDatasetInitArgs; + return args; + } + + const datasetInfo = await lookupDatasetById({ + state, + datasetId: data.dataset_id, + }); + const args = { + ...commonArgs, + projectId: datasetInfo.projectId, + dataset: datasetInfo.dataset, + } satisfies RunEvalDatasetInitArgs; + return args; +} + +async function getDataset( + state: BraintrustState, + data: RunEvalRequest["data"], +): Promise> { + if ("data" in data) { // eslint-disable-next-line @typescript-eslint/consistent-type-assertions return data.data as EvalCase[]; } + + return initDataset(await buildRunEvalDatasetInitArgs(state, data)); } const datasetFetchSchema = z.object({ @@ -354,6 +424,11 @@ async function getDatasetById({ return { projectId: parsed[0].project_id, dataset: parsed[0].name }; } +export const _exportsForTestingOnly = { + buildRunEvalDatasetInitArgs, + getRunEvalDatasetSelector, +}; + function makeScorer( state: BraintrustState, name: string, diff --git a/js/src/cli/index.ts b/js/src/cli/index.ts index 48a38be3f..863f19987 100755 --- a/js/src/cli/index.ts +++ b/js/src/cli/index.ts @@ -127,6 +127,7 @@ async function initExperiment( fallback: (_text: string, url: string) => url, }) : "locally"; + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error( chalk.cyan("▶") + ` Experiment ${chalk.bold(info.experimentName)} is running at ${linkText}`, @@ -219,13 +220,17 @@ function buildWatchPluginForEvaluator( name: "run-evalutator-on-end", setup(build: esbuild.PluginBuild) { build.onEnd(async (result) => { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error(`Done building ${inFile}`); if (!result.outputFiles) { if (opts.showDetailedErrors) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn(`Failed to compile ${inFile}`); + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn(result.errors); } else { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn(`Failed to compile ${inFile}: ${result.errors}`); } return; @@ -306,6 +311,7 @@ function buildWatchPluginForEvaluator( )) { const success = await reporter.reportRun(await Promise.all(results)); if (!success) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error(error(`Reporter ${reporterName} failed.`)); } } @@ -421,9 +427,12 @@ export function handleBuildFailure({ if (terminateOnFailure) { throw result.error; } else if (showDetailedErrors) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn(`Failed to compile ${result.sourceFile}`); + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn(result.error); } else { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn( `Failed to compile ${result.sourceFile}: ${result.error.message}`, ); @@ -466,6 +475,7 @@ function updateEvaluators( evaluators.reporters[reporterName] && evaluators.reporters[reporterName] !== reporter ) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn( warning( `Reporter '${reporterName}' already exists. Will skip '${reporterName}' from ${result.sourceFile}.`, @@ -486,12 +496,14 @@ async function runAndWatch({ onExit?: () => void; }) { const count = Object.keys(handles).length; + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error(`Watching ${pluralize("file", count, true)}...`); Object.values(handles).map((handle) => handle.watch()); ["SIGINT", "SIGTERM"].forEach((signal: string) => { process.on(signal, function () { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error("Stopped watching."); for (const handle of Object.values(handles)) { handle.destroy(); @@ -540,6 +552,7 @@ async function runOnce( if (opts.list) { for (const evaluator of evaluators.evaluators) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.log(evaluator.evaluator.evalName); } return true; @@ -581,6 +594,7 @@ async function runOnce( } }); + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error( chalk.dim( `Processing ${chalk.bold(resultPromises.length)} evaluator${resultPromises.length === 1 ? "" : "s"}...`, @@ -588,6 +602,7 @@ async function runOnce( ); const allEvalsResults = await Promise.all(resultPromises); opts.progressReporter.stop(); + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error(""); const evalReports: Record< @@ -685,6 +700,7 @@ async function collectFiles( try { pathStat = fs.lstatSync(inputPath); } catch (e) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error(error(`Error reading ${inputPath}: ${e}`)); process.exit(1); } @@ -699,6 +715,7 @@ async function collectFiles( ) ) { const prefix = mode === "eval" ? ".eval" : ""; + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn( warning( `Reading ${inputPath} because it was specified directly. Rename it to end in ${prefix}.ts or ` + @@ -848,6 +865,7 @@ export async function initializeHandles({ for (const inputPath of inputPaths) { const newFiles = await collectFiles(inputPath, mode); if (newFiles.length == 0) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn( warning( `Provided path ${inputPath} is not an eval file or a directory containing eval files, skipping...`, @@ -860,6 +878,7 @@ export async function initializeHandles({ } if (Object.keys(files).length == 0) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.warn( warning("No eval files were found in any of the provided paths."), ); @@ -906,6 +925,7 @@ async function run(args: RunArgs) { // Load via dotenv library const loaded = dotenv.config({ path: args.env_file }); if (loaded.error) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error(error(`Error loading ${args.env_file}: ${loaded.error}`)); process.exit(1); } @@ -930,6 +950,7 @@ async function run(args: RunArgs) { }; if (args.list && args.watch) { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. console.error(error("Cannot specify both --list and --watch.")); process.exit(1); } diff --git a/js/src/exports.ts b/js/src/exports.ts index c01c1e3cb..5cab7a8e2 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -8,6 +8,7 @@ export type { CompiledPromptParams, CompletionPrompt, ContextParentSpanIds, + DatasetSnapshot, DataSummary, DatasetSummary, DefaultMetadataType, diff --git a/js/src/generated_types.ts b/js/src/generated_types.ts index 6d0414645..f43e37634 100644 --- a/js/src/generated_types.ts +++ b/js/src/generated_types.ts @@ -1825,6 +1825,9 @@ export const RunEval = z.object({ data: z.union([ z.object({ dataset_id: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), @@ -1832,6 +1835,9 @@ export const RunEval = z.object({ z.object({ project_name: z.string(), dataset_name: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index fc4f14f7b..cf7cb9913 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -4,6 +4,7 @@ import { vi, expect, test, describe, beforeEach, afterEach } from "vitest"; import { _exportsForTestingOnly, init, + initDataset, initLogger, Prompt, BraintrustState, @@ -453,6 +454,341 @@ test("init accepts dataset with id and version", () => { expect(datasetWithVersion.version).toBe("v2"); }); +test("init accepts dataset with id and environment", () => { + const datasetWithEnvironment = { + id: "dataset-id-123", + environment: "production", + }; + + expect(datasetWithEnvironment.id).toBe("dataset-id-123"); + expect(datasetWithEnvironment.environment).toBe("production"); +}); + +test("init accepts dataset with id and snapshotName", () => { + const datasetWithSnapshot = { + id: "dataset-id-123", + snapshotName: "123", + }; + + expect(datasetWithSnapshot.id).toBe("dataset-id-123"); + expect(datasetWithSnapshot.snapshotName).toBe("123"); +}); + +test("initDataset prefers version over environment in eval data", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + environment: "production", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.toEvalData preserves dataset_environment", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.apiConn(), "get_json").mockResolvedValue({ + object_version: "123", + }); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + environment: "production", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_environment: "production", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.toEvalData preserves dataset_snapshot_name", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "get_json").mockResolvedValue([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + snapshotName: "123", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_snapshot_name: "123", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init keeps plain dataset refs attached to the experiment", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(experiment.dataset).toMatchObject({ + id: "00000000-0000-0000-0000-000000000002", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init resolves dataset environment before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const getJson = vi.spyOn(state.apiConn(), "get_json").mockResolvedValue({ + object_version: "123", + }); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(getJson).toHaveBeenCalledWith( + "environment-object/dataset/00000000-0000-0000-0000-000000000002/production", + ); + expect(experiment.dataset).toMatchObject({ + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init prefers dataset version over environment before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const getJson = vi.spyOn(state.apiConn(), "get_json"); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + version: "123", + environment: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(getJson).not.toHaveBeenCalled(); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init resolves dataset snapshots before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const appGetJson = vi.spyOn(state.appConn(), "get_json").mockResolvedValue([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + snapshotName: "123", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(appGetJson).toHaveBeenCalledWith("api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + }); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "456", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init surfaces dataset environment lookup errors instead of falling back to latest", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.apiConn(), "get_json").mockRejectedValue( + new Error("environment lookup failed"), + ); + const postJson = vi.spyOn(state.appConn(), "post_json"); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }, + setCurrent: false, + state, + }); + + await expect(experiment.id).rejects.toThrow("environment lookup failed"); + expect(postJson).not.toHaveBeenCalled(); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + describe("loader version precedence", () => { let state: BraintrustState; let getJson: ReturnType; @@ -614,6 +950,370 @@ describe("loader version precedence", () => { version: "v1", }); }); + + test("initDataset resolves env to version before fetching dataset rows", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + getJson.mockResolvedValueOnce({ + object_version: "123", + }); + const post = vi.spyOn(state.apiConn(), "post").mockResolvedValue({ + json: vi.fn().mockResolvedValue({ data: [], cursor: undefined }), + } as Response); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + environment: "production", + state, + }); + + await dataset.fetchedData(); + + expect(getJson).toHaveBeenCalledWith( + "environment-object/dataset/00000000-0000-0000-0000-000000000002/production", + ); + const requestBody = post.mock.calls[0]?.[1] as Record; + expect(requestBody).toMatchObject({ + version: "123", + query_source: "js_sdk_object_fetcher_dataset", + }); + expect(requestBody).not.toHaveProperty("env"); + }); + + test("initDataset resolves snapshots before experiment registration when passed as a Dataset object", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + const appGetJson = vi.spyOn(state.appConn(), "get_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + appGetJson.mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + snapshotName: "123", + state, + }); + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(appGetJson).toHaveBeenCalledWith("api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + }); + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "456", + }), + ); + }); + + test("initDataset surfaces snapshot lookup errors instead of falling back to latest", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + vi.spyOn(state.appConn(), "get_json").mockRejectedValueOnce( + new Error("snapshot lookup failed"), + ); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + snapshotName: "123", + state, + }); + + await expect(dataset.version()).rejects.toThrow("snapshot lookup failed"); + }); + + test("initDataset requires a matching snapshot name when a snapshot is requested", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + vi.spyOn(state.appConn(), "get_json").mockResolvedValueOnce([]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + snapshotName: "123", + state, + }); + + await expect(dataset.version()).rejects.toThrow( + 'Dataset snapshot "123" not found for 00000000-0000-0000-0000-000000000002', + ); + }); + + test("createSnapshot allows purely numeric snapshot names", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + dataset_snapshot: { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }, + found_existing: false, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "456", + state, + }); + + await expect( + dataset.createSnapshot({ + name: "123", + }), + ).resolves.toEqual({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }); + + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/dataset_snapshot/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + xact_id: "456", + }), + ); + }); + + test("createSnapshot returns the created snapshot from the register response", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + dataset_snapshot: { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: "Pinned snapshot", + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + found_existing: false, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect( + dataset.createSnapshot({ + name: "production", + description: "Pinned snapshot", + }), + ).resolves.toEqual({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: "Pinned snapshot", + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }); + + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/dataset_snapshot/register", + { + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: "Pinned snapshot", + xact_id: "123", + }, + ); + }); + + test("listSnapshots returns dataset snapshots", async () => { + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + const appGetJson = vi + .spyOn(state.appConn(), "get_json") + .mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect(dataset.listSnapshots()).resolves.toEqual([ + { + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + expect(appGetJson).toHaveBeenCalledWith("api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + }); + }); + + test("deleteSnapshot returns the deleted snapshot", async () => { + const postJson = vi.spyOn(state.appConn(), "post_json"); + postJson + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect( + dataset.deleteSnapshot("00000000-0000-0000-0000-000000000003"), + ).resolves.toEqual({ + id: "00000000-0000-0000-0000-000000000003", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "production", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }); + + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/dataset_snapshot/delete_id", + { + id: "00000000-0000-0000-0000-000000000003", + }, + ); + }); }); describe("prompt.build structured output templating", () => { diff --git a/js/src/logger.ts b/js/src/logger.ts index 658d47ae1..9767cf851 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -90,6 +90,21 @@ const RESET_CONTEXT_MANAGER_STATE = Symbol.for( // 6 MB for the AWS lambda gateway (from our own testing). export const DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024; +const datasetSnapshotResponseSchema = z.object({ + id: z.string().uuid(), + dataset_id: z.string().uuid(), + name: z.string(), + description: z.string().nullish(), + xact_id: z.string(), + created: z.string(), +}); +export type DatasetSnapshot = z.infer; + +const datasetSnapshotRegisterResponseSchema = z.object({ + dataset_snapshot: datasetSnapshotResponseSchema, + found_existing: z.boolean().optional(), +}); + const parametersRowSchema = z.object({ id: z.string().uuid(), _xact_id: z.string(), @@ -3366,13 +3381,18 @@ type InitOpenOption = { open?: IsOpen; }; +type DatasetSelection = { + version?: string; + environment?: string; + snapshotName?: string; +}; + /** - * Reference to a dataset by ID and optional version. + * Reference to a dataset by ID and optional explicit selector. */ -export interface DatasetRef { +export type DatasetRef = { id: string; - version?: string; -} +} & DatasetSelection; export interface ParametersRef { id: string; @@ -3598,20 +3618,13 @@ export function init( } if (dataset !== undefined) { - if ( - "id" in dataset && - typeof dataset.id === "string" && - !("__braintrust_dataset_marker" in dataset) - ) { - // Simple {id: ..., version?: ...} object - args["dataset_id"] = dataset.id; - if ("version" in dataset && dataset.version !== undefined) { - args["dataset_version"] = dataset.version; - } - } else { - // Full Dataset object - args["dataset_id"] = await (dataset as AnyDataset).id; - args["dataset_version"] = await (dataset as AnyDataset).version(); + const datasetSelection = await serializeDatasetForExperiment({ + dataset, + state, + }); + args["dataset_id"] = datasetSelection.datasetId; + if (datasetSelection.datasetVersion !== undefined) { + args["dataset_version"] = datasetSelection.datasetVersion; } } @@ -3681,9 +3694,7 @@ export function init( const ret = new Experiment( state, lazyMetadata, - dataset !== undefined && "version" in dataset - ? (dataset as AnyDataset) - : undefined, + dataset !== undefined ? (dataset as AnyDataset) : undefined, ); if (options.setCurrent ?? true) { state.currentExperiment = ret; @@ -3695,7 +3706,7 @@ export function init( * Alias for init(options). */ export function initExperiment( - options: Readonly>, + options: Readonly>, ): InitializedExperiment; /** @@ -3712,7 +3723,7 @@ export function initExperiment( * `initExperiment(project, options)`. */ export function initExperiment( - projectOrOptions: string | Readonly>, + projectOrOptions: string | Readonly>, optionalOptions?: Readonly>, ): InitializedExperiment { const options = ((): Readonly> => { @@ -3772,6 +3783,8 @@ export type InitDatasetOptions = dataset?: string; description?: string; version?: string; + environment?: string; + snapshotName?: string; projectId?: string; metadata?: Record; state?: BraintrustState; @@ -3782,6 +3795,188 @@ export type FullInitDatasetOptions = { project?: string; } & InitDatasetOptions; +async function getDatasetSnapshots({ + state, + datasetId, +}: { + state: BraintrustState; + datasetId: string; +}): Promise { + return datasetSnapshotResponseSchema.array().parse( + await state.appConn().get_json("api/dataset_snapshot/get", { + dataset_id: datasetId, + }), + ); +} + +function normalizeDatasetSelection({ + version, + environment, + snapshotName, +}: { + version?: string; + environment?: string; + snapshotName?: string; +}): DatasetSelection { + if (version !== undefined) { + return { version }; + } + + if (snapshotName !== undefined) { + return { snapshotName }; + } + + if (environment !== undefined) { + return { environment }; + } + + return {}; +} + +async function resolveDatasetSnapshotName({ + state, + datasetId, + snapshotName, +}: { + state: BraintrustState; + datasetId: string; + snapshotName: string; +}): Promise { + const snapshots = await getDatasetSnapshots({ state, datasetId }); + const match = snapshots.find((s) => s.name === snapshotName); + if (!match) { + throw new Error( + `Dataset snapshot "${snapshotName}" not found for ${datasetId}`, + ); + } + return match.xact_id; +} + +async function resolveDatasetSnapshotNameForMetadata({ + state, + lazyMetadata, + snapshotName, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + snapshotName: string; +}): Promise { + const metadata = await lazyMetadata.get(); + return await resolveDatasetSnapshotName({ + state, + datasetId: metadata.dataset.id, + snapshotName, + }); +} + +async function resolveDatasetEnvironment({ + state, + datasetId, + environment, +}: { + state: BraintrustState; + datasetId: string; + environment: string; +}): Promise { + const response = await state + .apiConn() + .get_json( + `environment-object/dataset/${datasetId}/${encodeURIComponent(environment)}`, + ); + return z.object({ object_version: z.string() }).parse(response) + .object_version; +} + +async function resolveDatasetEnvironmentForMetadata({ + state, + lazyMetadata, + environment, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + environment: string; +}): Promise { + const metadata = await lazyMetadata.get(); + return await resolveDatasetEnvironment({ + state, + datasetId: metadata.dataset.id, + environment, + }); +} + +async function serializeDatasetForExperiment({ + dataset, + state, +}: { + dataset: AnyDataset | DatasetRef; + state: BraintrustState; +}): Promise<{ datasetId: string; datasetVersion?: string }> { + if (!Dataset.isDataset(dataset)) { + const selection = normalizeDatasetSelection(dataset); + + if (selection.version !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: selection.version, + }; + } + + if (selection.snapshotName !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: await resolveDatasetSnapshotName({ + state, + datasetId: dataset.id, + snapshotName: selection.snapshotName, + }), + }; + } + + if (selection.environment !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: await resolveDatasetEnvironment({ + state, + datasetId: dataset.id, + environment: selection.environment, + }), + }; + } + + return { + datasetId: dataset.id, + }; + } + + const evalData = await dataset.toEvalData(); + const selection = normalizeDatasetSelection({ + version: evalData.dataset_version, + environment: evalData.dataset_environment, + snapshotName: evalData.dataset_snapshot_name, + }); + + if (selection.version !== undefined) { + return { + datasetId: evalData.dataset_id, + datasetVersion: selection.version, + }; + } + + if ( + selection.environment !== undefined || + selection.snapshotName !== undefined + ) { + return { + datasetId: evalData.dataset_id, + datasetVersion: await dataset.version(), + }; + } + + return { + datasetId: evalData.dataset_id, + }; +} + /** * Create a new dataset in a specified project. If the project does not exist, it will be created. * @@ -3789,6 +3984,9 @@ export type FullInitDatasetOptions = { * @param options.project The name of the project to create the dataset in. Must specify at least one of `project` or `projectId`. * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically. * @param options.description An optional description of the dataset. + * @param options.version Pin the dataset to a specific version xact_id. If `snapshotName` or `environment` are also provided, `version` takes precedence. + * @param options.snapshotName Pin the dataset to the version captured by this named snapshot. Snapshot names are resolved to a concrete xact_id and throw if no matching snapshot exists. If `environment` is also provided, `snapshotName` takes precedence. + * @param options.environment Pin the dataset to the version tagged with this environment slug. Throws if the environment lookup fails when it is the selected dataset selector. * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev. * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login. * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple. @@ -3845,6 +4043,8 @@ export function initDataset< dataset, description, version, + snapshotName, + environment, appUrl, apiKey, orgName, @@ -3856,6 +4056,14 @@ export function initDataset< state: stateArg, _internal_btql, } = options; + const selection = normalizeDatasetSelection({ + version, + environment, + snapshotName, + }); + const normalizedVersion = selection.version; + const normalizedEnvironment = selection.environment; + const normalizedSnapshotName = selection.snapshotName; const state = stateArg ?? _globalState; @@ -3896,10 +4104,33 @@ export function initDataset< }, ); + const resolvedVersion = + normalizedVersion !== undefined + ? normalizedVersion + : normalizedSnapshotName !== undefined + ? new LazyValue(async () => { + return await resolveDatasetSnapshotNameForMetadata({ + state, + lazyMetadata, + snapshotName: normalizedSnapshotName, + }); + }) + : normalizedEnvironment !== undefined + ? new LazyValue(async () => { + return await resolveDatasetEnvironmentForMetadata({ + state, + lazyMetadata, + environment: normalizedEnvironment, + }); + }) + : undefined; + return new Dataset( stateArg ?? _globalState, lazyMetadata, - version, + resolvedVersion, + normalizedEnvironment, + normalizedSnapshotName, legacy, _internal_btql, ); @@ -5631,10 +5862,11 @@ export class ObjectFetcher implements AsyncIterable< | "experiment" | "project_logs" | "playground_logs", - private pinnedVersion: string | undefined, + protected pinnedVersion: string | undefined, // eslint-disable-next-line @typescript-eslint/no-explicit-any private mutateRecord?: (r: any) => WithTransactionId, - private _internal_btql?: Record, + protected _internal_btql?: Record, + protected pinnedEnvironment?: string, ) {} public get id(): Promise { @@ -5702,6 +5934,12 @@ export class ObjectFetcher implements AsyncIterable< version: this.pinnedVersion, } : {}), + ...(this.pinnedVersion === undefined && + this.pinnedEnvironment !== undefined + ? { + env: this.pinnedEnvironment, + } + : {}), }, { headers: { "Accept-Encoding": "gzip" } }, ); @@ -5765,6 +6003,10 @@ export class ObjectFetcher implements AsyncIterable< } public async version(options?: { batchSize?: number }) { + // Resolve any lazy pinned version (e.g. from environment lookup) before + // checking the field — subclasses like Dataset populate pinnedVersion + // inside getState(). + await this.getState(); if (this.pinnedVersion !== undefined) { return this.pinnedVersion; } else { @@ -6777,13 +7019,17 @@ export class Dataset< IsLegacyDataset extends boolean = typeof DEFAULT_IS_LEGACY_DATASET, > extends ObjectFetcher> { private readonly lazyMetadata: LazyValue; + private readonly lazyPinnedVersion: LazyValue | undefined; + private readonly pinnedSnapshotName: string | undefined; private readonly __braintrust_dataset_marker = true; private newRecords = 0; constructor( private state: BraintrustState, lazyMetadata: LazyValue, - pinnedVersion?: string, + pinnedVersion?: string | LazyValue, + pinnedEnvironment?: string, + pinnedSnapshotName?: string, legacy?: IsLegacyDataset, _internal_btql?: Record, ) { @@ -6797,9 +7043,11 @@ export class Dataset< `Records will be fetched from this dataset in the legacy format, with the "expected" field renamed to "output". Please update your code to use "expected", and use \`braintrust.initDataset()\` with \`{ useOutput: false }\`, which will become the default in a future version of Braintrust.`, ); } + const staticVersion = + pinnedVersion instanceof LazyValue ? undefined : pinnedVersion; super( "dataset", - pinnedVersion, + staticVersion, (r: AnyDatasetRecord) => // eslint-disable-next-line @typescript-eslint/consistent-type-assertions ensureDatasetRecord( @@ -6807,8 +7055,12 @@ export class Dataset< isLegacyDataset, ) as WithTransactionId>, _internal_btql, + pinnedEnvironment, ); this.lazyMetadata = lazyMetadata; + this.lazyPinnedVersion = + pinnedVersion instanceof LazyValue ? pinnedVersion : undefined; + this.pinnedSnapshotName = pinnedSnapshotName; } public get id(): Promise { @@ -6833,9 +7085,53 @@ export class Dataset< return this.state; } + public async toEvalData(): Promise<{ + dataset_id: string; + dataset_version?: string; + dataset_environment?: string; + dataset_snapshot_name?: string; + _internal_btql?: Record; + }> { + await this.getState(); + const metadata = await this.lazyMetadata.get(); + + return { + dataset_id: metadata.dataset.id, + ...(this.pinnedEnvironment !== undefined + ? { + dataset_environment: this.pinnedEnvironment, + } + : {}), + ...(this.pinnedEnvironment === undefined && + this.pinnedSnapshotName !== undefined + ? { + dataset_snapshot_name: this.pinnedSnapshotName, + } + : {}), + ...(this.pinnedEnvironment === undefined && + this.pinnedSnapshotName === undefined && + this.pinnedVersion !== undefined + ? { + dataset_version: this.pinnedVersion, + } + : {}), + ...(this._internal_btql !== undefined + ? { _internal_btql: this._internal_btql } + : {}), + }; + } + protected async getState(): Promise { // Ensure the login state is populated by awaiting lazyMetadata. await this.lazyMetadata.get(); + // Resolve lazy pinned version (e.g. from environment or snapshot-name + // lookup). + if ( + this.lazyPinnedVersion !== undefined && + this.pinnedVersion === undefined + ) { + this.pinnedVersion = await this.lazyPinnedVersion.get(); + } return this.state; } @@ -7015,6 +7311,71 @@ export class Dataset< return id; } + /** + * Create a named snapshot of the dataset at the current version. Flushes any pending writes first. + * + * @param options.name A human-readable name for the snapshot. Must be unique within the dataset. + * @param options.description An optional description of the snapshot. + * @returns The created snapshot object. + */ + public async createSnapshot({ + name, + description, + }: { + readonly name: string; + readonly description?: string; + }): Promise { + await this.flush(); + const state = await this.getState(); + const datasetId = await this.id; + const currentVersion = await this.version(); + if (currentVersion === undefined) { + throw new Error("Cannot create snapshot: dataset has no records"); + } + const response = await state + .appConn() + .post_json("api/dataset_snapshot/register", { + dataset_id: datasetId, + name, + description, + xact_id: currentVersion, + }); + return datasetSnapshotRegisterResponseSchema.parse(response) + .dataset_snapshot; + } + + /** + * List all named snapshots for this dataset. + * + * @returns An array of snapshot objects, ordered by creation time (newest first). + */ + public async listSnapshots(): Promise { + const state = await this.getState(); + const datasetId = await this.id; + const response = await state + .appConn() + .get_json("api/dataset_snapshot/get", { + dataset_id: datasetId, + }); + return datasetSnapshotResponseSchema.array().parse(response); + } + + /** + * Delete a named snapshot from this dataset. + * + * @param snapshotId The unique identifier of the snapshot to delete. + * @returns The deleted snapshot object. + */ + public async deleteSnapshot(snapshotId: string): Promise { + const state = await this.getState(); + const response = await state + .appConn() + .post_json("api/dataset_snapshot/delete_id", { + id: snapshotId, + }); + return datasetSnapshotResponseSchema.parse(response); + } + /** * Summarize the dataset, including high level metrics about its size and other metadata. * @param summarizeData Whether to summarize the data. If false, only the metadata will be returned. diff --git a/js/util/generated_types.ts b/js/util/generated_types.ts index e24a44951..b0659d95b 100644 --- a/js/util/generated_types.ts +++ b/js/util/generated_types.ts @@ -1295,6 +1295,9 @@ export const RunEval = z.object({ data: z.union([ z.object({ dataset_id: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(), @@ -1302,6 +1305,9 @@ export const RunEval = z.object({ z.object({ project_name: z.string(), dataset_name: z.string(), + dataset_version: z.union([z.string(), z.null()]).optional(), + dataset_environment: z.union([z.string(), z.null()]).optional(), + dataset_snapshot_name: z.union([z.string(), z.null()]).optional(), _internal_btql: z .union([z.object({}).partial().passthrough(), z.null()]) .optional(),