braintrustdata · max-braintrust · Mar 19, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 31, 2026
diff --git a/js/dev/server.test.ts b/js/dev/server.test.ts
@@ -0,0 +1,78 @@
+import { describe, expect, test, vi } from "vitest";
+import { type BraintrustState } from "../src/logger";
+import { _exportsForTestingOnly } from "./server";
+
+// Tests for the helpers that map run-eval dataset refs to initDataset args.
+describe("run eval dataset selector helpers", () => {
+  const { buildRunEvalDatasetInitArgs, getRunEvalDatasetSelector } =
+    _exportsForTestingOnly;
+  const state = {} as BraintrustState;
+
+  test("maps project dataset refs into initDataset args", async () => {
+    const initArgs = await buildRunEvalDatasetInitArgs(state, {
+      project_name: "test-project",
+      dataset_name: "test-dataset",
+      dataset_environment: "production",
+      _internal_btql: { limit: 10 },
+    });
+
+    expect(initArgs).toEqual({
+      state,
+      project: "test-project",
+      dataset: "test-dataset",
+      environment: "production",
+      _internal_btql: { limit: 10 },
+    });
+  });
+
+  test("maps dataset id refs into initDataset args", async () => {
+    // Stub the id lookup so the default `getDatasetById` is never called.
+    const lookupDatasetById = vi.fn().mockResolvedValue({
+      projectId: "project-id-123",
+      dataset: "resolved-dataset",
+    });
+
+    const initArgs = await buildRunEvalDatasetInitArgs(
+      state,
+      {
+        dataset_id: "dataset-id-123",
+        dataset_snapshot_name: "release-candidate",
+      },
+      lookupDatasetById,
+    );
+
+    expect(initArgs).toEqual({
+      state,
+      projectId: "project-id-123",
+      dataset: "resolved-dataset",
+      snapshotName: "release-candidate",
+    });
+    expect(lookupDatasetById).toHaveBeenCalledWith({
+      state,
+      datasetId: "dataset-id-123",
+    });
+  });
+
+  test("prefers dataset_version over other dataset selectors", () => {
+    const selector = getRunEvalDatasetSelector({
+      project_name: "test-project",
+      dataset_name: "test-dataset",
+      dataset_version: "123",
+      dataset_snapshot_name: "release-candidate",
+      dataset_environment: "production",
+    });
+
+    expect(selector).toEqual({ version: "123" });
+  });
+
+  test("prefers dataset_snapshot_name over dataset_environment", () => {
+    const selector = getRunEvalDatasetSelector({
+      project_name: "test-project",
+      dataset_name: "test-dataset",
+      dataset_snapshot_name: "release-candidate",
+      dataset_environment: "production",
+    });
+
+    expect(selector).toEqual({ snapshotName: "release-candidate" });
+  });
+});
diff --git a/js/dev/server.ts b/js/dev/server.ts
@@ -305,32 +305,102 @@ const asyncHandler =
     Promise.resolve(fn(req, res, next)).catch(next);
   };
 
-async function getDataset(
+/**
+ * Selector fields included in the arguments handed to `initDataset`.
+ *
+ * The members are mutually exclusive: an object may carry `version`,
+ * `environment`, or `snapshotName`, or none of them, but never two at once.
+ * The `?: never` properties are what rule out setting a second selector.
+ */
+type RunEvalDatasetSelector =
+  | { version: string; environment?: never; snapshotName?: never }
+  | { version?: never; environment: string; snapshotName?: never }
+  | { version?: never; environment?: never; snapshotName: string }
+  | { version?: never; environment?: never; snapshotName?: never };
+
+/**
+ * The `RunEvalRequest["data"]` variants that carry a `project_name` or a
+ * `dataset_id`.
+ */
+type RunEvalDatasetReference =
+  | Extract<RunEvalRequest["data"], { project_name: string }>
+  | Extract<RunEvalRequest["data"], { dataset_id: string }>;
+
+/**
+ * Shape of the object `getDataset` passes to `initDataset`: the shared
+ * fields, exactly one of `project` / `projectId`, and at most one dataset
+ * selector.
+ */
+type RunEvalDatasetInitArgs = {
+  state: BraintrustState;
+  dataset: string;
+  _internal_btql?: Record<string, unknown>;
+} & (
+  | { project: string; projectId?: never }
+  | { project?: never; projectId: string }
+) &
+  RunEvalDatasetSelector;
+/** Selector precedence: version, then snapshot name, then environment. */
+function getRunEvalDatasetSelector(
+  data: RunEvalDatasetReference,
+): RunEvalDatasetSelector {
+  if (data.dataset_version != null) {
+    return { version: data.dataset_version };
+  }
+  if (data.dataset_snapshot_name != null) {
+    return { snapshotName: data.dataset_snapshot_name };
+  }
+  if (data.dataset_environment != null) {
+    return { environment: data.dataset_environment };
+  }
+  // None of the selector fields is set (null or undefined).
+  return {};
+}
+/** Builds `initDataset` args from a by-name or by-id dataset reference. */
+async function buildRunEvalDatasetInitArgs(
   state: BraintrustState,
-  data: RunEvalRequest["data"],
-): Promise<EvalData<unknown, unknown, BaseMetadata>> {
+  data: RunEvalDatasetReference,
+  lookupDatasetById: typeof getDatasetById = getDatasetById,
+): Promise<RunEvalDatasetInitArgs> {
+  const commonArgs = {
+    state,
+    ...(data._internal_btql != null
+      ? { _internal_btql: data._internal_btql }
+      : {}),
+    ...getRunEvalDatasetSelector(data),
+  };
+  // By name: the request already carries the project and dataset names.
   if ("project_name" in data) {
-    return initDataset({
-      state,
+    const args = {
+      ...commonArgs,
       project: data.project_name,
       dataset: data.dataset_name,
-      _internal_btql: data._internal_btql ?? undefined,
-    });
-  } else if ("dataset_id" in data) {
-    const datasetInfo = await getDatasetById({
-      state,
-      datasetId: data.dataset_id,
-    });
-    return initDataset({
-      state,
-      projectId: datasetInfo.projectId,
-      dataset: datasetInfo.dataset,
-      _internal_btql: data._internal_btql ?? undefined,
-    });
-  } else {
+    } satisfies RunEvalDatasetInitArgs;
+    return args;
+  }
+  // By id: resolve project id and name via the (test-injectable) lookup.
+  const datasetInfo = await lookupDatasetById({
+    state,
+    datasetId: data.dataset_id,
+  });
+  const args = {
+    ...commonArgs,
+    projectId: datasetInfo.projectId,
+    dataset: datasetInfo.dataset,
+  } satisfies RunEvalDatasetInitArgs;
+  return args;
+}
+/** Returns inline eval cases as-is; otherwise delegates to initDataset. */
+async function getDataset(
+  state: BraintrustState,
+  data: RunEvalRequest["data"],
+): Promise<EvalData<unknown, unknown, BaseMetadata>> {
+  if ("data" in data) {
     // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
     return data.data as EvalCase<unknown, unknown, BaseMetadata>[];
   }
+  // Remaining variants reference a dataset by name or by id.
+  return initDataset(await buildRunEvalDatasetInitArgs(state, data));
 }
 
 const datasetFetchSchema = z.object({
@@ -354,6 +424,11 @@ async function getDatasetById({
   return { projectId: parsed[0].project_id, dataset: parsed[0].name };
 }
 
+export const _exportsForTestingOnly = {
+  buildRunEvalDatasetInitArgs,
+  getRunEvalDatasetSelector,
+};
+
 function makeScorer(
   state: BraintrustState,
   name: string,

diff --git a/js/src/cli/index.ts b/js/src/cli/index.ts
@@ -127,6 +127,7 @@ async function initExperiment(
         fallback: (_text: string, url: string) => url,
       })
     : "locally";
+  // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
   console.error(
     chalk.cyan("▶") +
       ` Experiment ${chalk.bold(info.experimentName)} is running at ${linkText}`,
@@ -219,13 +220,17 @@ function buildWatchPluginForEvaluator(
     name: "run-evalutator-on-end",
     setup(build: esbuild.PluginBuild) {
       build.onEnd(async (result) => {
+        // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
         console.error(`Done building ${inFile}`);
 
         if (!result.outputFiles) {
           if (opts.showDetailedErrors) {
+            // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
             console.warn(`Failed to compile ${inFile}`);
+            // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
             console.warn(result.errors);
           } else {
+            // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
             console.warn(`Failed to compile ${inFile}: ${result.errors}`);
           }
           return;
@@ -306,6 +311,7 @@ function buildWatchPluginForEvaluator(
         )) {
           const success = await reporter.reportRun(await Promise.all(results));
           if (!success) {
+            // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
             console.error(error(`Reporter ${reporterName} failed.`));
           }
         }
@@ -421,9 +427,12 @@ export function handleBuildFailure({
   if (terminateOnFailure) {
     throw result.error;
   } else if (showDetailedErrors) {
+    // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
     console.warn(`Failed to compile ${result.sourceFile}`);
+    // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
     console.warn(result.error);
   } else {
+    // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
     console.warn(
       `Failed to compile ${result.sourceFile}: ${result.error.message}`,
     );
@@ -466,6 +475,7 @@ function updateEvaluators(
         evaluators.reporters[reporterName] &&
         evaluators.reporters[reporterName] !== reporter
       ) {
+        // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
         console.warn(
           warning(
             `Reporter '${reporterName}' already exists. Will skip '${reporterName}' from ${result.sourceFile}.`,
@@ -486,12 +496,14 @@ async function runAndWatch({
   onExit?: () => void;
 }) {
   const count = Object.keys(handles).length;
+  // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
   console.error(`Watching ${pluralize("file", count, true)}...`);
 
   Object.values(handles).map((handle) => handle.watch());
 
   ["SIGINT", "SIGTERM"].forEach((signal: string) => {
     process.on(signal, function () {
+      // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
       console.error("Stopped watching.");
       for (const handle of Object.values(handles)) {
         handle.destroy();
@@ -540,6 +552,7 @@ async function runOnce(
 
   if (opts.list) {
     for (const evaluator of evaluators.evaluators) {
+      // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
       console.log(evaluator.evaluator.evalName);
     }
     return true;
@@ -581,13 +594,15 @@ async function runOnce(
     }
   });
 
+  // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
   console.error(
     chalk.dim(
       `Processing ${chalk.bold(resultPromises.length)} evaluator${resultPromises.length === 1 ? "" : "s"}...`,
     ),
   );
   const allEvalsResults = await Promise.all(resultPromises);
   opts.progressReporter.stop();
+  // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
   console.error("");
 
   const evalReports: Record<
@@ -685,6 +700,7 @@ async function collectFiles(
   try {
     pathStat = fs.lstatSync(inputPath);
   } catch (e) {
+    // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
     console.error(error(`Error reading ${inputPath}: ${e}`));
     process.exit(1);
   }
@@ -699,6 +715,7 @@ async function collectFiles(
       )
     ) {
       const prefix = mode === "eval" ? ".eval" : "";
+      // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
       console.warn(
         warning(
           `Reading ${inputPath} because it was specified directly. Rename it to end in ${prefix}.ts or ` +
@@ -848,6 +865,7 @@ export async function initializeHandles({
   for (const inputPath of inputPaths) {
     const newFiles = await collectFiles(inputPath, mode);
     if (newFiles.length == 0) {
+      // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
       console.warn(
         warning(
           `Provided path ${inputPath} is not an eval file or a directory containing eval files, skipping...`,
@@ -860,6 +878,7 @@ export async function initializeHandles({
   }
 
   if (Object.keys(files).length == 0) {
+    // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
     console.warn(
       warning("No eval files were found in any of the provided paths."),
     );
@@ -906,6 +925,7 @@ async function run(args: RunArgs) {
     // Load via dotenv library
     const loaded = dotenv.config({ path: args.env_file });
     if (loaded.error) {
+      // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
       console.error(error(`Error loading ${args.env_file}: ${loaded.error}`));
       process.exit(1);
     }
@@ -930,6 +950,7 @@ async function run(args: RunArgs) {
   };
 
   if (args.list && args.watch) {
+    // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
     console.error(error("Cannot specify both --list and --watch."));
     process.exit(1);
   }

diff --git a/js/src/exports.ts b/js/src/exports.ts
@@ -8,6 +8,7 @@ export type {
   CompiledPromptParams,
   CompletionPrompt,
   ContextParentSpanIds,
+  DatasetSnapshot,
   DataSummary,
   DatasetSummary,
   DefaultMetadataType,

diff --git a/js/src/generated_types.ts b/js/src/generated_types.ts
@@ -1825,13 +1825,19 @@ export const RunEval = z.object({
   data: z.union([
     z.object({
       dataset_id: z.string(),
+      dataset_version: z.union([z.string(), z.null()]).optional(),
+      dataset_environment: z.union([z.string(), z.null()]).optional(),
+      dataset_snapshot_name: z.union([z.string(), z.null()]).optional(),
       _internal_btql: z
         .union([z.object({}).partial().passthrough(), z.null()])
         .optional(),
     }),
     z.object({
       project_name: z.string(),
       dataset_name: z.string(),
+      dataset_version: z.union([z.string(), z.null()]).optional(),
+      dataset_environment: z.union([z.string(), z.null()]).optional(),
+      dataset_snapshot_name: z.union([z.string(), z.null()]).optional(),
       _internal_btql: z
         .union([z.object({}).partial().passthrough(), z.null()])
         .optional(),