Skip to content
78 changes: 78 additions & 0 deletions js/dev/server.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { describe, expect, test, vi } from "vitest";
import { type BraintrustState } from "../src/logger";
import { _exportsForTestingOnly } from "./server";

// Unit tests for the helpers that turn a run-eval dataset reference into
// `initDataset` arguments.
describe("run eval dataset selector helpers", () => {
  const { buildRunEvalDatasetInitArgs, getRunEvalDatasetSelector } =
    _exportsForTestingOnly;

  // An empty stand-in: these tests only check that the same state object is
  // passed through to the result and to the lookup stub.
  const state = {} as BraintrustState;

  test("maps project dataset refs into initDataset args", async () => {
    const initArgs = await buildRunEvalDatasetInitArgs(state, {
      project_name: "test-project",
      dataset_name: "test-dataset",
      dataset_environment: "production",
      _internal_btql: { limit: 10 },
    });

    expect(initArgs).toEqual({
      state,
      project: "test-project",
      dataset: "test-dataset",
      environment: "production",
      _internal_btql: { limit: 10 },
    });
  });

  test("maps dataset id refs into initDataset args", async () => {
    // Stub the id lookup so no request is made.
    const lookupDatasetById = vi.fn().mockResolvedValue({
      projectId: "project-id-123",
      dataset: "resolved-dataset",
    });

    const initArgs = await buildRunEvalDatasetInitArgs(
      state,
      {
        dataset_id: "dataset-id-123",
        dataset_snapshot_name: "release-candidate",
      },
      lookupDatasetById,
    );

    expect(initArgs).toEqual({
      state,
      projectId: "project-id-123",
      dataset: "resolved-dataset",
      snapshotName: "release-candidate",
    });
    expect(lookupDatasetById).toHaveBeenCalledWith({
      state,
      datasetId: "dataset-id-123",
    });
  });

  test("prefers dataset_version over other dataset selectors", () => {
    const selector = getRunEvalDatasetSelector({
      project_name: "test-project",
      dataset_name: "test-dataset",
      dataset_version: "123",
      dataset_snapshot_name: "release-candidate",
      dataset_environment: "production",
    });

    expect(selector).toEqual({
      version: "123",
    });
  });

  test("prefers dataset_snapshot_name over dataset_environment", () => {
    const selector = getRunEvalDatasetSelector({
      project_name: "test-project",
      dataset_name: "test-dataset",
      dataset_snapshot_name: "release-candidate",
      dataset_environment: "production",
    });

    expect(selector).toEqual({
      snapshotName: "release-candidate",
    });
  });
});
113 changes: 94 additions & 19 deletions js/dev/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -305,32 +305,102 @@ const asyncHandler =
Promise.resolve(fn(req, res, next)).catch(next);
};

async function getDataset(
/**
 * Optional dataset selector: at most one of `version`, `environment`, or
 * `snapshotName`.
 *
 * Each union member makes one key a required string and types the other two
 * as optional `never`, so an object carrying two selector strings does not
 * type-check. The last member has no key set, meaning no selector was given.
 */
type RunEvalDatasetSelector =
// Only `version` is set.
| {
version: string;
environment?: never;
snapshotName?: never;
}
// Only `environment` is set.
| {
version?: never;
environment: string;
snapshotName?: never;
}
// Only `snapshotName` is set.
| {
version?: never;
environment?: never;
snapshotName: string;
}
// No selector at all.
| {
version?: never;
environment?: never;
snapshotName?: never;
};

/**
 * The members of `RunEvalRequest["data"]` that reference a dataset, either by
 * `project_name` or by `dataset_id`. `Extract` drops every other member of
 * that union.
 */
type RunEvalDatasetReference =
| Extract<RunEvalRequest["data"], { project_name: string }>
| Extract<RunEvalRequest["data"], { dataset_id: string }>;

/**
 * Argument object returned by `buildRunEvalDatasetInitArgs` and handed to
 * `initDataset`.
 *
 * It always carries `state` and the `dataset` name, optionally
 * `_internal_btql`, exactly one of `project` / `projectId`, and at most one
 * selector key from `RunEvalDatasetSelector`.
 */
type RunEvalDatasetInitArgs = {
state: BraintrustState;
dataset: string;
_internal_btql?: Record<string, unknown>;
} & (
// The project is identified by `project` or by `projectId`, never both.
| { project: string; projectId?: never }
| { project?: never; projectId: string }
) &
RunEvalDatasetSelector;

/**
 * Picks the single dataset selector to use from a dataset reference.
 *
 * When several selector fields are present, the first non-null one wins in
 * this order: `dataset_version`, then `dataset_snapshot_name`, then
 * `dataset_environment`. `null` and `undefined` both count as absent.
 *
 * @param data - Dataset reference taken from a run-eval request.
 * @returns An object with exactly one of `version`, `snapshotName`, or
 *   `environment`, or an empty object when no selector field is set.
 */
function getRunEvalDatasetSelector(
  data: RunEvalDatasetReference,
): RunEvalDatasetSelector {
  const {
    dataset_version: version,
    dataset_snapshot_name: snapshotName,
    dataset_environment: environment,
  } = data;

  // Highest precedence first; each check falls through when the field is absent.
  if (version != null) {
    return { version };
  }
  if (snapshotName != null) {
    return { snapshotName };
  }
  if (environment != null) {
    return { environment };
  }

  return {};
}

/**
 * Builds the argument object for `initDataset` from a run-eval dataset
 * reference.
 *
 * NOTE(review): this span comes from a diff view with the +/- markers
 * stripped, so lines of the previous `getDataset` body are interleaved with
 * the new function. The comments below describe the lines that use
 * `commonArgs`, `lookupDatasetById` and `satisfies RunEvalDatasetInitArgs`.
 *
 * @param state - Passed through unchanged into the result and into the lookup.
 * @param data - Dataset reference, keyed by `project_name` or by `dataset_id`.
 * @param lookupDatasetById - Resolves a dataset id to `{ projectId, dataset }`.
 *   Defaults to `getDatasetById`; a replacement can be passed in its place.
 * @returns The `initDataset` arguments: `state`, the dataset name, `project`
 *   or `projectId`, plus `_internal_btql` and one selector when present.
 */
async function buildRunEvalDatasetInitArgs(
state: BraintrustState,
data: RunEvalRequest["data"],
): Promise<EvalData<unknown, unknown, BaseMetadata>> {
data: RunEvalDatasetReference,
lookupDatasetById: typeof getDatasetById = getDatasetById,
): Promise<RunEvalDatasetInitArgs> {
// Fields shared by both reference kinds. `_internal_btql` is only included
// when it is non-null, and the selector contributes at most one key.
const commonArgs = {
state,
...(data._internal_btql != null
? { _internal_btql: data._internal_btql }
: {}),
...getRunEvalDatasetSelector(data),
};

// Project reference: the project and dataset names are used directly.
if ("project_name" in data) {
return initDataset({
state,
const args = {
...commonArgs,
project: data.project_name,
dataset: data.dataset_name,
_internal_btql: data._internal_btql ?? undefined,
});
} else if ("dataset_id" in data) {
const datasetInfo = await getDatasetById({
state,
datasetId: data.dataset_id,
});
return initDataset({
state,
projectId: datasetInfo.projectId,
dataset: datasetInfo.dataset,
_internal_btql: data._internal_btql ?? undefined,
});
} else {
} satisfies RunEvalDatasetInitArgs;
return args;
}

// Dataset id reference: resolve the id to its project id and dataset name
// first, using the supplied lookup function.
const datasetInfo = await lookupDatasetById({
state,
datasetId: data.dataset_id,
});
const args = {
...commonArgs,
projectId: datasetInfo.projectId,
dataset: datasetInfo.dataset,
} satisfies RunEvalDatasetInitArgs;
return args;
}

/**
 * Resolves the `data` field of a run-eval request into eval data.
 *
 * A request that carries a `data` key has that value returned as-is. Any
 * other request is treated as a dataset reference: it is converted with
 * `buildRunEvalDatasetInitArgs` and the result is passed to `initDataset`.
 *
 * @param state - Forwarded to `buildRunEvalDatasetInitArgs`.
 * @param data - The `data` field of the run-eval request.
 * @returns The inline `data` value, or whatever `initDataset` returns.
 */
async function getDataset(
  state: BraintrustState,
  data: RunEvalRequest["data"],
): Promise<EvalData<unknown, unknown, BaseMetadata>> {
  if (!("data" in data)) {
    // Dataset reference: build the init arguments, then open the dataset.
    const initArgs = await buildRunEvalDatasetInitArgs(state, data);
    return initDataset(initArgs);
  }

  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
  return data.data as EvalCase<unknown, unknown, BaseMetadata>[];
}

const datasetFetchSchema = z.object({
Expand All @@ -354,6 +424,11 @@ async function getDatasetById({
return { projectId: parsed[0].project_id, dataset: parsed[0].name };
}

/**
 * Module-private helpers exported under one object so unit tests can call
 * them directly. As the name says, this is for tests only.
 */
export const _exportsForTestingOnly = {
buildRunEvalDatasetInitArgs,
getRunEvalDatasetSelector,
};

function makeScorer(
state: BraintrustState,
name: string,
Expand Down
21 changes: 21 additions & 0 deletions js/src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ async function initExperiment(
fallback: (_text: string, url: string) => url,
})
: "locally";
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(
chalk.cyan("▶") +
` Experiment ${chalk.bold(info.experimentName)} is running at ${linkText}`,
Expand Down Expand Up @@ -219,13 +220,17 @@ function buildWatchPluginForEvaluator(
name: "run-evalutator-on-end",
setup(build: esbuild.PluginBuild) {
build.onEnd(async (result) => {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(`Done building ${inFile}`);

if (!result.outputFiles) {
if (opts.showDetailedErrors) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(`Failed to compile ${inFile}`);
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(result.errors);
} else {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(`Failed to compile ${inFile}: ${result.errors}`);
}
return;
Expand Down Expand Up @@ -306,6 +311,7 @@ function buildWatchPluginForEvaluator(
)) {
const success = await reporter.reportRun(await Promise.all(results));
if (!success) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(error(`Reporter ${reporterName} failed.`));
}
}
Expand Down Expand Up @@ -421,9 +427,12 @@ export function handleBuildFailure({
if (terminateOnFailure) {
throw result.error;
} else if (showDetailedErrors) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(`Failed to compile ${result.sourceFile}`);
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(result.error);
} else {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(
`Failed to compile ${result.sourceFile}: ${result.error.message}`,
);
Expand Down Expand Up @@ -466,6 +475,7 @@ function updateEvaluators(
evaluators.reporters[reporterName] &&
evaluators.reporters[reporterName] !== reporter
) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(
warning(
`Reporter '${reporterName}' already exists. Will skip '${reporterName}' from ${result.sourceFile}.`,
Expand All @@ -486,12 +496,14 @@ async function runAndWatch({
onExit?: () => void;
}) {
const count = Object.keys(handles).length;
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(`Watching ${pluralize("file", count, true)}...`);

Object.values(handles).map((handle) => handle.watch());

["SIGINT", "SIGTERM"].forEach((signal: string) => {
process.on(signal, function () {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error("Stopped watching.");
for (const handle of Object.values(handles)) {
handle.destroy();
Expand Down Expand Up @@ -540,6 +552,7 @@ async function runOnce(

if (opts.list) {
for (const evaluator of evaluators.evaluators) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.log(evaluator.evaluator.evalName);
}
return true;
Expand Down Expand Up @@ -581,13 +594,15 @@ async function runOnce(
}
});

// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(
chalk.dim(
`Processing ${chalk.bold(resultPromises.length)} evaluator${resultPromises.length === 1 ? "" : "s"}...`,
),
);
const allEvalsResults = await Promise.all(resultPromises);
opts.progressReporter.stop();
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error("");

const evalReports: Record<
Expand Down Expand Up @@ -685,6 +700,7 @@ async function collectFiles(
try {
pathStat = fs.lstatSync(inputPath);
} catch (e) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(error(`Error reading ${inputPath}: ${e}`));
process.exit(1);
}
Expand All @@ -699,6 +715,7 @@ async function collectFiles(
)
) {
const prefix = mode === "eval" ? ".eval" : "";
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(
warning(
`Reading ${inputPath} because it was specified directly. Rename it to end in ${prefix}.ts or ` +
Expand Down Expand Up @@ -848,6 +865,7 @@ export async function initializeHandles({
for (const inputPath of inputPaths) {
const newFiles = await collectFiles(inputPath, mode);
if (newFiles.length == 0) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(
warning(
`Provided path ${inputPath} is not an eval file or a directory containing eval files, skipping...`,
Expand All @@ -860,6 +878,7 @@ export async function initializeHandles({
}

if (Object.keys(files).length == 0) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.warn(
warning("No eval files were found in any of the provided paths."),
);
Expand Down Expand Up @@ -906,6 +925,7 @@ async function run(args: RunArgs) {
// Load via dotenv library
const loaded = dotenv.config({ path: args.env_file });
if (loaded.error) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(error(`Error loading ${args.env_file}: ${loaded.error}`));
process.exit(1);
}
Expand All @@ -930,6 +950,7 @@ async function run(args: RunArgs) {
};

if (args.list && args.watch) {
// eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
console.error(error("Cannot specify both --list and --watch."));
process.exit(1);
}
Expand Down
1 change: 1 addition & 0 deletions js/src/exports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export type {
CompiledPromptParams,
CompletionPrompt,
ContextParentSpanIds,
DatasetSnapshot,
DataSummary,
DatasetSummary,
DefaultMetadataType,
Expand Down
6 changes: 6 additions & 0 deletions js/src/generated_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1825,13 +1825,19 @@ export const RunEval = z.object({
data: z.union([
z.object({
dataset_id: z.string(),
dataset_version: z.union([z.string(), z.null()]).optional(),
dataset_environment: z.union([z.string(), z.null()]).optional(),
dataset_snapshot_name: z.union([z.string(), z.null()]).optional(),
_internal_btql: z
.union([z.object({}).partial().passthrough(), z.null()])
.optional(),
}),
z.object({
project_name: z.string(),
dataset_name: z.string(),
dataset_version: z.union([z.string(), z.null()]).optional(),
dataset_environment: z.union([z.string(), z.null()]).optional(),
dataset_snapshot_name: z.union([z.string(), z.null()]).optional(),
_internal_btql: z
.union([z.object({}).partial().passthrough(), z.null()])
.optional(),
Expand Down
Loading