From 472680415de1d3ca879d3897d60f43e099b64d65 Mon Sep 17 00:00:00 2001 From: allardy <42552874+allardy@users.noreply.github.com> Date: Sat, 7 Mar 2026 01:50:37 +0000 Subject: [PATCH 1/2] chore(cognitive): update AI model catalog --- .../providers/anthropic/anthropic.config.ts | 157 +++++++++++++++++- .../providers/cerebras/cerebras.config.ts | 55 ++++++ .../fireworks-ai/fireworks.config.ts | 56 +++++++ .../providers/google-ai/google-ai.config.ts | 84 ++++++++++ .../features/providers/groq/groq.config.ts | 67 +++++++- .../src/features/providers/xai/xai.config.ts | 52 ++++++ 6 files changed, 463 insertions(+), 8 deletions(-) diff --git a/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts b/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts index 70853f9..0e898d4 100644 --- a/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts +++ b/packages/cognitive/src/features/providers/anthropic/anthropic.config.ts @@ -4,8 +4,156 @@ export const ANTHROPIC_CONFIG: ProviderConfig = { id: 'anthropic', name: 'Anthropic', description: 'Claude models focused on safety and helpfulness', - defaultModel: 'claude-sonnet-4-5-20250929', + defaultModel: 'claude-sonnet-4-6', models: [ + { + id: 'claude-opus-4-6', + displayName: 'Claude Opus 4.6', + aliases: ['claude-opus-4-6'], + description: + 'Claude Opus 4.6 is the most intelligent Claude model for building agents and coding. It offers state-of-the-art performance on complex reasoning, coding, and agentic tasks with a 200K context window.', + lifecycle: 'production', + health: 'healthy', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 5, + outputCostPer1mTokens: 25, + }, + tags: ['recommended', 'reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, + { + id: 'claude-sonnet-4-6', + displayName: 'Claude Sonnet 4.6', + aliases: ['claude-sonnet-4-6'], + description: + 'Claude Sonnet 4.6 offers the best combination of speed and intelligence. It delivers strong performance across coding, reasoning, and agentic workflows with a 200K context window.', + lifecycle: 'production', + health: 'healthy', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 64_000, + }, + cost: { + inputCostPer1mTokens: 3, + outputCostPer1mTokens: 15, + }, + tags: ['recommended', 'reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, + { + id: 'claude-opus-4-5-20251101', + displayName: 'Claude Opus 4.5', + aliases: ['claude-opus-4-5'], + description: + 'Claude Opus 4.5 is a powerful model for complex tasks requiring deep reasoning and coding capabilities, offering near-frontier intelligence with a 200K context window.', + lifecycle: 'production', + health: 'healthy', + releaseDate: '2025-11-01', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 64_000, + }, + cost: { + inputCostPer1mTokens: 5, + outputCostPer1mTokens: 25, + }, + tags: ['reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, + { + id: 'claude-opus-4-1-20250805', + displayName: 'Claude Opus 4.1', + aliases: ['claude-opus-4-1'], + description: + 'Claude Opus 4.1 is a high-capability model optimized for complex reasoning and agentic tasks with a 200K context window.', + lifecycle: 'production', + health: 'healthy', + releaseDate: '2025-08-05', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 32_000, + }, + cost: { + inputCostPer1mTokens: 15, + outputCostPer1mTokens: 75, + }, + tags: ['reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, + { + id: 'claude-opus-4-20250514', + displayName: 'Claude Opus 4', + aliases: ['claude-opus-4-0'], + description: + 'Claude Opus 4 is Anthropic\'s first model in the Claude 4 family, delivering frontier-level intelligence for complex reasoning, coding, and agentic tasks.', + lifecycle: 'production', + health: 'healthy', + releaseDate: '2025-05-14', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: false, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 200_000, + maxOutputTokens: 32_000, + }, + cost: { + inputCostPer1mTokens: 15, + outputCostPer1mTokens: 75, + }, + tags: ['reasoning', 'agents', 'vision', 'general-purpose', 'coding'], + }, { id: 'claude-sonnet-4-5-20250929', displayName: 'Claude Sonnet 4.5', @@ -320,9 +468,12 @@ export const ANTHROPIC_CONFIG: ProviderConfig = { displayName: 'Claude 3 Haiku', description: "Claude 3 Haiku is Anthropic's fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance.", - lifecycle: 'production', + lifecycle: 'deprecated', health: 'healthy', releaseDate: '2024-03-07', + deprecationDate: '2026-04-19', + discontinuedDate: '2026-04-19', + replacementModels: ['claude-haiku-4-5-20251001'], capabilities: { supportsText: true, supportsImages: true, @@ -342,7 +493,7 @@ export const ANTHROPIC_CONFIG: ProviderConfig = { inputCostPer1mTokens: 0.25, outputCostPer1mTokens: 1.25, }, - tags: ['low-cost', 'general-purpose'], + tags: ['deprecated', 'low-cost', 'general-purpose'], }, ], } diff --git a/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts b/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts index 665e691..0db5484 100644 --- a/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts +++ b/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts @@ -6,6 +6,61 @@ export const CEREBRAS_CONFIG: ProviderConfig = { description: 'Cerebras AI models for high-performance language processing', defaultModel: 'gpt-oss-120b', models: [ + { + id: 'qwen-3-235b-a22b-instruct-2507', + displayName: 'Qwen3 235B A22B Instruct (Preview)', + description: + 'Qwen3-235B A22B is a large mixture-of-experts model with 235B total parameters and 22B active parameters per forward pass. It offers multilingual capabilities with significant improvements in instruction following, reasoning, and coding.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 32_000, + maxOutputTokens: 16_000, + }, + cost: { + inputCostPer1mTokens: 0.6, + outputCostPer1mTokens: 1.2, + }, + tags: ['preview', 'general-purpose', 'reasoning'], + releaseDate: '2025-07-01', + lifecycle: 'production', + }, + { + id: 'zai-glm-4.7', + displayName: 'Z.ai GLM 4.7 (Preview)', + description: + 'Z.ai GLM 4.7 is a large-scale language model from Z.ai with strong coding performance and advanced reasoning capabilities. It features 355B parameters and excels at technical tasks.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 8_000, + maxOutputTokens: 8_000, + }, + cost: { + inputCostPer1mTokens: 2.25, + outputCostPer1mTokens: 2.75, + }, + tags: ['preview', 'general-purpose', 'reasoning', 'coding'], + lifecycle: 'production', + }, { id: 'gpt-oss-120b', displayName: 'GPT-OSS 120B (Preview)', diff --git a/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts b/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts index c2a2d42..23a53eb 100644 --- a/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts +++ b/packages/cognitive/src/features/providers/fireworks-ai/fireworks.config.ts @@ -6,6 +6,62 @@ export const FIREWORKS_CONFIG: ProviderConfig = { description: 'Fireworks AI provides fast inference for various language models', defaultModel: 'accounts/fireworks/models/llama-v3p1-70b-instruct', models: [ + { + id: 'kimi-k2-instruct', + internalModelId: 'accounts/fireworks/models/kimi-k2-instruct', + displayName: 'Kimi K2 Instruct', + description: + 'Kimi K2 is a state-of-the-art language model from Moonshot AI with strong agentic capabilities, tool use, and long-context reasoning. Optimized for complex instruction-following, coding, and multi-step tasks.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 262_144, + maxOutputTokens: 16_384, + }, + cost: { + inputCostPer1mTokens: 0.6, + outputCostPer1mTokens: 2.5, + }, + tags: ['general-purpose', 'reasoning', 'coding'], + lifecycle: 'production', + }, + { + id: 'deepseek-v3p1', + internalModelId: 'accounts/fireworks/models/deepseek-v3p1', + displayName: 'DeepSeek V3.1', + description: + 'DeepSeek V3.1 is the latest iteration of the DeepSeek V3 family, a 685B-parameter mixture-of-experts model. It delivers improved performance across reasoning, coding, and general-purpose tasks at a lower cost than its predecessors.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: false, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 163_840, + maxOutputTokens: 16_384, + }, + cost: { + inputCostPer1mTokens: 0.56, + outputCostPer1mTokens: 1.68, + }, + tags: ['recommended', 'general-purpose', 'coding'], + lifecycle: 'production', + }, { id: 'gpt-oss-20b', internalModelId: 'accounts/fireworks/models/gpt-oss-20b', diff --git a/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts b/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts index c5d8e21..c1816e3 100644 --- a/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts +++ b/packages/cognitive/src/features/providers/google-ai/google-ai.config.ts @@ -6,6 +6,62 @@ export const GOOGLE_AI_CONFIG: ProviderConfig = { description: 'Gemini models from Google with multimodal capabilities', defaultModel: 'gemini-2.5-flash', models: [ + { + id: 'gemini-3.1-pro', + internalModelId: 'gemini-3.1-pro-preview', + displayName: 'Gemini 3.1 Pro', + description: + "Google's most advanced model with state-of-the-art reasoning, agentic capabilities, and multimodal understanding. Delivers richer visuals and deeper interactivity built on frontier-level reasoning.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_048_576, + maxOutputTokens: 65_536, + }, + cost: { + inputCostPer1mTokens: 2, + outputCostPer1mTokens: 12, + }, + tags: ['reasoning', 'agents', 'general-purpose', 'vision'], + lifecycle: 'preview', + }, + { + id: 'gemini-3.1-flash-lite', + internalModelId: 'gemini-3.1-flash-lite-preview', + displayName: 'Gemini 3.1 Flash-Lite', + description: + "Google's most cost-effective frontier-class model, designed for speed and high-volume tasks with strong intelligence at a fraction of the cost.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_048_576, + maxOutputTokens: 65_536, + }, + cost: { + inputCostPer1mTokens: 0.25, + outputCostPer1mTokens: 1.5, + }, + tags: ['low-cost', 'general-purpose', 'vision'], + lifecycle: 'preview', + }, { id: 'gemini-3-pro', internalModelId: 'gemini-3-pro-preview', @@ -63,6 +119,34 @@ export const GOOGLE_AI_CONFIG: ProviderConfig = { releaseDate: '2025-12-17', lifecycle: 'preview', }, + { + id: 'gemini-2.5-flash-lite', + displayName: 'Gemini 2.5 Flash-Lite', + description: + "Google's fastest and most budget-friendly Gemini 2.5 model, optimized for high-volume, low-latency tasks while maintaining strong multimodal capabilities.", + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 1_048_576, + maxOutputTokens: 65_536, + }, + cost: { + inputCostPer1mTokens: 0.1, + outputCostPer1mTokens: 0.4, + }, + tags: ['low-cost', 'general-purpose', 'vision'], + releaseDate: '2025-06-01', + lifecycle: 'production', + }, { id: 'gemini-2.5-flash', displayName: 'Gemini 2.5 Flash', diff --git a/packages/cognitive/src/features/providers/groq/groq.config.ts b/packages/cognitive/src/features/providers/groq/groq.config.ts index 0048857..1744ae6 100644 --- a/packages/cognitive/src/features/providers/groq/groq.config.ts +++ b/packages/cognitive/src/features/providers/groq/groq.config.ts @@ -25,11 +25,11 @@ export const GROQ_CONFIG: ProviderConfig = { }, limits: { maxInputTokens: 131_000, - maxOutputTokens: 32_000, + maxOutputTokens: 65_536, }, cost: { - inputCostPer1mTokens: 0.1, - outputCostPer1mTokens: 0.5, + inputCostPer1mTokens: 0.075, + outputCostPer1mTokens: 0.3, }, tags: ['preview', 'general-purpose', 'reasoning', 'low-cost'], releaseDate: '2024-12-01', @@ -54,16 +54,73 @@ export const GROQ_CONFIG: ProviderConfig = { }, limits: { maxInputTokens: 131_000, - maxOutputTokens: 32_000, + maxOutputTokens: 65_536, }, cost: { inputCostPer1mTokens: 0.15, - outputCostPer1mTokens: 0.75, + outputCostPer1mTokens: 0.6, }, tags: ['preview', 'general-purpose', 'reasoning'], releaseDate: '2024-12-01', lifecycle: 'production', }, + { + id: 'kimi-k2-instruct-0905', + displayName: 'Kimi K2 Instruct 0905 (Preview)', + internalModelId: 'moonshotai/kimi-k2-instruct-0905', + description: + 'Kimi K2 is a state-of-the-art language model from Moonshot AI with strong agentic capabilities, tool use, and long-context reasoning. Optimized for complex instruction-following and multi-step tasks.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsSystemMessages: true, + supportsTools: true, + supportsJsonMode: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 262_144, + maxOutputTokens: 16_384, + }, + cost: { + inputCostPer1mTokens: 1, + outputCostPer1mTokens: 3, + }, + tags: ['preview', 'general-purpose', 'reasoning'], + releaseDate: '2025-09-05', + lifecycle: 'production', + }, + { + id: 'qwen3-32b', + displayName: 'Qwen3 32B (Preview)', + internalModelId: 'qwen/qwen3-32b', + description: + 'Qwen3-32B is a world-class reasoning model with comparable quality to DeepSeek R1 while outperforming GPT-4.1 and Claude Sonnet 3.7. It excels in code-gen, tool-calling, and advanced reasoning.', + capabilities: { + supportsText: true, + supportsImages: false, + supportsSystemMessages: true, + supportsTools: true, + supportsJsonMode: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 131_072, + maxOutputTokens: 40_960, + }, + cost: { + inputCostPer1mTokens: 0.29, + outputCostPer1mTokens: 0.59, + }, + tags: ['preview', 'general-purpose', 'reasoning'], + lifecycle: 'production', + }, { id: 'deepseek-r1-distill-llama-70b', displayName: 'DeepSeek R1-Distill Llama 3.3 70B (Preview)', diff --git a/packages/cognitive/src/features/providers/xai/xai.config.ts b/packages/cognitive/src/features/providers/xai/xai.config.ts index 20c3161..394aece 100644 --- a/packages/cognitive/src/features/providers/xai/xai.config.ts +++ b/packages/cognitive/src/features/providers/xai/xai.config.ts @@ -6,6 +6,58 @@ export const XAI_CONFIG: ProviderConfig = { description: 'xAI Grok models', defaultModel: 'grok-4-fast-non-reasoning', models: [ + { + id: 'grok-4-1-fast-reasoning', + displayName: 'Grok 4.1 Fast (Reasoning)', + description: 'Next-generation fast Grok model with reasoning capabilities and very large 2M token context window.', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: true, + supportsStopSequence: false, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 2_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 0.2, + outputCostPer1mTokens: 0.5, + }, + tags: ['reasoning', 'recommended', 'general-purpose'], + lifecycle: 'production', + }, + { + id: 'grok-4-1-fast-non-reasoning', + displayName: 'Grok 4.1 Fast (Non-Reasoning)', + description: 'Next-generation fast, cost-effective Grok model for non-reasoning tasks with a very large 2M token context window.', + capabilities: { + supportsText: true, + supportsImages: true, + supportsTools: true, + supportsJsonMode: true, + supportsSystemMessages: true, + supportsStreaming: true, + supportsReasoning: false, + supportsStopSequence: true, + supportsTemperature: true, + }, + limits: { + maxInputTokens: 2_000_000, + maxOutputTokens: 128_000, + }, + cost: { + inputCostPer1mTokens: 0.2, + outputCostPer1mTokens: 0.5, + }, + tags: ['low-cost', 'recommended', 'general-purpose'], + lifecycle: 'production', + }, { id: 'grok-code-fast-1', displayName: 'Grok Code Fast 1', From bc7cf54a02de57cb43299ab2fed60e37759c5096 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 7 Mar 2026 02:00:42 +0000 Subject: [PATCH 2/2] chore(cognitive): apply model adjustment --- .../providers/cerebras/cerebras.config.ts | 55 --------------- .../features/providers/groq/groq.config.ts | 67 ++----------------- 2 files changed, 5 insertions(+), 117 deletions(-) diff --git a/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts b/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts index 0db5484..665e691 100644 --- a/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts +++ b/packages/cognitive/src/features/providers/cerebras/cerebras.config.ts @@ -6,61 +6,6 @@ export const CEREBRAS_CONFIG: ProviderConfig = { description: 'Cerebras AI models for high-performance language processing', defaultModel: 'gpt-oss-120b', models: [ - { - id: 'qwen-3-235b-a22b-instruct-2507', - displayName: 'Qwen3 235B A22B Instruct (Preview)', - description: - 'Qwen3-235B A22B is a large mixture-of-experts model with 235B total parameters and 22B active parameters per forward pass. It offers multilingual capabilities with significant improvements in instruction following, reasoning, and coding.', - capabilities: { - supportsText: true, - supportsImages: false, - supportsTools: true, - supportsJsonMode: true, - supportsSystemMessages: true, - supportsStreaming: true, - supportsReasoning: true, - supportsStopSequence: true, - supportsTemperature: true, - }, - limits: { - maxInputTokens: 32_000, - maxOutputTokens: 16_000, - }, - cost: { - inputCostPer1mTokens: 0.6, - outputCostPer1mTokens: 1.2, - }, - tags: ['preview', 'general-purpose', 'reasoning'], - releaseDate: '2025-07-01', - lifecycle: 'production', - }, - { - id: 'zai-glm-4.7', - displayName: 'Z.ai GLM 4.7 (Preview)', - description: - 'Z.ai GLM 4.7 is a large-scale language model from Z.ai with strong coding performance and advanced reasoning capabilities. It features 355B parameters and excels at technical tasks.', - capabilities: { - supportsText: true, - supportsImages: false, - supportsTools: true, - supportsJsonMode: true, - supportsSystemMessages: true, - supportsStreaming: true, - supportsReasoning: true, - supportsStopSequence: true, - supportsTemperature: true, - }, - limits: { - maxInputTokens: 8_000, - maxOutputTokens: 8_000, - }, - cost: { - inputCostPer1mTokens: 2.25, - outputCostPer1mTokens: 2.75, - }, - tags: ['preview', 'general-purpose', 'reasoning', 'coding'], - lifecycle: 'production', - }, { id: 'gpt-oss-120b', displayName: 'GPT-OSS 120B (Preview)', diff --git a/packages/cognitive/src/features/providers/groq/groq.config.ts b/packages/cognitive/src/features/providers/groq/groq.config.ts index 1744ae6..0048857 100644 --- a/packages/cognitive/src/features/providers/groq/groq.config.ts +++ b/packages/cognitive/src/features/providers/groq/groq.config.ts @@ -25,11 +25,11 @@ export const GROQ_CONFIG: ProviderConfig = { }, limits: { maxInputTokens: 131_000, - maxOutputTokens: 65_536, + maxOutputTokens: 32_000, }, cost: { - inputCostPer1mTokens: 0.075, - outputCostPer1mTokens: 0.3, + inputCostPer1mTokens: 0.1, + outputCostPer1mTokens: 0.5, }, tags: ['preview', 'general-purpose', 'reasoning', 'low-cost'], releaseDate: '2024-12-01', @@ -54,73 +54,16 @@ export const GROQ_CONFIG: ProviderConfig = { }, limits: { maxInputTokens: 131_000, - maxOutputTokens: 65_536, + maxOutputTokens: 32_000, }, cost: { inputCostPer1mTokens: 0.15, - outputCostPer1mTokens: 0.6, + outputCostPer1mTokens: 0.75, }, tags: ['preview', 'general-purpose', 'reasoning'], releaseDate: '2024-12-01', lifecycle: 'production', }, - { - id: 'kimi-k2-instruct-0905', - displayName: 'Kimi K2 Instruct 0905 (Preview)', - internalModelId: 'moonshotai/kimi-k2-instruct-0905', - description: - 'Kimi K2 is a state-of-the-art language model from Moonshot AI with strong agentic capabilities, tool use, and long-context reasoning. Optimized for complex instruction-following and multi-step tasks.', - capabilities: { - supportsText: true, - supportsImages: false, - supportsSystemMessages: true, - supportsTools: true, - supportsJsonMode: true, - supportsStreaming: true, - supportsReasoning: true, - supportsStopSequence: true, - supportsTemperature: true, - }, - limits: { - maxInputTokens: 262_144, - maxOutputTokens: 16_384, - }, - cost: { - inputCostPer1mTokens: 1, - outputCostPer1mTokens: 3, - }, - tags: ['preview', 'general-purpose', 'reasoning'], - releaseDate: '2025-09-05', - lifecycle: 'production', - }, - { - id: 'qwen3-32b', - displayName: 'Qwen3 32B (Preview)', - internalModelId: 'qwen/qwen3-32b', - description: - 'Qwen3-32B is a world-class reasoning model with comparable quality to DeepSeek R1 while outperforming GPT-4.1 and Claude Sonnet 3.7. It excels in code-gen, tool-calling, and advanced reasoning.', - capabilities: { - supportsText: true, - supportsImages: false, - supportsSystemMessages: true, - supportsTools: true, - supportsJsonMode: true, - supportsStreaming: true, - supportsReasoning: true, - supportsStopSequence: true, - supportsTemperature: true, - }, - limits: { - maxInputTokens: 131_072, - maxOutputTokens: 40_960, - }, - cost: { - inputCostPer1mTokens: 0.29, - outputCostPer1mTokens: 0.59, - }, - tags: ['preview', 'general-purpose', 'reasoning'], - lifecycle: 'production', - }, { id: 'deepseek-r1-distill-llama-70b', displayName: 'DeepSeek R1-Distill Llama 3.3 70B (Preview)',