From 6ebf9fda670bec457871840d8b5556c06cd68659 Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Tue, 7 Apr 2026 11:06:20 -0400 Subject: [PATCH 1/5] refactor schema --- .gitignore | 1 + CLAUDE.md | 26 +- pyproject.toml | 3 +- scripts/sync_schema.py | 126 ++ scripts/sync_specs.py | 78 - src/jambonz_sdk/__init__.py | 2 + .../schema/callbacks/amd.schema.json | 50 + .../schema/callbacks/base.schema.json | 29 + .../schema/callbacks/call-status.schema.json | 22 + .../callbacks/conference-status.schema.json | 24 + .../callbacks/conference-wait.schema.json | 11 + .../schema/callbacks/conference.schema.json | 11 + .../schema/callbacks/dequeue.schema.json | 19 + .../schema/callbacks/dial-dtmf.schema.json | 18 + .../schema/callbacks/dial-hold.schema.json | 22 + .../schema/callbacks/dial-refer.schema.json | 28 + .../schema/callbacks/dial.schema.json | 31 + .../schema/callbacks/enqueue-wait.schema.json | 17 + .../schema/callbacks/enqueue.schema.json | 27 + .../callbacks/gather-partial.schema.json | 54 + .../schema/callbacks/gather.schema.json | 60 + .../schema/callbacks/listen.schema.json | 21 + .../schema/callbacks/llm.schema.json | 30 + .../schema/callbacks/message.schema.json | 35 + .../callbacks/pipeline-turn.schema.json | 109 ++ .../schema/callbacks/play.schema.json | 36 + .../schema/callbacks/session-new.schema.json | 143 ++ .../callbacks/session-reconnect.schema.json | 9 + .../callbacks/session-redirect.schema.json | 38 + .../callbacks/sip-refer-event.schema.json | 20 + .../schema/callbacks/sip-refer.schema.json | 22 + .../schema/callbacks/sip-request.schema.json | 27 + .../transcribe-translation.schema.json | 24 + .../schema/callbacks/transcribe.schema.json | 46 + .../callbacks/tts-streaming-event.schema.json | 77 + .../schema/callbacks/verb-status.schema.json | 57 + .../schema/components/actionHook.schema.json | 36 + .../actionHookDelayAction.schema.json | 37 + .../schema/components/amd.schema.json | 68 + .../schema/components/auth.schema.json | 18 + 
.../components/bidirectionalAudio.schema.json | 22 + .../schema/components/fillerNoise.schema.json | 25 + .../schema/components/llm-base.schema.json | 94 ++ .../recognizer-assemblyAiOptions.schema.json | 66 + .../recognizer-awsOptions.schema.json | 52 + .../recognizer-azureOptions.schema.json | 32 + .../recognizer-cobaltOptions.schema.json | 34 + .../recognizer-customOptions.schema.json | 27 + .../recognizer-deepgramOptions.schema.json | 147 ++ .../recognizer-elevenlabsOptions.schema.json | 39 + .../recognizer-gladiaOptions.schema.json | 8 + .../recognizer-googleOptions.schema.json | 35 + .../recognizer-houndifyOptions.schema.json | 53 + .../recognizer-ibmOptions.schema.json | 54 + .../recognizer-nuanceOptions.schema.json | 150 ++ .../recognizer-nvidiaOptions.schema.json | 39 + .../recognizer-openaiOptions.schema.json | 59 + .../recognizer-sonioxOptions.schema.json | 46 + ...recognizer-speechmaticsOptions.schema.json | 100 ++ .../recognizer-verbioOptions.schema.json | 46 + .../schema/components/recognizer.schema.json | 216 +++ .../schema/components/synthesizer.schema.json | 82 + .../schema/components/target.schema.json | 105 ++ .../schema/components/vad.schema.json | 48 + .../schema/jambonz-app.schema.json | 112 ++ .../schema/verbs/alert.schema.json | 34 + .../schema/verbs/answer.schema.json | 22 + .../schema/verbs/conference.schema.json | 107 ++ .../schema/verbs/config.schema.json | 218 +++ .../schema/verbs/deepgram_s2s.schema.json | 81 + .../schema/verbs/dequeue.schema.json | 51 + src/jambonz_sdk/schema/verbs/dial.schema.json | 187 +++ .../schema/verbs/dialogflow.schema.json | 148 ++ src/jambonz_sdk/schema/verbs/dtmf.schema.json | 49 + src/jambonz_sdk/schema/verbs/dub.schema.json | 103 ++ .../schema/verbs/elevenlabs_s2s.schema.json | 81 + .../schema/verbs/enqueue.schema.json | 53 + .../schema/verbs/gather.schema.json | 188 +++ .../schema/verbs/google_s2s.schema.json | 42 + .../schema/verbs/hangup.schema.json | 36 + .../schema/verbs/leave.schema.json | 22 + 
.../schema/verbs/listen.schema.json | 127 ++ src/jambonz_sdk/schema/verbs/llm.schema.json | 44 + .../schema/verbs/message.schema.json | 82 + .../schema/verbs/openai_s2s.schema.json | 42 + .../schema/verbs/pause.schema.json | 36 + .../schema/verbs/pipeline.schema.json | 240 +++ src/jambonz_sdk/schema/verbs/play.schema.json | 96 ++ .../schema/verbs/redirect.schema.json | 34 + src/jambonz_sdk/schema/verbs/s2s.schema.json | 39 + src/jambonz_sdk/schema/verbs/say.schema.json | 107 ++ .../schema/verbs/sip-decline.schema.json | 58 + .../schema/verbs/sip-refer.schema.json | 58 + .../schema/verbs/sip-request.schema.json | 54 + .../schema/verbs/stream.schema.json | 103 ++ src/jambonz_sdk/schema/verbs/tag.schema.json | 41 + .../schema/verbs/transcribe.schema.json | 57 + .../schema/verbs/ultravox_s2s.schema.json | 41 + src/jambonz_sdk/specs.json | 1423 ----------------- src/jambonz_sdk/validator.py | 107 ++ src/jambonz_sdk/verb_builder.py | 217 ++- 101 files changed, 6046 insertions(+), 1585 deletions(-) create mode 100644 scripts/sync_schema.py delete mode 100644 scripts/sync_specs.py create mode 100644 src/jambonz_sdk/schema/callbacks/amd.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/base.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/call-status.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/conference-status.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/conference-wait.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/conference.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/dequeue.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/dial-dtmf.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/dial-hold.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/dial-refer.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/dial.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/enqueue-wait.schema.json create mode 100644 
src/jambonz_sdk/schema/callbacks/enqueue.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/gather-partial.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/gather.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/listen.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/llm.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/message.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/pipeline-turn.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/play.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/session-new.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/session-reconnect.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/session-redirect.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/sip-refer-event.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/sip-refer.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/sip-request.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/transcribe-translation.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/transcribe.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/tts-streaming-event.schema.json create mode 100644 src/jambonz_sdk/schema/callbacks/verb-status.schema.json create mode 100644 src/jambonz_sdk/schema/components/actionHook.schema.json create mode 100644 src/jambonz_sdk/schema/components/actionHookDelayAction.schema.json create mode 100644 src/jambonz_sdk/schema/components/amd.schema.json create mode 100644 src/jambonz_sdk/schema/components/auth.schema.json create mode 100644 src/jambonz_sdk/schema/components/bidirectionalAudio.schema.json create mode 100644 src/jambonz_sdk/schema/components/fillerNoise.schema.json create mode 100644 src/jambonz_sdk/schema/components/llm-base.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-assemblyAiOptions.schema.json create mode 
100644 src/jambonz_sdk/schema/components/recognizer-awsOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-azureOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-cobaltOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-customOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-deepgramOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-elevenlabsOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-gladiaOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-googleOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-houndifyOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-ibmOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-nuanceOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-nvidiaOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-openaiOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-sonioxOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-speechmaticsOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer-verbioOptions.schema.json create mode 100644 src/jambonz_sdk/schema/components/recognizer.schema.json create mode 100644 src/jambonz_sdk/schema/components/synthesizer.schema.json create mode 100644 src/jambonz_sdk/schema/components/target.schema.json create mode 100644 src/jambonz_sdk/schema/components/vad.schema.json create mode 100644 src/jambonz_sdk/schema/jambonz-app.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/alert.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/answer.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/conference.schema.json 
create mode 100644 src/jambonz_sdk/schema/verbs/config.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/deepgram_s2s.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/dequeue.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/dial.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/dialogflow.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/dtmf.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/dub.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/elevenlabs_s2s.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/enqueue.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/gather.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/google_s2s.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/hangup.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/leave.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/listen.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/llm.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/message.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/openai_s2s.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/pause.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/pipeline.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/play.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/redirect.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/s2s.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/say.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/sip-decline.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/sip-refer.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/sip-request.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/stream.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/tag.schema.json create mode 100644 src/jambonz_sdk/schema/verbs/transcribe.schema.json create mode 100644 
src/jambonz_sdk/schema/verbs/ultravox_s2s.schema.json delete mode 100644 src/jambonz_sdk/specs.json create mode 100644 src/jambonz_sdk/validator.py diff --git a/.gitignore b/.gitignore index d15759d..3c01e83 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ htmlcov/ .coverage *.egg mcp.json +.vscode/ diff --git a/CLAUDE.md b/CLAUDE.md index bef2441..d7c250b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -58,8 +58,8 @@ The SDK supports all 26+ jambonz verbs. Verb methods on VerbBuilder are **auto-g ### How verb generation works 1. `verb_registry.py` defines which spec entries are verbs, their Python method names, JSON verb names, and any synonym transforms -2. `verb_builder.py` loads `specs.json` at import time and generates a method for each registry entry -3. Each generated method has typed parameters, docstrings, and required-field documentation — all derived from the spec +2. `verb_builder.py` loads JSON Schema files from `schema/verbs/` at import time and generates a method for each registry entry +3. Each generated method has typed parameters, docstrings, and required-field documentation — all derived from the schema 4. To add a new verb: add one `VerbDef` entry in `verb_registry.py` — no other changes needed ### Verb List @@ -80,24 +80,24 @@ Utility: `config`, `tag`, `dtmf`, `dub`, `message`, `alert`, `answer`, `leave` SIP verbs use underscores: `sip_decline()`, `sip_request()`, `sip_refer()` (maps to `sip:decline`, `sip:request`, `sip:refer` in JSON). -## specs.json Management +## JSON Schema Management -The SDK bundles `specs.json` from `@jambonz/verb-specifications` (npm package / GitHub repo). -The file lives at `src/jambonz_sdk/specs.json` and is included in the wheel. +The SDK bundles JSON Schema files from `@jambonz/schema` (npm package / GitHub repo). +Schema files live at `src/jambonz_sdk/schema/` and are included in the wheel. 
-To update when the upstream spec changes: +To update when the upstream schema changes: ```bash -# From local sibling clone (default) -python scripts/sync_specs.py +# Download the pinned version +python scripts/sync_schema.py -# From GitHub main branch -python scripts/sync_specs.py --github +# Download a specific version +python scripts/sync_schema.py v0.1.1 -# From a specific file -python scripts/sync_specs.py /path/to/specs.json +# Copy from a local directory +python scripts/sync_schema.py --local /path/to/schema ``` -Source: https://github.com/jambonz/verb-specifications +Source: https://github.com/jambonz/schema ## AI Agent Support diff --git a/pyproject.toml b/pyproject.toml index 09e7fdc..e904e20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ ] dependencies = [ "aiohttp>=3.9", + "jsonschema>=4.20", ] [project.urls] @@ -46,7 +47,7 @@ dev = [ packages = ["src/jambonz_sdk"] [tool.hatch.build] -include = ["src/jambonz_sdk/**/*.py", "src/jambonz_sdk/**/*.pyi", "src/jambonz_sdk/specs.json"] +include = ["src/jambonz_sdk/**/*.py", "src/jambonz_sdk/**/*.pyi", "src/jambonz_sdk/schema/**/*.json"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/scripts/sync_schema.py b/scripts/sync_schema.py new file mode 100644 index 0000000..40967a4 --- /dev/null +++ b/scripts/sync_schema.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +"""Sync JSON Schema files from the @jambonz/schema repo. + +Downloads verb, component, and callback schemas and bundles them into this +package. Run this whenever the upstream schema changes. 
+ +Usage: + # Download the pinned version (SCHEMA_VERSION below) + python scripts/sync_schema.py + + # Download a specific version tag + python scripts/sync_schema.py v0.1.1 + + # Copy from a local directory instead + python scripts/sync_schema.py --local /path/to/schema +""" + +import json +import shutil +import sys +import urllib.request +from pathlib import Path + +# ── Pin the schema version here ────────────────────────────────────── +SCHEMA_VERSION = "v0.1.1" +# ──────────────────────────────────────────────────────────────────── + +DEST = Path(__file__).resolve().parent.parent / "src" / "jambonz_sdk" / "schema" +GITHUB_RAW = "https://raw.githubusercontent.com/jambonz/schema/{version}" + +SUBDIRS = ["verbs", "components", "callbacks"] + + +def download_file(url: str, dest: Path) -> None: + urllib.request.urlretrieve(url, dest) + + +def sync_from_github(version: str) -> None: + base_url = GITHUB_RAW.format(version=version) + + # Ensure destination exists + DEST.mkdir(parents=True, exist_ok=True) + + # Download root schema + root_schema = "jambonz-app.schema.json" + print(f"Downloading {root_schema}...") + download_file(f"{base_url}/{root_schema}", DEST / root_schema) + + # Download each subdirectory's index and files + total = 0 + for subdir in SUBDIRS: + subdir_path = DEST / subdir + subdir_path.mkdir(exist_ok=True) + + # GitHub doesn't have a directory listing API on raw, so we fetch + # the known file list from the root schema's $ref entries or use + # the GitHub API + api_url = ( + f"https://api.github.com/repos/jambonz/schema/contents/{subdir}" + f"?ref={version}" + ) + print(f"Fetching {subdir}/ file list...") + req = urllib.request.Request(api_url, headers={"Accept": "application/json"}) + with urllib.request.urlopen(req) as resp: + files = json.loads(resp.read()) + + schema_files = [f["name"] for f in files if f["name"].endswith(".schema.json")] + for fname in schema_files: + download_file(f"{base_url}/{subdir}/{fname}", subdir_path / fname) + total += 
1 + + print(f"Downloaded {total} schema files + root schema → {DEST} (version {version})") + + +def sync_from_local(src: Path) -> None: + if not src.is_dir(): + print(f"Error: {src} is not a directory") + sys.exit(1) + + # Clean and recreate destination + if DEST.exists(): + shutil.rmtree(DEST) + DEST.mkdir(parents=True, exist_ok=True) + + # Copy root schema + root_schema = src / "jambonz-app.schema.json" + if root_schema.exists(): + shutil.copy2(root_schema, DEST / "jambonz-app.schema.json") + + # Copy subdirectories + total = 0 + for subdir in SUBDIRS: + src_dir = src / subdir + if src_dir.is_dir(): + dest_dir = DEST / subdir + dest_dir.mkdir(exist_ok=True) + for f in src_dir.glob("*.schema.json"): + shutil.copy2(f, dest_dir / f.name) + total += 1 + + print(f"Copied {total} schema files + root schema from {src} → {DEST}") + + +def main() -> None: + if len(sys.argv) > 1: + arg = sys.argv[1] + if arg == "--local": + if len(sys.argv) < 3: + print("Usage: python scripts/sync_schema.py --local /path/to/schema") + sys.exit(1) + sync_from_local(Path(sys.argv[2])) + elif arg.startswith("v"): + sync_from_github(arg) + else: + print(f"Unknown argument: {arg}") + print("Usage:") + print(" python scripts/sync_schema.py # download pinned version") + print(" python scripts/sync_schema.py v0.1.1 # download specific version") + print(" python scripts/sync_schema.py --local /path/to/schema") + sys.exit(1) + else: + sync_from_github(SCHEMA_VERSION) + + +if __name__ == "__main__": + main() diff --git a/scripts/sync_specs.py b/scripts/sync_specs.py deleted file mode 100644 index e0389f5..0000000 --- a/scripts/sync_specs.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python3 -"""Sync specs.json from the @jambonz/verb-specifications repo. - -Downloads specs.json for a specific version tag and bundles it into this -package. Run this whenever the upstream spec changes. 
- -Usage: - # Download the pinned version (SPECS_VERSION below) - python scripts/sync_specs.py - - # Download a specific version - python scripts/sync_specs.py v0.1.10 - - # Copy from a local file instead - python scripts/sync_specs.py --local /path/to/specs.json -""" - -import json -import shutil -import sys -import urllib.request -from pathlib import Path - -# ── Pin the specs version here ────────────────────────────────────── -SPECS_VERSION = "v0.1.11" -# ──────────────────────────────────────────────────────────────────── - -DEST = Path(__file__).resolve().parent.parent / "src" / "jambonz_sdk" / "specs.json" -GITHUB_URL_TEMPLATE = ( - "https://raw.githubusercontent.com/jambonz/verb-specifications/{version}/specs.json" -) - - -def sync_from_github(version: str) -> None: - url = GITHUB_URL_TEMPLATE.format(version=version) - print(f"Downloading specs.json {version} from {url}") - urllib.request.urlretrieve(url, DEST) - - # Verify it's valid JSON - with DEST.open() as f: - specs = json.load(f) - verb_count = len(specs) - print(f"Downloaded → {DEST} ({verb_count} entries, version {version})") - - -def sync_from_file(src: Path) -> None: - if not src.is_file(): - print(f"Error: {src} not found") - sys.exit(1) - shutil.copy2(src, DEST) - with DEST.open() as f: - specs = json.load(f) - print(f"Copied {src} → {DEST} ({len(specs)} entries)") - - -def main() -> None: - if len(sys.argv) > 1: - arg = sys.argv[1] - if arg == "--local": - if len(sys.argv) < 3: - print("Usage: python scripts/sync_specs.py --local /path/to/specs.json") - sys.exit(1) - sync_from_file(Path(sys.argv[2])) - elif arg.startswith("v"): - sync_from_github(arg) - else: - print(f"Unknown argument: {arg}") - print("Usage:") - print(" python scripts/sync_specs.py # download pinned version") - print(" python scripts/sync_specs.py v0.1.10 # download specific version") - print(" python scripts/sync_specs.py --local /path/to/specs.json") - sys.exit(1) - else: - sync_from_github(SPECS_VERSION) - - -if __name__ 
== "__main__": - main() diff --git a/src/jambonz_sdk/__init__.py b/src/jambonz_sdk/__init__.py index 3d94b48..875b655 100644 --- a/src/jambonz_sdk/__init__.py +++ b/src/jambonz_sdk/__init__.py @@ -39,11 +39,13 @@ def handle_session(session): # Re-export main classes for convenience from jambonz_sdk.client import JambonzClient +from jambonz_sdk.validator import JambonzValidator from jambonz_sdk.verb_builder import VerbBuilder from jambonz_sdk.webhook import WebhookResponse __all__ = [ "JambonzClient", + "JambonzValidator", "VerbBuilder", "WebhookResponse", "__version__", diff --git a/src/jambonz_sdk/schema/callbacks/amd.schema.json b/src/jambonz_sdk/schema/callbacks/amd.schema.json new file mode 100644 index 0000000..0609548 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/amd.schema.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/amd", + "title": "AMD ActionHook Payload", + "description": "Payload sent to the AMD actionHook when an answering machine detection event occurs. Multiple events may fire during a single call (e.g. amd_machine_detected followed by amd_machine_stopped_speaking or amd_tone_detected).", + "allOf": [{ "$ref": "base" }], + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The AMD event type. IMPORTANT: This field is 'type', NOT 'amd_type'.", + "enum": [ + "amd_human_detected", + "amd_machine_detected", + "amd_no_speech_detected", + "amd_decision_timeout", + "amd_machine_stopped_speaking", + "amd_tone_detected", + "amd_tone_timeout", + "amd_error", + "amd_stopped" + ] + }, + "reason": { + "type": "string", + "description": "Reason for the detection result (e.g. 'short greeting', 'long greeting', 'hint', 'digit count'). Present on amd_human_detected and amd_machine_detected events." + }, + "greeting": { + "type": "string", + "description": "The transcribed greeting text. 
Present on amd_human_detected and amd_machine_detected events." + }, + "hint": { + "type": "string", + "description": "The voicemail hint that matched, if detection was triggered by hint matching." + }, + "language": { + "type": "string", + "description": "Language code from the transcription (e.g. 'en-US'). Present on amd_human_detected and amd_machine_detected events." + }, + "frequency": { + "type": "number", + "description": "Frequency of the detected beep in Hz. Present on amd_tone_detected events." + }, + "variance": { + "type": "number", + "description": "Frequency variance of the detected beep. Present on amd_tone_detected events." + } + }, + "required": ["type"] +} diff --git a/src/jambonz_sdk/schema/callbacks/base.schema.json b/src/jambonz_sdk/schema/callbacks/base.schema.json new file mode 100644 index 0000000..6457daa --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/base.schema.json @@ -0,0 +1,29 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/base", + "title": "ActionHook Base Payload", + "description": "Common fields present in every actionHook callback payload. All verb-specific callback schemas extend this base.", + "type": "object", + "properties": { + "call_sid": { "type": "string", "description": "Unique identifier for this call." }, + "account_sid": { "type": "string", "description": "Account identifier." }, + "application_sid": { "type": "string", "description": "Application identifier." }, + "direction": { "type": "string", "enum": ["inbound", "outbound"], "description": "Call direction." }, + "from": { "type": "string", "description": "Caller phone number or SIP URI." }, + "to": { "type": "string", "description": "Called phone number or SIP URI." }, + "call_id": { "type": "string", "description": "SIP Call-ID." }, + "sbc_callid": { "type": "string", "description": "SBC-level Call-ID." 
}, + "call_status": { + "type": "string", + "enum": ["trying", "ringing", "early-media", "in-progress", "completed", "failed", "busy", "no-answer", "queued"], + "description": "Current call state." + }, + "sip_status": { "type": "integer", "description": "SIP response code (e.g. 200, 486)." }, + "sip_reason": { "type": "string", "description": "SIP reason phrase (e.g. 'OK', 'Busy Here')." }, + "trace_id": { "type": "string", "description": "Distributed tracing identifier for correlating logs across jambonz components." }, + "originating_sip_ip": { "type": "string", "description": "IP address of the originating SIP trunk." }, + "originating_sip_trunk_name": { "type": "string", "description": "Name of the originating SIP trunk as configured in jambonz." }, + "api_base_url": { "type": "string", "description": "jambonz REST API base URL. Use this for mid-call control via the REST API." } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/call-status.schema.json b/src/jambonz_sdk/schema/callbacks/call-status.schema.json new file mode 100644 index 0000000..8f81897 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/call-status.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/call-status", + "title": "Call Status Webhook Payload", + "description": "Payload sent to the call status webhook URL whenever the call state changes (e.g. trying, in-progress, completed). The status webhook is configured at the application level in jambonz. Multiple status events are sent over the life of a call. The final event (completed or failed) includes additional fields like duration and termination cause.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "call_termination_by": { + "type": "string", + "enum": ["caller", "jambonz"], + "description": "Who terminated the call. 
'caller' if the remote party hung up, 'jambonz' if the call was ended by the application (e.g. hangup verb, REST API). Present only on the final status event (completed/failed)." + }, + "duration": { + "type": "integer", + "description": "Call duration in seconds. Present only on the final status event (completed/failed)." + } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/conference-status.schema.json b/src/jambonz_sdk/schema/callbacks/conference-status.schema.json new file mode 100644 index 0000000..3bded96 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/conference-status.schema.json @@ -0,0 +1,24 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/conference-status", + "title": "Conference StatusHook Payload", + "description": "Payload sent to the conference statusHook when a conference event occurs.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "event": { + "type": "string", + "enum": ["start", "end", "join", "leave", "start-talking", "stop-talking"], + "description": "The conference event that occurred." + }, + "conference_sid": { "type": "string", "description": "Conference identifier (format: conf::)." }, + "friendly_name": { "type": "string", "description": "The conference name as specified in the conference verb." }, + "duration": { "type": "number", "description": "Time in seconds since the conference started." }, + "members": { "type": "integer", "description": "Current number of participants in the conference." }, + "time": { "type": "string", "format": "date-time", "description": "ISO 8601 timestamp of when the event occurred." 
} + }, + "required": ["event", "conference_sid"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/conference-wait.schema.json b/src/jambonz_sdk/schema/callbacks/conference-wait.schema.json new file mode 100644 index 0000000..3a82a8a --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/conference-wait.schema.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/conference-wait", + "title": "Conference WaitHook Payload", + "description": "Payload sent to the conference waitHook while a participant is waiting for the conference to start. The response should contain an array of say, play, and/or pause verbs to play while waiting.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/conference.schema.json b/src/jambonz_sdk/schema/callbacks/conference.schema.json new file mode 100644 index 0000000..8f2c0b8 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/conference.schema.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/conference", + "title": "Conference ActionHook Payload", + "description": "Payload sent to the conference verb's actionHook when the participant leaves or the conference ends. 
Contains only the base call info.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/dequeue.schema.json b/src/jambonz_sdk/schema/callbacks/dequeue.schema.json new file mode 100644 index 0000000..66f2c46 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/dequeue.schema.json @@ -0,0 +1,19 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/dequeue", + "title": "Dequeue ActionHook Payload", + "description": "Payload sent to the dequeue verb's actionHook when the dequeue operation completes.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "dequeue_result": { + "type": "string", + "enum": ["complete", "timeout", "hangup"], + "description": "Outcome of the dequeue. 'complete' — successfully dequeued and bridged; 'timeout' — no queued caller found; 'hangup' — call hung up during bridge." + } + }, + "required": ["dequeue_result"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/dial-dtmf.schema.json b/src/jambonz_sdk/schema/callbacks/dial-dtmf.schema.json new file mode 100644 index 0000000..64160d7 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/dial-dtmf.schema.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/dial-dtmf", + "title": "Dial DtmfHook Payload", + "description": "Payload sent to the dial dtmfHook when DTMF digits matching the configured pattern are detected during a dial.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "dtmf": { + "type": "string", + "description": "The DTMF digit sequence that matched the configured dtmfCapture pattern." 
+ } + }, + "required": ["dtmf"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/dial-hold.schema.json b/src/jambonz_sdk/schema/callbacks/dial-hold.schema.json new file mode 100644 index 0000000..ba2589a --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/dial-hold.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/dial-hold", + "title": "Dial OnHoldHook Payload", + "description": "Payload sent to the dial onHoldHook when the remote party places the call on hold. The response should contain say, play, and/or pause verbs to play as hold music. The hook is called repeatedly until the hold ends.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "hold_detail": { + "type": "object", + "description": "Details of the hold event.", + "properties": { + "from": { "type": "string", "description": "SIP From header value." }, + "to": { "type": "string", "description": "SIP To header value." } + } + } + }, + "required": ["hold_detail"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/dial-refer.schema.json b/src/jambonz_sdk/schema/callbacks/dial-refer.schema.json new file mode 100644 index 0000000..69c56fd --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/dial-refer.schema.json @@ -0,0 +1,28 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/dial-refer", + "title": "Dial ReferHook Payload", + "description": "Payload sent to the dial referHook when a SIP REFER is received during an active dial. The response can contain new verbs to execute.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "refer_details": { + "type": "object", + "description": "Details of the SIP REFER request. Any custom X-* headers on the REFER are also included as snake_cased properties (e.g. 
X-Override-Number becomes x_override_number).", + "properties": { + "sip_refer_to": { "type": "string", "description": "Full SIP Refer-To header value." }, + "refer_to_user": { "type": "string", "description": "User part of the Refer-To URI (phone number or SIP user)." }, + "sip_referred_by": { "type": "string", "description": "SIP Referred-By header value, if present." }, + "referred_by_user": { "type": "string", "description": "User part of the Referred-By URI, if present." }, + "sip_user_agent": { "type": "string", "description": "User-Agent header from the REFER request." }, + "referring_call_sid": { "type": "string", "description": "Call SID of the leg that sent the REFER." }, + "referred_call_sid": { "type": "string", "description": "Call SID of the leg being referred." } + }, + "additionalProperties": true + } + }, + "required": ["refer_details"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/dial.schema.json b/src/jambonz_sdk/schema/callbacks/dial.schema.json new file mode 100644 index 0000000..53a1236 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/dial.schema.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/dial", + "title": "Dial ActionHook Payload", + "description": "Payload sent to the actionHook when a dial verb completes, either because the dialed party hung up, the call was not answered, or an error occurred.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "dial_call_status": { + "type": "string", + "enum": ["completed", "failed", "busy", "no-answer", "trying", "ringing", "early-media", "in-progress"], + "description": "Final status of the outbound (dialed) call leg." + }, + "dial_sip_status": { + "type": "integer", + "description": "SIP response code from the dialed party (e.g. 
200, 486, 487).", + "examples": [200, 486, 487] + }, + "dial_call_sid": { + "type": "string", + "description": "Call SID of the outbound (dialed) call leg." + }, + "dial_sbc_callid": { + "type": "string", + "description": "SBC-level Call-ID for the outbound (dialed) call leg." + } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/enqueue-wait.schema.json b/src/jambonz_sdk/schema/callbacks/enqueue-wait.schema.json new file mode 100644 index 0000000..b9d46ec --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/enqueue-wait.schema.json @@ -0,0 +1,17 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/enqueue-wait", + "title": "Enqueue WaitHook Payload", + "description": "Payload sent to the enqueue waitHook while a caller is waiting in the queue. The response should contain an array of say, play, pause, and/or leave verbs. Note: this payload is sparse — it contains queue-specific fields plus call_sid and call_id, but NOT the full base payload fields.", + "type": "object", + "properties": { + "queue_sid": { "type": "string", "description": "Queue identifier in the format 'queue:{account_sid}:{queue_name}'." }, + "queue_time": { "type": "integer", "description": "Time in seconds the caller has been waiting in the queue." }, + "queue_size": { "type": "integer", "description": "Total number of callers currently in the queue." }, + "queue_position": { "type": "integer", "description": "Caller's current position in the queue (0-based)." }, + "call_sid": { "type": "string", "description": "Unique identifier for this call." }, + "call_id": { "type": "string", "description": "SIP Call-ID." 
} + }, + "required": ["queue_sid", "queue_time"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/enqueue.schema.json b/src/jambonz_sdk/schema/callbacks/enqueue.schema.json new file mode 100644 index 0000000..7356e69 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/enqueue.schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/enqueue", + "title": "Enqueue ActionHook Payload", + "description": "Payload sent to the enqueue actionHook when the enqueue verb completes — i.e. the call was bridged, abandoned, left the queue, or an error occurred.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "queue_sid": { + "type": "string", + "description": "Queue identifier in the format 'queue:{account_sid}:{queue_name}'." + }, + "queue_time": { + "type": "integer", + "description": "Time in seconds the caller spent waiting in the queue." + }, + "queue_result": { + "type": "string", + "enum": ["bridged", "hangup", "leave", "error"], + "description": "Outcome of the enqueue. 'bridged' — call was dequeued and connected; 'hangup' — caller hung up while waiting; 'leave' — caller executed a leave verb from the waitHook; 'error' — an error occurred during bridging." + } + }, + "required": ["queue_sid", "queue_time", "queue_result"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/gather-partial.schema.json b/src/jambonz_sdk/schema/callbacks/gather-partial.schema.json new file mode 100644 index 0000000..b110537 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/gather-partial.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/gather-partial", + "title": "Gather Partial Transcript Payload", + "description": "Payload sent to the partialResultHook during a gather verb with speech input. 
Delivers interim (partial) speech recognition results as they arrive, before the gather completes. This hook is informational — the response is ignored and does not replace the verb stack. Note: the base fields use slightly different names than the actionHook payload (e.g. local_sip_address instead of fs_sip_address).", + "type": "object", + "properties": { + "call_sid": { "type": "string", "description": "Unique identifier for this call." }, + "account_sid": { "type": "string", "description": "Account identifier." }, + "application_sid": { "type": "string", "description": "Application identifier." }, + "direction": { "type": "string", "enum": ["inbound", "outbound"], "description": "Call direction." }, + "from": { "type": "string", "description": "Caller phone number or SIP URI." }, + "to": { "type": "string", "description": "Called phone number or SIP URI." }, + "call_id": { "type": "string", "description": "SIP Call-ID." }, + "sbc_callid": { "type": "string", "description": "SBC-level Call-ID." }, + "call_status": { "type": "string", "description": "Current call state." }, + "sip_status": { "type": "integer", "description": "SIP response code." }, + "sip_reason": { "type": "string", "description": "SIP reason phrase." }, + "trace_id": { "type": "string", "description": "Distributed tracing identifier." }, + "b3": { "type": "string", "description": "B3 trace propagation header." }, + "caller_name": { "type": "string", "description": "Caller display name from SIP, if available." }, + "originating_sip_ip": { "type": "string", "description": "IP address of the originating SIP trunk." }, + "originating_sip_trunk_name": { "type": "string", "description": "Name of the originating SIP trunk." }, + "speech": { + "type": "object", + "description": "Interim speech recognition results.", + "properties": { + "language_code": { "type": "string", "description": "Language code used for recognition (e.g. 'en-US')." 
}, + "channel_tag": { "type": "integer", "description": "Audio channel number." }, + "is_final": { "type": "boolean", "description": "Always false for partial results." }, + "alternatives": { + "type": "array", + "items": { + "type": "object", + "properties": { + "transcript": { "type": "string", "description": "The partial transcript recognized so far." }, + "confidence": { "type": "number", "description": "Confidence score between 0 and 1." } + } + }, + "description": "Array of recognition alternatives, ordered by confidence." + }, + "vendor": { + "type": "object", + "description": "Vendor-specific STT data. Structure varies by provider.", + "properties": { + "name": { "type": "string", "description": "STT vendor name (e.g. 'deepgram', 'google', 'aws')." }, + "evt": { "type": "object", "description": "Raw vendor-specific event payload.", "additionalProperties": true } + } + } + } + } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/gather.schema.json b/src/jambonz_sdk/schema/callbacks/gather.schema.json new file mode 100644 index 0000000..a3e151c --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/gather.schema.json @@ -0,0 +1,60 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/gather", + "title": "Gather ActionHook Payload", + "description": "Payload sent to the actionHook when a gather verb completes, either due to speech detected, DTMF detected, or timeout.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "reason": { + "type": "string", + "enum": ["speechDetected", "dtmfDetected", "timeout", "error", "stt-low-confidence"], + "description": "The reason the gather completed." + }, + "speech": { + "type": "object", + "description": "Speech recognition results, present when reason is speechDetected.", + "properties": { + "language_code": { "type": "string", "description": "Language code used for recognition (e.g. 'en-US')." 
}, + "channel_tag": { "type": "integer", "description": "Audio channel number." }, + "is_final": { "type": "boolean", "description": "Whether this is a final (not interim) recognition result." }, + "alternatives": { + "type": "array", + "items": { + "type": "object", + "properties": { + "transcript": { "type": "string", "description": "The recognized transcript." }, + "confidence": { "type": "number", "description": "Confidence score between 0 and 1." } + } + }, + "description": "Array of recognition alternatives, ordered by confidence." + }, + "vendor": { + "type": "object", + "description": "Vendor-specific STT data. Structure varies by provider.", + "properties": { + "name": { "type": "string", "description": "STT vendor name (e.g. 'deepgram', 'google', 'aws')." }, + "evt": { + "description": "Raw vendor-specific event payload. A single object for simple utterances, or an array of objects when jambonz assembles the final transcript from multiple STT segments. Contains provider-specific fields like word-level timestamps, model info, etc.", + "oneOf": [ + { "type": "object", "additionalProperties": true }, + { "type": "array", "items": { "type": "object", "additionalProperties": true } } + ] + } + } + } + } + }, + "digits": { + "type": "string", + "description": "DTMF digits collected, present when reason is dtmfDetected." + }, + "details": { + "type": "object", + "description": "Error information if reason is error." 
+ } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/listen.schema.json b/src/jambonz_sdk/schema/callbacks/listen.schema.json new file mode 100644 index 0000000..847d25b --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/listen.schema.json @@ -0,0 +1,21 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/listen", + "title": "Listen ActionHook Payload", + "description": "Payload sent to the listen verb's actionHook when the listen verb completes.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "dial_call_duration": { + "type": "integer", + "description": "Duration of the listen session in seconds, present if recording was active." + }, + "digits": { + "type": "string", + "description": "DTMF digit that ended the listen, present if finishOnKey was configured and pressed." + } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/llm.schema.json b/src/jambonz_sdk/schema/callbacks/llm.schema.json new file mode 100644 index 0000000..67e73a5 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/llm.schema.json @@ -0,0 +1,30 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/llm", + "title": "LLM ActionHook Payload", + "description": "Payload sent to the llm verb's actionHook when the LLM session ends. Applies to all LLM providers (OpenAI, Google, ElevenLabs, Ultravox, and generic voice agents).", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "completion_reason": { + "type": "string", + "enum": [ + "normal conversation end", + "connection failure", + "disconnect from remote end", + "server failure", + "server error", + "client error calling function", + "client error calling mcp function" + ], + "description": "Reason the LLM session ended." 
+ }, + "error": { + "description": "Error details, present when completion_reason indicates a failure." + } + }, + "required": ["completion_reason"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/message.schema.json b/src/jambonz_sdk/schema/callbacks/message.schema.json new file mode 100644 index 0000000..e94aa5f --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/message.schema.json @@ -0,0 +1,35 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/message", + "title": "Message ActionHook Payload", + "description": "Payload sent to the message verb's actionHook with the delivery status of an outbound SMS.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "message_sid": { + "type": "string", + "description": "Unique identifier for the message." + }, + "message_status": { + "type": "string", + "enum": ["success", "failure", "no carriers", "smpp configuration error", "system error"], + "description": "Delivery status of the message." + }, + "carrier": { + "type": "string", + "description": "Name of the carrier used to send the message. Present on success or carrier-level failure." + }, + "carrier_message_id": { + "type": "string", + "description": "Message ID returned by the carrier. Present on success or carrier-level failure." + }, + "message_failure_reason": { + "type": "string", + "description": "Reason for failure. Present when message_status is 'failure' or 'system error'." 
+ } + }, + "required": ["message_sid", "message_status"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/pipeline-turn.schema.json b/src/jambonz_sdk/schema/callbacks/pipeline-turn.schema.json new file mode 100644 index 0000000..10829fe --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/pipeline-turn.schema.json @@ -0,0 +1,109 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/pipeline-turn", + "title": "Pipeline EventHook Events", + "description": "Events sent to the pipeline verb's eventHook during a conversation. These are sent as 'pipeline:event' messages over the WebSocket connection.", + "type": "object", + "oneOf": [ + { + "properties": { + "type": { + "const": "turn_end", + "description": "Sent at the end of each conversational turn." + }, + "transcript": { + "type": "string", + "description": "The user's final speech-to-text transcript for this turn." + }, + "response": { + "type": "string", + "description": "The assistant's text response for this turn. May be trimmed to what was actually spoken if the turn was interrupted and alignment tracking is enabled." + }, + "interrupted": { + "type": "boolean", + "description": "True if the user interrupted the assistant before it finished speaking.", + "default": false + }, + "latency": { + "type": "object", + "description": "Latency metrics for this turn, all in milliseconds. Fields are absent when not applicable.", + "properties": { + "transcriber_latency": { + "type": "integer", + "description": "STT processing latency: time from user stops talking until final transcript received, in milliseconds." + }, + "turn_detection_latency": { + "type": "integer", + "description": "Additional wait after final transcript for end-of-turn detection, in milliseconds. Absent when EOT fires before or with the transcript." 
+ }, + "model_latency": { + "type": "integer", + "description": "LLM latency: time spent waiting for the first LLM token after the system is ready to prompt, in milliseconds. Absent on a preflight hit." + }, + "voice_latency": { + "type": "integer", + "description": "TTS engine latency: time from first text sent to the TTS engine until first audio received back, in milliseconds." + }, + "preflight": { + "type": "object", + "description": "Early generation (preflight) metrics. Only present when earlyGeneration is enabled.", + "properties": { + "result": { + "type": "string", + "enum": ["hit", "miss", "pending"], + "description": "Whether the speculative preflight transcript matched the final transcript ('hit'), did not match ('miss'), or was still in progress ('pending')." + }, + "tokens": { + "type": "integer", + "description": "Number of preflight tokens that were buffered. Only present on a 'hit'." + } + }, + "additionalProperties": false + } + }, + "additionalProperties": false + } + }, + "required": ["type", "transcript", "response", "interrupted", "latency"], + "additionalProperties": false + }, + { + "properties": { + "type": { + "const": "user_transcript", + "description": "Sent when the user's final transcript is available and the system is proceeding to prompt the LLM. This indicates the end of the user's speech input for the current turn." + }, + "transcript": { + "type": "string", + "description": "The user's final speech-to-text transcript." + } + }, + "required": ["type", "transcript"], + "additionalProperties": false + }, + { + "properties": { + "type": { + "const": "agent_response", + "description": "Sent when the LLM has finished generating its response for the current turn. Contains the complete response text." + }, + "response": { + "type": "string", + "description": "The assistant's complete text response." 
+ } + }, + "required": ["type", "response"], + "additionalProperties": false + }, + { + "properties": { + "type": { + "const": "user_interruption", + "description": "Sent when the user barges in (interrupts) while the assistant is speaking. This event has no additional data." + } + }, + "required": ["type"], + "additionalProperties": false + } + ] +} diff --git a/src/jambonz_sdk/schema/callbacks/play.schema.json b/src/jambonz_sdk/schema/callbacks/play.schema.json new file mode 100644 index 0000000..30734d0 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/play.schema.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/play", + "title": "Play ActionHook Payload", + "description": "Payload sent to the play verb's actionHook when playback completes or fails.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "reason": { + "type": "string", + "enum": ["playCompleted", "playFailed"], + "description": "Outcome of playback. 'playCompleted' on success, 'playFailed' on file-not-found or other error." + }, + "playback_seconds": { + "type": "integer", + "description": "Total playback duration in seconds. Present when reason is 'playCompleted'." + }, + "playback_milliseconds": { + "type": "integer", + "description": "Total playback duration in milliseconds. Present when reason is 'playCompleted'." + }, + "playback_last_offset_pos": { + "type": "string", + "description": "Last offset position in the audio stream. Present when reason is 'playCompleted'." + }, + "status": { + "type": "string", + "enum": ["fail"], + "description": "Set to 'fail' when playback failed (e.g. file not found)." 
+ } + }, + "required": ["reason"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/session-new.schema.json b/src/jambonz_sdk/schema/callbacks/session-new.schema.json new file mode 100644 index 0000000..514913f --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/session-new.schema.json @@ -0,0 +1,143 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/session-new", + "title": "Session:new Payload", + "description": "Payload delivered when a new call arrives. For webhook apps this is the initial POST body; for WebSocket apps it is the `data` property of the first `session:new` message. In the @jambonz/sdk WebSocket transport, this data is available as `session.data`.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "caller_name": { + "type": "string", + "description": "Caller display name from the SIP From header." + }, + "caller_id": { + "type": "string", + "description": "Caller ID value (phone number or SIP user)." + }, + "service_provider_sid": { + "type": "string", + "description": "Service provider identifier, if applicable." + }, + "parent_call_sid": { + "type": "string", + "description": "Call SID of the parent call, present when this session was created via the REST API dial (adulting) or when an outbound call leg is promoted to its own session." + }, + "fs_sip_address": { + "type": "string", + "description": "Internal SIP address of the FreeSWITCH media server handling this call." + }, + "fs_public_ip": { + "type": "string", + "description": "Public IP address of the FreeSWITCH media server, if available." + }, + "sip": { + "type": "object", + "description": "The raw SIP INVITE message (drachtio SipRequest object, serialized). Only present for WebSocket and HTTP POST transports (not for HTTP GET). 
See https://drachtio.org/api#sip-request for the full drachtio SipRequest API.", + "properties": { + "headers": { + "type": "object", + "description": "SIP headers from the INVITE as key-value pairs. Custom headers (X-* headers) from the originating carrier or SIP client are included here. Standard SIP headers like From, To, Contact, Call-ID, Via, etc. are also present.", + "additionalProperties": { "type": "string" }, + "examples": [ + { + "X-Authenticated-User": "retell@sip.example.com", + "X-Override-Number": "+15551234567", + "From": "<sip:+15551234567@203.0.113.10>;tag=abc123", + "To": "<sip:+15559876543@sip.jambonz.org>", + "Call-ID": "abc123@10.0.0.1" + } + ] + }, + "body": { + "type": "string", + "description": "SIP message body (typically SDP for INVITE)." + }, + "method": { + "type": "string", + "description": "SIP method (always 'INVITE' for session:new)." + }, + "uri": { + "type": "string", + "description": "Request-URI from the SIP INVITE." + }, + "calledNumber": { + "type": "string", + "description": "Phone number extracted from the Request-URI." + }, + "callingNumber": { + "type": "string", + "description": "Calling phone number extracted from P-Asserted-Identity or From header." + }, + "type": { + "type": "string", + "enum": ["request"], + "description": "Always 'request' for an incoming INVITE." + }, + "source": { + "type": "string", + "enum": ["network", "application"], + "description": "Origin of the SIP message." + }, + "source_address": { + "type": "string", + "description": "IP address of the sender." + }, + "source_port": { + "type": ["string", "integer"], + "description": "Port of the sender." + }, + "protocol": { + "type": "string", + "description": "Transport protocol (e.g. 'udp', 'tcp', 'tls', 'wss')." + }, + "payload": { + "type": "array", + "description": "Message body organized into parts; useful for multipart content." + } + } + }, + "env_vars": { + "type": "object", + "description": "Application environment variables configured in the jambonz portal. 
These are the key-value pairs defined in the application's environment variable schema. In the @jambonz/sdk, access via `session.data.env_vars`.", + "additionalProperties": true, + "examples": [ + { + "RETELL_TRUNK_NAME": "retell-hosted", + "PSTN_TRUNK_NAME": "my-carrier", + "DEFAULT_COUNTRY": "US" + } + ] + }, + "defaults": { + "type": "object", + "description": "Default speech settings for the account (synthesizer and recognizer defaults).", + "properties": { + "synthesizer": { + "type": "object", + "description": "Default TTS settings.", + "properties": { + "vendor": { "type": "string" }, + "language": { "type": "string" }, + "voice": { "type": "string" } + } + }, + "recognizer": { + "type": "object", + "description": "Default STT settings.", + "properties": { + "vendor": { "type": "string" }, + "language": { "type": "string" } + } + } + } + }, + "customerData": { + "type": "object", + "description": "Custom data attached to the call via the REST API when creating an outbound call. Preserved as-is (not snake-cased).", + "additionalProperties": true + } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/session-reconnect.schema.json b/src/jambonz_sdk/schema/callbacks/session-reconnect.schema.json new file mode 100644 index 0000000..8f9a69a --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/session-reconnect.schema.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/session-reconnect", + "title": "Session:reconnect Payload", + "description": "Payload delivered when a WebSocket client reconnects after a disconnection. The payload is identical to the original session:new payload — it is cached from the initial session setup and replayed on reconnect. 
This allows the application to restore state without needing to re-fetch call details.", + "allOf": [ + { "$ref": "session-new" } + ] +} diff --git a/src/jambonz_sdk/schema/callbacks/session-redirect.schema.json b/src/jambonz_sdk/schema/callbacks/session-redirect.schema.json new file mode 100644 index 0000000..81dd184 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/session-redirect.schema.json @@ -0,0 +1,38 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/session-redirect", + "title": "Session:redirect Payload", + "description": "Payload delivered when a redirect verb transfers the call to a new application or WebSocket endpoint. Contains only the current call state information — unlike session:new, it does not include defaults, env_vars, sip, or service_provider_sid.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "caller_name": { + "type": "string", + "description": "Caller display name from the SIP From header." + }, + "caller_id": { + "type": "string", + "description": "Caller ID value (phone number or SIP user)." + }, + "parent_call_sid": { + "type": "string", + "description": "Call SID of the parent call, if applicable." + }, + "fs_sip_address": { + "type": "string", + "description": "Internal SIP address of the FreeSWITCH media server handling this call." + }, + "fs_public_ip": { + "type": "string", + "description": "Public IP address of the FreeSWITCH media server, if available." + }, + "customerData": { + "type": "object", + "description": "Custom data attached to the call via the REST API. 
Preserved as-is (not snake-cased).", + "additionalProperties": true + } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/sip-refer-event.schema.json b/src/jambonz_sdk/schema/callbacks/sip-refer-event.schema.json new file mode 100644 index 0000000..241bc53 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/sip-refer-event.schema.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/sip-refer-event", + "title": "SIP Refer EventHook Payload", + "description": "Payload sent to the sip-refer eventHook when a SIP NOTIFY is received with transfer status updates.", + "type": "object", + "properties": { + "event": { + "type": "string", + "const": "transfer-status", + "description": "Event type — always 'transfer-status' for REFER notifications." + }, + "call_status": { + "type": "integer", + "description": "SIP status code from the NOTIFY sipfrag body (e.g. 100 for trying, 180 for ringing, 200 for success)." + } + }, + "required": ["event", "call_status"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/sip-refer.schema.json b/src/jambonz_sdk/schema/callbacks/sip-refer.schema.json new file mode 100644 index 0000000..40c66e7 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/sip-refer.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/sip-refer", + "title": "SipRefer ActionHook Payload", + "description": "Payload sent to the sip-refer verb's actionHook when the REFER completes. Sent once the REFER response is received, or after a final NOTIFY arrives.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "refer_status": { + "type": "integer", + "description": "SIP status code from the REFER response (e.g. 202 for accepted, 4xx/5xx for failure)." 
+ }, + "final_referred_call_status": { + "type": "integer", + "description": "Final SIP status of the referred call, extracted from a NOTIFY sipfrag. Present only when the REFER was accepted (202) and a final NOTIFY was received." + } + }, + "required": ["refer_status"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/sip-request.schema.json b/src/jambonz_sdk/schema/callbacks/sip-request.schema.json new file mode 100644 index 0000000..435c79e --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/sip-request.schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/sip-request", + "title": "SipRequest ActionHook Payload", + "description": "Payload sent to the sip-request verb's actionHook after a SIP request (e.g. INFO, NOTIFY) is sent and a response is received.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "result": { + "type": "string", + "enum": ["success", "failed"], + "description": "Whether the SIP request succeeded or failed." + }, + "sip_status": { + "type": "integer", + "description": "SIP response status code. Present when result is 'success'." + }, + "err": { + "type": "string", + "description": "Error message. Present when result is 'failed'." 
+ } + }, + "required": ["result"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/transcribe-translation.schema.json b/src/jambonz_sdk/schema/callbacks/transcribe-translation.schema.json new file mode 100644 index 0000000..859f700 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/transcribe-translation.schema.json @@ -0,0 +1,24 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/transcribe-translation", + "title": "Transcribe TranslationHook Payload", + "description": "Payload sent to the translationHook when a translation result is received.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "translation": { + "type": "object", + "description": "Translation result data.", + "properties": { + "channel": { "type": "integer", "description": "Audio channel number (1 or 2)." }, + "language": { "type": "string", "description": "Target language code for the translation." }, + "translation": { "type": "string", "description": "The translated text." 
} + }, + "required": ["channel", "language", "translation"] + } + }, + "required": ["translation"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/transcribe.schema.json b/src/jambonz_sdk/schema/callbacks/transcribe.schema.json new file mode 100644 index 0000000..e0da88f --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/transcribe.schema.json @@ -0,0 +1,46 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/transcribe", + "title": "Transcribe TranscriptionHook Payload", + "description": "Payload sent to the transcriptionHook when a transcription result is received from the STT engine.", + "allOf": [ + { "$ref": "base" } + ], + "type": "object", + "properties": { + "speech": { + "type": "object", + "description": "Speech recognition results, present when the STT engine returns alternatives.", + "properties": { + "language_code": { "type": "string", "description": "Language code used for recognition (e.g. 'en-US')." }, + "channel_tag": { "type": "integer", "description": "Audio channel number." }, + "is_final": { "type": "boolean", "description": "Whether this is a final (not interim) recognition result." }, + "alternatives": { + "type": "array", + "items": { + "type": "object", + "properties": { + "transcript": { "type": "string", "description": "The recognized transcript." }, + "confidence": { "type": "number", "description": "Confidence score between 0 and 1." } + } + }, + "description": "Array of recognition alternatives, ordered by confidence." + } + } + }, + "speech_event": { + "type": "object", + "description": "Speech event data, present when the STT engine returns a typed event (e.g. end of utterance).", + "properties": { + "type": { "type": "string", "description": "Event type from the STT vendor." } + }, + "additionalProperties": true + }, + "stt_latency_ms": { "type": "string", "description": "STT latency in milliseconds." 
}, + "stt_talkspurts": { "type": "string", "description": "JSON-encoded array of talkspurt timing data." }, + "stt_start_time": { "type": "string", "description": "STT recognition start time." }, + "stt_stop_time": { "type": "string", "description": "STT recognition stop time." }, + "stt_usage": { "description": "STT usage data from the vendor." } + }, + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/callbacks/tts-streaming-event.schema.json b/src/jambonz_sdk/schema/callbacks/tts-streaming-event.schema.json new file mode 100644 index 0000000..6c0d070 --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/tts-streaming-event.schema.json @@ -0,0 +1,77 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/tts-streaming-event", + "title": "TTS Streaming Event", + "description": "Events sent to the '/streaming-event' WebSocket endpoint during TTS streaming. These are sent as 'tts:streaming-event' messages. The tts_spoken event is only sent when trackTtsPlayout is enabled via the config verb.", + "type": "object", + "oneOf": [ + { + "properties": { + "event_type": { + "const": "stream_open", + "description": "The TTS streaming connection has been established." + } + }, + "required": ["event_type"], + "additionalProperties": false + }, + { + "properties": { + "event_type": { + "const": "stream_closed", + "description": "The TTS streaming connection has been closed." + } + }, + "required": ["event_type"], + "additionalProperties": false + }, + { + "properties": { + "event_type": { + "const": "stream_paused", + "description": "TTS streaming has been paused." + } + }, + "required": ["event_type"], + "additionalProperties": false + }, + { + "properties": { + "event_type": { + "const": "stream_resumed", + "description": "TTS streaming has been resumed." 
+ } + }, + "required": ["event_type"], + "additionalProperties": false + }, + { + "properties": { + "event_type": { + "const": "user_interruption", + "description": "The user interrupted (barged in) during TTS playout, causing the stream to be cleared." + } + }, + "required": ["event_type"], + "additionalProperties": false + }, + { + "properties": { + "event_type": { + "const": "tts_spoken", + "description": "Reports the actual text that was spoken via TTS. Sent on utterance completion or when the user barges in. Only sent when trackTtsPlayout is enabled via the config verb. Requires a TTS vendor that supports alignment data (e.g. ElevenLabs)." + }, + "text": { + "type": "string", + "description": "The text that was actually spoken before completion or interruption." + }, + "bargein": { + "type": "boolean", + "description": "True if the user barged in (interrupted) before the TTS finished speaking. False if the utterance completed normally." + } + }, + "required": ["event_type", "text", "bargein"], + "additionalProperties": false + } + ] +} diff --git a/src/jambonz_sdk/schema/callbacks/verb-status.schema.json b/src/jambonz_sdk/schema/callbacks/verb-status.schema.json new file mode 100644 index 0000000..2150b6c --- /dev/null +++ b/src/jambonz_sdk/schema/callbacks/verb-status.schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/callbacks/verb-status", + "title": "Verb Status Event", + "description": "Real-time verb lifecycle events sent over WebSocket when notifyEvents is enabled on the session. These are informational — no response is expected.", + "type": "object", + "properties": { + "event": { + "type": "string", + "enum": [ + "starting", + "finished", + "start-playback", + "stop-playback", + "kill-playback", + "dtmf-bargein-detected", + "speech-bargein-detected", + "synthesized-audio" + ], + "description": "The verb lifecycle event." 
+ }, + "verb": { + "type": "string", + "description": "The verb name (e.g. 'say', 'play', 'gather'). Present on synthesized-audio, start-playback, stop-playback, kill-playback, and dtmf/speech-bargein events." + }, + "name": { + "type": "string", + "description": "The verb name. Present on 'starting' and 'finished' events (these use 'name' instead of 'verb')." + }, + "id": { + "type": "string", + "description": "The verb instance id, if one was assigned by the application." + }, + "vendor": { + "type": "string", + "description": "TTS vendor name. Present on synthesized-audio events." + }, + "language": { + "type": "string", + "description": "TTS language code. Present on synthesized-audio events." + }, + "characters": { + "type": "integer", + "description": "Number of characters synthesized. Present on synthesized-audio events when not served from cache." + }, + "elapsed_time": { + "type": "number", + "description": "TTS round-trip time in milliseconds. Present on synthesized-audio events when not served from cache." + }, + "served_from_cache": { + "type": "boolean", + "description": "Whether the TTS audio was served from cache. Present on synthesized-audio events." + } + }, + "required": ["event"], + "additionalProperties": true +} diff --git a/src/jambonz_sdk/schema/components/actionHook.schema.json b/src/jambonz_sdk/schema/components/actionHook.schema.json new file mode 100644 index 0000000..28366b5 --- /dev/null +++ b/src/jambonz_sdk/schema/components/actionHook.schema.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/actionHook", + "title": "ActionHook", + "description": "A webhook or websocket callback that jambonz invokes when a verb completes. Reports verb results (e.g. speech recognition from 'gather', dial outcome) and receives the next verbs to execute. In webhook mode: jambonz POSTs to this URL and the HTTP response body is the next verb array. 
In WebSocket mode: this value becomes an event name emitted on the session — bind session.on('/hookName', (evt) => {...}) and respond with session.reply() (NOT session.send()). The callback payload always includes 'reason' plus verb-specific fields (e.g. 'speech', 'digits' for gather). Can be a simple URL/path string or an object with additional options.", + "oneOf": [ + { + "type": "string", + "format": "uri", + "description": "A URL to invoke. For webhook applications this is an HTTP(S) URL. For websocket applications this is typically a relative path or event name.", + "examples": ["https://myapp.example.com/gather-result", "/gather-result"] + }, + { + "type": "object", + "description": "A hook specification with URL and additional options.", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL to invoke." + }, + "method": { + "type": "string", + "description": "The HTTP method to use. Only applies to webhook applications.", + "enum": ["GET", "POST"], + "default": "POST" + }, + "basicAuth": { + "$ref": "auth", + "description": "Basic authentication credentials to include in the request." + } + }, + "required": ["url"] + } + ] +} diff --git a/src/jambonz_sdk/schema/components/actionHookDelayAction.schema.json b/src/jambonz_sdk/schema/components/actionHookDelayAction.schema.json new file mode 100644 index 0000000..607177f --- /dev/null +++ b/src/jambonz_sdk/schema/components/actionHookDelayAction.schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/actionHookDelayAction", + "title": "ActionHookDelayAction", + "description": "Configuration for what to do when an actionHook (webhook) takes a long time to respond. Allows playing interim content (e.g. 
'please wait' messages, hold music) while waiting for the webhook response, with configurable retry and give-up behavior.", + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Whether to enable delay handling for actionHooks." + }, + "noResponseTimeout": { + "type": "number", + "description": "Time in seconds to wait before executing the delay actions. If the webhook responds before this timeout, the delay actions are skipped.", + "examples": [3, 5] + }, + "noResponseGiveUpTimeout": { + "type": "number", + "description": "Total time in seconds to wait for a webhook response before giving up and executing the giveUpActions.", + "examples": [30, 60] + }, + "retries": { + "type": "number", + "description": "Number of times to retry the delay actions while still waiting for the webhook response." + }, + "actions": { + "type": "array", + "description": "An array of jambonz verbs to execute while waiting for the webhook response. Typically 'say' or 'play' verbs with messages like 'please hold'.", + "items": { "type": "object" } + }, + "giveUpActions": { + "type": "array", + "description": "An array of jambonz verbs to execute if the webhook never responds within the noResponseGiveUpTimeout. Typically an error message and/or hangup.", + "items": { "type": "object" } + } + } +} diff --git a/src/jambonz_sdk/schema/components/amd.schema.json b/src/jambonz_sdk/schema/components/amd.schema.json new file mode 100644 index 0000000..ee175f0 --- /dev/null +++ b/src/jambonz_sdk/schema/components/amd.schema.json @@ -0,0 +1,68 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/amd", + "title": "Answering Machine Detection", + "description": "Configuration for answering machine detection (AMD). Detects whether an outbound or inbound call was answered by a human or a machine. Used as a nested property on the 'config' or 'dial' verb. IMPORTANT: AMD runs asynchronously in the background. 
When using AMD with the 'config' verb, you MUST follow it with a 'pause' verb (e.g. pause({ length: 25 })) to keep the call alive while AMD detection runs. Without a pause, the call will end immediately after config completes.", + "type": "object", + "properties": { + "actionHook": { + "$ref": "actionHook", + "description": "Webhook to receive AMD events (amd_human_detected, amd_machine_detected, amd_no_speech_detected, amd_decision_timeout, amd_machine_stopped_speaking, amd_tone_detected, amd_error, amd_stopped)." + }, + "thresholdWordCount": { + "type": "number", + "description": "Number of spoken words in a greeting that triggers an amd_machine_detected result.", + "default": 9 + }, + "digitCount": { + "type": "number", + "description": "Number of digits in a greeting to trigger detection. 0 disables digit-based detection.", + "default": 0 + }, + "timers": { + "type": "object", + "description": "Timer settings controlling AMD detection windows.", + "properties": { + "noSpeechTimeoutMs": { + "type": "number", + "description": "Milliseconds to wait for any speech before returning amd_no_speech_detected.", + "default": 5000 + }, + "decisionTimeoutMs": { + "type": "number", + "description": "Milliseconds before returning amd_decision_timeout if no determination is made.", + "default": 15000 + }, + "toneTimeoutMs": { + "type": "number", + "description": "Milliseconds to wait for beep/tone detection.", + "default": 20000 + }, + "greetingCompletionTimeoutMs": { + "type": "number", + "description": "Milliseconds of silence after speech before determining the machine greeting is complete. Automatically reduced to 1000ms if a beep is detected.", + "default": 2000 + } + }, + "additionalProperties": false + }, + "recognizer": { + "$ref": "recognizer", + "description": "Override the STT recognizer used for AMD speech detection. When omitted, AMD uses the session default recognizer with enhancedModel enabled." 
+ } + }, + "required": ["actionHook"], + "examples": [ + { + "actionHook": "/amd-events" + }, + { + "actionHook": "/amd-events", + "thresholdWordCount": 6, + "timers": { + "noSpeechTimeoutMs": 3000, + "decisionTimeoutMs": 10000 + } + } + ] +} diff --git a/src/jambonz_sdk/schema/components/auth.schema.json b/src/jambonz_sdk/schema/components/auth.schema.json new file mode 100644 index 0000000..b2fefa3 --- /dev/null +++ b/src/jambonz_sdk/schema/components/auth.schema.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/auth", + "title": "Auth", + "description": "Basic authentication credentials, used for authenticating with external services such as websocket endpoints or SIP registrars.", + "type": "object", + "properties": { + "username": { + "type": "string", + "description": "The username for authentication." + }, + "password": { + "type": "string", + "description": "The password for authentication." + } + }, + "required": ["username", "password"] +} diff --git a/src/jambonz_sdk/schema/components/bidirectionalAudio.schema.json b/src/jambonz_sdk/schema/components/bidirectionalAudio.schema.json new file mode 100644 index 0000000..777a9a2 --- /dev/null +++ b/src/jambonz_sdk/schema/components/bidirectionalAudio.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/bidirectionalAudio", + "title": "BidirectionalAudio", + "description": "Configuration for bidirectional audio streaming over a websocket connection. When enabled, the remote websocket endpoint can send audio back to jambonz to be played to the caller.", + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Whether to enable bidirectional audio on the websocket connection." 
+ }, + "streaming": { + "type": "boolean", + "description": "If true, audio is streamed continuously rather than sent as complete messages." + }, + "sampleRate": { + "type": "number", + "description": "The sample rate in Hz for bidirectional audio.", + "examples": [8000, 16000, 24000] + } + } +} diff --git a/src/jambonz_sdk/schema/components/fillerNoise.schema.json b/src/jambonz_sdk/schema/components/fillerNoise.schema.json new file mode 100644 index 0000000..37ec5ba --- /dev/null +++ b/src/jambonz_sdk/schema/components/fillerNoise.schema.json @@ -0,0 +1,25 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/fillerNoise", + "title": "FillerNoise", + "description": "Configuration for playing background filler noise (e.g. keyboard typing, hold music) while the application is processing and the caller would otherwise hear silence. Commonly used during LLM response generation to indicate the system is working.", + "type": "object", + "properties": { + "enable": { + "type": "boolean", + "description": "Whether to enable filler noise." + }, + "url": { + "type": "string", + "format": "uri", + "description": "URL of the audio file to play as filler noise. Should be a short, loopable audio clip.", + "examples": ["https://example.com/sounds/typing.wav"] + }, + "startDelaySecs": { + "type": "number", + "description": "Number of seconds to wait before starting filler noise. 
Prevents filler noise from playing during brief processing pauses.", + "examples": [1, 2] + } + }, + "required": ["enable"] +} diff --git a/src/jambonz_sdk/schema/components/llm-base.schema.json b/src/jambonz_sdk/schema/components/llm-base.schema.json new file mode 100644 index 0000000..81203d7 --- /dev/null +++ b/src/jambonz_sdk/schema/components/llm-base.schema.json @@ -0,0 +1,94 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/llm-base", + "title": "LLM Base Properties", + "description": "Shared properties for llm, s2s, and vendor-specific s2s verb schemas.", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "vendor": { + "type": "string", + "description": "The LLM vendor to use.", + "examples": ["openai", "anthropic", "google", "groq", "deepseek", "deepgram", "ultravox", "custom"] + }, + "model": { + "type": "string", + "description": "The specific model to use from the vendor.", + "examples": ["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.0-flash"] + }, + "auth": { + "type": "object", + "description": "Authentication credentials for the LLM vendor API.", + "properties": { + "apiKey": { + "type": "string", + "description": "The API key for the LLM vendor." + } + }, + "additionalProperties": true + }, + "connectOptions": { + "type": "object", + "description": "Additional connection options for the LLM vendor, such as custom base URLs or API versions.", + "additionalProperties": true + }, + "llmOptions": { + "type": "object", + "description": "Configuration passed to the LLM including the system prompt, temperature, tools/functions, and other model parameters. 
The structure varies by vendor but typically includes 'messages' (conversation history), 'temperature', 'tools' (function definitions), and 'maxTokens'.", + "additionalProperties": true, + "examples": [ + { + "messages": [ + { "role": "system", "content": "You are a helpful customer service agent for Acme Corp." } + ], + "temperature": 0.7 + } + ] + }, + "mcpServers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL of the MCP server." + }, + "auth": { + "type": "object", + "description": "Authentication for the MCP server.", + "additionalProperties": true + }, + "roots": { + "type": "array", + "items": { "type": "object" }, + "description": "MCP root definitions." + } + }, + "required": ["url"] + }, + "description": "Model Context Protocol servers to connect to. MCP servers provide tools that the LLM can invoke during the conversation." + }, + "actionHook": { + "$ref": "actionHook", + "description": "A webhook invoked when the LLM conversation ends. Receives conversation details and should return the next verbs to execute." + }, + "eventHook": { + "$ref": "actionHook", + "description": "A webhook invoked for real-time events during the LLM conversation (e.g. tool calls, transcription events)." + }, + "toolHook": { + "$ref": "actionHook", + "description": "A webhook invoked when the LLM calls a tool/function. Receives the tool name and arguments, and should return the tool result." + }, + "events": { + "type": "array", + "items": { "type": "string" }, + "description": "List of event types to receive via the eventHook." 
+ } + } +} diff --git a/src/jambonz_sdk/schema/components/recognizer-assemblyAiOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-assemblyAiOptions.schema.json new file mode 100644 index 0000000..3494d8e --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-assemblyAiOptions.schema.json @@ -0,0 +1,66 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-assemblyAiOptions", + "title": "AssemblyAI Recognizer Options", + "description": "AssemblyAI-specific STT options. Only applies when recognizer vendor is 'assemblyai'.", + "type": "object", + "properties": { + "apiKey": { + "type": "string", + "description": "AssemblyAI API key. Overrides credentials configured in jambonz." + }, + "serviceVersion": { + "type": "string", + "enum": ["v2", "v3"], + "description": "AssemblyAI streaming API version." + }, + "speechModel": { + "type": "string", + "description": "AssemblyAI speech model to use for recognition." + }, + "formatTurns": { + "type": "boolean", + "description": "Enable turn-level formatting." + }, + "endOfTurnConfidenceThreshold": { + "type": "number", + "description": "Confidence threshold for end-of-turn detection." + }, + "minEndOfTurnSilenceWhenConfident": { + "type": "number", + "description": "Minimum silence duration (seconds) to trigger end-of-turn when confidence is met." + }, + "maxTurnSilence": { + "type": "number", + "description": "Maximum silence duration (seconds) before forcing end-of-turn." + }, + "minTurnSilence": { + "type": "number", + "description": "Minimum silence duration (seconds) before allowing end-of-turn." + }, + "keyterms": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of key terms to boost in recognition." + }, + "prompt": { + "type": "string", + "description": "Prompt to guide the recognition model." 
+ }, + "languageDetection": { + "type": "boolean", + "description": "Enable automatic language detection." + }, + "vadThreshold": { + "type": "number", + "description": "Voice activity detection threshold." + }, + "inactivityTimeout": { + "type": "number", + "description": "Timeout (seconds) for inactivity before closing the stream." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-awsOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-awsOptions.schema.json new file mode 100644 index 0000000..fe7d53f --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-awsOptions.schema.json @@ -0,0 +1,52 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-awsOptions", + "title": "AWS Recognizer Options", + "description": "AWS Transcribe specific options. Only applies when recognizer vendor is 'aws'.", + "type": "object", + "properties": { + "accessKey": { + "type": "string", + "description": "AWS access key ID. Overrides credentials configured in jambonz." + }, + "secretKey": { + "type": "string", + "description": "AWS secret access key." + }, + "securityToken": { + "type": "string", + "description": "AWS temporary security token (for STS/assumed roles)." + }, + "region": { + "type": "string", + "description": "AWS region for the Transcribe service." + }, + "vocabularyName": { + "type": "string", + "description": "Name of a custom vocabulary to use." + }, + "vocabularyFilterName": { + "type": "string", + "description": "Name of a vocabulary filter to apply." + }, + "vocabularyFilterMethod": { + "type": "string", + "enum": ["remove", "mask", "tag"], + "description": "How filtered vocabulary words should be handled." + }, + "languageModelName": { + "type": "string", + "description": "Name of a custom language model." 
+ }, + "piiEntityTypes": { + "type": "array", + "items": { "type": "string" }, + "description": "PII entity types to identify (e.g. 'BANK_ACCOUNT_NUMBER', 'CREDIT_DEBIT_NUMBER')." + }, + "piiIdentifyEntities": { + "type": "boolean", + "description": "Enable PII entity identification." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-azureOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-azureOptions.schema.json new file mode 100644 index 0000000..51c4a7d --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-azureOptions.schema.json @@ -0,0 +1,32 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-azureOptions", + "title": "Azure Recognizer Options", + "description": "Azure Speech Services specific options. Only applies when recognizer vendor is 'microsoft'.", + "type": "object", + "properties": { + "speechSegmentationSilenceTimeoutMs": { + "type": "number", + "description": "Silence timeout in milliseconds for speech segmentation." + }, + "postProcessing": { + "type": "string", + "description": "Post-processing mode for transcription results." + }, + "audioLogging": { + "type": "boolean", + "description": "Enable audio logging for diagnostics." + }, + "languageIdMode": { + "type": "string", + "enum": ["AtStart", "Continuous"], + "description": "Language identification mode when using multiple languages." + }, + "speechRecognitionMode": { + "type": "string", + "enum": ["CONVERSATION", "DICTATION", "INTERACTIVE"], + "description": "Speech recognition mode optimized for the interaction type." 
+ } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-cobaltOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-cobaltOptions.schema.json new file mode 100644 index 0000000..23a764d --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-cobaltOptions.schema.json @@ -0,0 +1,34 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-cobaltOptions", + "title": "Cobalt Recognizer Options", + "description": "Cobalt-specific STT options. Only applies when recognizer vendor is 'cobalt'.", + "type": "object", + "properties": { + "serverUri": { + "type": "string", + "description": "Cobalt server URI." + }, + "enableConfusionNetwork": { + "type": "boolean", + "description": "Enable confusion network output." + }, + "metadata": { + "type": "string", + "description": "Metadata string to pass to the server." + }, + "compiledContextData": { + "type": "string", + "description": "Compiled context data for biasing recognition." + }, + "wordTimeOffsets": { + "type": "boolean", + "description": "Include word-level timestamps." + }, + "contextToken": { + "type": "string", + "description": "Context token for server-side context." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-customOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-customOptions.schema.json new file mode 100644 index 0000000..e93490f --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-customOptions.schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-customOptions", + "title": "Custom Recognizer Options", + "description": "Options for custom STT vendors. 
Only applies when recognizer vendor is 'custom'.", + "type": "object", + "properties": { + "authToken": { + "type": "string", + "description": "Authentication token for the custom STT service." + }, + "uri": { + "type": "string", + "description": "WebSocket URI of the custom STT service." + }, + "sampleRate": { + "type": "number", + "description": "Audio sample rate in Hz." + }, + "options": { + "type": "object", + "description": "Additional vendor-specific options passed through to the custom service.", + "additionalProperties": true + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-deepgramOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-deepgramOptions.schema.json new file mode 100644 index 0000000..13a286e --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-deepgramOptions.schema.json @@ -0,0 +1,147 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-deepgramOptions", + "title": "Deepgram Recognizer Options", + "description": "Deepgram-specific STT options. Only applies when recognizer vendor is 'deepgram'.", + "type": "object", + "properties": { + "deepgramSttUri": { + "type": "string", + "description": "Custom Deepgram STT endpoint URI." + }, + "deepgramSttUseTls": { + "type": "boolean", + "description": "Whether to use TLS when connecting to the Deepgram STT endpoint." + }, + "apiKey": { + "type": "string", + "description": "Deepgram API key. Overrides the key configured in jambonz." + }, + "tier": { + "type": "string", + "description": "Deepgram model tier." + }, + "model": { + "type": "string", + "description": "Deepgram model name (e.g. 'nova-2', 'nova-2-general')." + }, + "customModel": { + "type": "string", + "description": "ID of a custom-trained Deepgram model." + }, + "version": { + "type": "string", + "description": "Model version." 
+ }, + "punctuate": { + "type": "boolean", + "description": "Enable automatic punctuation." + }, + "smartFormatting": { + "type": "boolean", + "description": "Enable Deepgram smart formatting (dates, numbers, etc.)." + }, + "noDelay": { + "type": "boolean", + "description": "Disable Deepgram's internal buffering for lower latency." + }, + "profanityFilter": { + "type": "boolean", + "description": "Filter profanity from transcripts." + }, + "redact": { + "type": "string", + "enum": ["pci", "numbers", "true", "ssn"], + "description": "Redact sensitive information from transcripts." + }, + "diarize": { + "type": "boolean", + "description": "Enable speaker diarization." + }, + "diarizeVersion": { + "type": "string", + "description": "Diarization model version." + }, + "ner": { + "type": "boolean", + "description": "Enable named entity recognition." + }, + "multichannel": { + "type": "boolean", + "description": "Enable multichannel processing." + }, + "alternatives": { + "type": "number", + "description": "Number of alternative transcripts to return." + }, + "numerals": { + "type": "boolean", + "description": "Convert spoken numbers to digits." + }, + "search": { + "type": "array", + "items": { "type": "string" }, + "description": "Terms to search for in the transcript." + }, + "replace": { + "type": "array", + "items": { "type": "string" }, + "description": "Terms to replace in the transcript." + }, + "keywords": { + "type": "array", + "items": { "type": "string" }, + "description": "Keywords to boost recognition for." + }, + "keyterms": { + "type": "array", + "items": { "type": "string" }, + "description": "Key terms to boost recognition for." + }, + "endpointing": { + "type": ["boolean", "number"], + "description": "Endpointing sensitivity. Boolean to enable/disable, or number of milliseconds." + }, + "utteranceEndMs": { + "type": "number", + "description": "Milliseconds of silence to detect end of utterance." 
+ }, + "shortUtterance": { + "type": "boolean", + "description": "Optimize for short utterances." + }, + "vadTurnoff": { + "type": "number", + "description": "Milliseconds of silence before VAD turns off." + }, + "tag": { + "type": "string", + "description": "Tag to associate with the request for tracking." + }, + "fillerWords": { + "type": "boolean", + "description": "Include filler words (um, uh) in transcript." + }, + "eotThreshold": { + "type": "number", + "description": "End-of-turn confidence threshold (0-1)." + }, + "eotTimeoutMs": { + "type": "number", + "description": "End-of-turn timeout in milliseconds." + }, + "mipOptOut": { + "type": "boolean", + "description": "Opt out of Deepgram's model improvement program." + }, + "entityPrompt": { + "type": "string", + "description": "Prompt to guide entity detection." + }, + "eagerEotThreshold": { + "type": "number", + "description": "Eager end-of-turn threshold for faster response." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-elevenlabsOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-elevenlabsOptions.schema.json new file mode 100644 index 0000000..8a55fa4 --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-elevenlabsOptions.schema.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-elevenlabsOptions", + "title": "ElevenLabs Recognizer Options", + "description": "ElevenLabs-specific STT options. Only applies when recognizer vendor is 'elevenlabs'.", + "type": "object", + "properties": { + "includeTimestamps": { + "type": "boolean", + "description": "Include word-level timestamps in results." + }, + "commitStrategy": { + "type": "string", + "enum": ["manual", "vad"], + "description": "How audio chunks are committed. 'manual' for explicit commits, 'vad' for voice activity detection." 
+ }, + "vadSilenceThresholdSecs": { + "type": "number", + "description": "Silence duration in seconds to trigger VAD commit." + }, + "vadThreshold": { + "type": "number", + "description": "VAD activation threshold." + }, + "minSpeechDurationMs": { + "type": "number", + "description": "Minimum speech duration in milliseconds to accept." + }, + "minSilenceDurationMs": { + "type": "number", + "description": "Minimum silence duration in milliseconds to trigger end of speech." + }, + "enableLogging": { + "type": "boolean", + "description": "Enable server-side logging." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-gladiaOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-gladiaOptions.schema.json new file mode 100644 index 0000000..06b5f2f --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-gladiaOptions.schema.json @@ -0,0 +1,8 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-gladiaOptions", + "title": "Gladia Recognizer Options", + "description": "Gladia-specific STT options. Only applies when recognizer vendor is 'gladia'. See Gladia API documentation for available options.", + "type": "object", + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-googleOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-googleOptions.schema.json new file mode 100644 index 0000000..315f510 --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-googleOptions.schema.json @@ -0,0 +1,35 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-googleOptions", + "title": "Google Recognizer Options", + "description": "Google Speech-to-Text specific options. 
Only applies when recognizer vendor is 'google'.", + "type": "object", + "properties": { + "serviceVersion": { + "type": "string", + "enum": ["v1", "v2"], + "description": "Google Speech-to-Text API version." + }, + "recognizerId": { + "type": "string", + "description": "ID of a Google Speech recognizer resource (v2 only)." + }, + "speechStartTimeoutMs": { + "type": "number", + "description": "Timeout in milliseconds to wait for speech to start." + }, + "speechEndTimeoutMs": { + "type": "number", + "description": "Timeout in milliseconds to detect end of speech." + }, + "enableVoiceActivityEvents": { + "type": "boolean", + "description": "Enable voice activity detection events." + }, + "transcriptNormalization": { + "type": "array", + "description": "Array of transcript normalization rules." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-houndifyOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-houndifyOptions.schema.json new file mode 100644 index 0000000..d21857e --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-houndifyOptions.schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-houndifyOptions", + "title": "Houndify Recognizer Options", + "description": "Houndify-specific STT options. Only applies when recognizer vendor is 'houndify'.", + "type": "object", + "properties": { + "requestInfo": { + "type": "object", + "description": "Houndify RequestInfo object with context data.", + "additionalProperties": true + }, + "sampleRate": { "type": "number", "description": "Audio sample rate in Hz." }, + "latitude": { "type": "number", "description": "User latitude for location-aware queries." }, + "longitude": { "type": "number", "description": "User longitude for location-aware queries." }, + "city": { "type": "string", "description": "User city." 
}, + "state": { "type": "string", "description": "User state." }, + "country": { "type": "string", "description": "User country." }, + "timeZone": { "type": "string", "description": "User timezone." }, + "domain": { "type": "string", "description": "Houndify domain." }, + "audioEndpoint": { "type": "string", "description": "Custom audio endpoint URL." }, + "maxSilenceSeconds": { "type": "number", "description": "Maximum silence before stopping." }, + "maxSilenceAfterFullQuerySeconds": { "type": "number", "description": "Silence timeout after a complete query." }, + "maxSilenceAfterPartialQuerySeconds": { "type": "number", "description": "Silence timeout after a partial query." }, + "vadSensitivity": { "type": "number", "description": "VAD sensitivity level." }, + "vadTimeout": { "type": "number", "description": "VAD timeout in milliseconds." }, + "vadMode": { "type": "string", "description": "VAD mode." }, + "vadVoiceMs": { "type": "number", "description": "Milliseconds of voice to trigger VAD." }, + "vadSilenceMs": { "type": "number", "description": "Milliseconds of silence to trigger VAD." }, + "vadDebug": { "type": "boolean", "description": "Enable VAD debug logging." }, + "audioFormat": { "type": "string", "description": "Audio format." }, + "enableNoiseReduction": { "type": "boolean", "description": "Enable noise reduction." }, + "enableProfanityFilter": { "type": "boolean", "description": "Filter profanity." }, + "enablePunctuation": { "type": "boolean", "description": "Enable punctuation." }, + "enableCapitalization": { "type": "boolean", "description": "Enable capitalization." }, + "confidenceThreshold": { "type": "number", "description": "Minimum confidence threshold." }, + "enableDisfluencyFilter": { "type": "boolean", "description": "Filter disfluencies (um, uh)." }, + "maxResults": { "type": "number", "description": "Maximum number of results." }, + "enableWordTimestamps": { "type": "boolean", "description": "Include word timestamps." 
}, + "maxAlternatives": { "type": "number", "description": "Maximum alternative transcripts." }, + "partialTranscriptInterval": { "type": "number", "description": "Interval for partial transcript delivery." }, + "sessionTimeout": { "type": "number", "description": "Session timeout." }, + "connectionTimeout": { "type": "number", "description": "Connection timeout." }, + "customVocabulary": { + "type": "array", + "items": { "type": "string" }, + "description": "Custom vocabulary terms." + }, + "languageModel": { "type": "string", "description": "Language model to use." }, + "audioQueryAbsoluteTimeout": { "type": "number", "description": "Absolute timeout for audio queries." } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-ibmOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-ibmOptions.schema.json new file mode 100644 index 0000000..6f9b0ae --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-ibmOptions.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-ibmOptions", + "title": "IBM Recognizer Options", + "description": "IBM Watson Speech-to-Text specific options. Only applies when recognizer vendor is 'ibm'.", + "type": "object", + "properties": { + "sttApiKey": { + "type": "string", + "description": "IBM STT API key. Overrides credentials configured in jambonz." + }, + "sttRegion": { + "type": "string", + "description": "IBM STT region." + }, + "ttsApiKey": { + "type": "string", + "description": "IBM TTS API key." + }, + "ttsRegion": { + "type": "string", + "description": "IBM TTS region." + }, + "instanceId": { + "type": "string", + "description": "IBM Watson instance ID." + }, + "model": { + "type": "string", + "description": "Recognition model name." + }, + "languageCustomizationId": { + "type": "string", + "description": "ID of a custom language model." 
+ }, + "acousticCustomizationId": { + "type": "string", + "description": "ID of a custom acoustic model." + }, + "baseModelVersion": { + "type": "string", + "description": "Base model version to use." + }, + "watsonMetadata": { + "type": "string", + "description": "Customer ID metadata for data labeling." + }, + "watsonLearningOptOut": { + "type": "boolean", + "description": "Opt out of IBM data collection for service improvements." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-nuanceOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-nuanceOptions.schema.json new file mode 100644 index 0000000..c484c26 --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-nuanceOptions.schema.json @@ -0,0 +1,150 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-nuanceOptions", + "title": "Nuance Recognizer Options", + "description": "Nuance Mix specific options. Only applies when recognizer vendor is 'nuance'.", + "type": "object", + "properties": { + "clientId": { + "type": "string", + "description": "Nuance Mix client ID." + }, + "secret": { + "type": "string", + "description": "Nuance Mix client secret." + }, + "kryptonEndpoint": { + "type": "string", + "description": "Custom Nuance Krypton endpoint URL." + }, + "topic": { + "type": "string", + "description": "Recognition topic (domain)." + }, + "utteranceDetectionMode": { + "type": "string", + "enum": ["single", "multiple", "disabled"], + "description": "How utterance boundaries are detected." + }, + "punctuation": { + "type": "boolean", + "description": "Enable automatic punctuation." + }, + "profanityFilter": { + "type": "boolean", + "description": "Filter profanity from results." + }, + "includeTokenization": { + "type": "boolean", + "description": "Include tokenization data in results." 
+ }, + "discardSpeakerAdaptation": { + "type": "boolean", + "description": "Discard speaker adaptation data." + }, + "suppressCallRecording": { + "type": "boolean", + "description": "Suppress call recording on the Nuance side." + }, + "maskLoadFailures": { + "type": "boolean", + "description": "Mask resource load failures." + }, + "suppressInitialCapitalization": { + "type": "boolean", + "description": "Suppress initial capitalization of results." + }, + "allowZeroBaseLmWeight": { + "type": "boolean", + "description": "Allow zero base language model weight." + }, + "filterWakeupWord": { + "type": "boolean", + "description": "Filter wakeup words from results." + }, + "resultType": { + "type": "string", + "enum": ["final", "partial", "immutable_partial"], + "description": "Type of results to return." + }, + "noInputTimeoutMs": { + "type": "number", + "description": "Timeout in milliseconds before no-input event." + }, + "recognitionTimeoutMs": { + "type": "number", + "description": "Maximum recognition duration in milliseconds." + }, + "utteranceEndSilenceMs": { + "type": "number", + "description": "Silence duration in milliseconds to detect end of utterance." + }, + "maxHypotheses": { + "type": "number", + "description": "Maximum number of recognition hypotheses to return." + }, + "speechDomain": { + "type": "string", + "description": "Speech domain for optimized recognition." + }, + "formatting": { + "type": "object", + "description": "Formatting options for recognition results.", + "properties": { + "scheme": { "type": "string", "description": "Formatting scheme name." }, + "options": { "type": "object", "description": "Scheme-specific formatting options." } + }, + "required": ["scheme", "options"] + }, + "clientData": { + "type": "object", + "description": "Custom client data to pass to Nuance.", + "additionalProperties": true + }, + "userId": { + "type": "string", + "description": "User ID for speaker adaptation." 
+ }, + "speechDetectionSensitivity": { + "type": "number", + "description": "Speech detection sensitivity (0-1)." + }, + "resources": { + "type": "array", + "description": "Array of Nuance recognition resources (grammars, wordsets, etc.).", + "items": { + "type": "object", + "properties": { + "externalReference": { + "type": "object", + "description": "External resource reference.", + "properties": { + "type": { + "type": "string", + "enum": ["undefined_resource_type", "wordset", "compiled_wordset", "domain_lm", "speaker_profile", "grammar", "settings"] + }, + "uri": { "type": "string" }, + "maxLoadFailures": { "type": "boolean" }, + "requestTimeoutMs": { "type": "number" }, + "headers": { "type": "object" } + } + }, + "inlineWordset": { "type": "string", "description": "Inline wordset JSON string." }, + "builtin": { "type": "string", "description": "Built-in grammar name." }, + "inlineGrammar": { "type": "string", "description": "Inline SRGS grammar." }, + "wakeupWord": { "type": "array", "items": { "type": "string" }, "description": "Wakeup words." }, + "weightName": { + "type": "string", + "enum": ["defaultWeight", "lowest", "low", "medium", "high", "highest"] + }, + "weightValue": { "type": "number" }, + "reuse": { + "type": "string", + "enum": ["undefined_reuse", "low_reuse", "high_reuse"] + } + } + } + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-nvidiaOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-nvidiaOptions.schema.json new file mode 100644 index 0000000..15131f9 --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-nvidiaOptions.schema.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-nvidiaOptions", + "title": "NVIDIA Recognizer Options", + "description": "NVIDIA Riva specific options. 
Only applies when recognizer vendor is 'nvidia'.", + "type": "object", + "properties": { + "rivaUri": { + "type": "string", + "description": "NVIDIA Riva server URI." + }, + "maxAlternatives": { + "type": "number", + "description": "Maximum number of alternative transcripts." + }, + "profanityFilter": { + "type": "boolean", + "description": "Filter profanity from results." + }, + "punctuation": { + "type": "boolean", + "description": "Enable automatic punctuation." + }, + "wordTimeOffsets": { + "type": "boolean", + "description": "Include word-level timestamps." + }, + "verbatimTranscripts": { + "type": "boolean", + "description": "Return verbatim (unformatted) transcripts." + }, + "customConfiguration": { + "type": "object", + "description": "Custom Riva configuration parameters.", + "additionalProperties": true + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-openaiOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-openaiOptions.schema.json new file mode 100644 index 0000000..7ae4a2d --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-openaiOptions.schema.json @@ -0,0 +1,59 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-openaiOptions", + "title": "OpenAI Recognizer Options", + "description": "OpenAI Whisper/Realtime specific STT options. Only applies when recognizer vendor is 'openai'.", + "type": "object", + "properties": { + "apiKey": { + "type": "string", + "description": "OpenAI API key. Overrides credentials configured in jambonz." + }, + "model": { + "type": "string", + "description": "OpenAI STT model name." + }, + "prompt": { + "type": "string", + "description": "Prompt to guide the recognition model." 
+ }, + "promptTemplates": { + "type": "object", + "description": "Templates for dynamic prompt generation.", + "properties": { + "hintsTemplate": { "type": "string", "description": "Template for injecting hints into the prompt." }, + "conversationHistoryTemplate": { "type": "string", "description": "Template for injecting conversation history." } + } + }, + "language": { + "type": "string", + "description": "Language code for recognition." + }, + "input_audio_noise_reduction": { + "type": "string", + "enum": ["near_field", "far_field"], + "description": "Input audio noise reduction mode." + }, + "turn_detection": { + "type": "object", + "description": "Turn detection configuration for the OpenAI Realtime API.", + "properties": { + "type": { + "type": "string", + "enum": ["none", "server_vad", "semantic_vad"], + "description": "Turn detection strategy." + }, + "eagerness": { + "type": "string", + "enum": ["low", "medium", "high", "auto"], + "description": "How eagerly the model should respond." + }, + "threshold": { "type": "number", "description": "VAD activation threshold (0-1)." }, + "prefix_padding_ms": { "type": "number", "description": "Milliseconds of audio to include before detected speech." }, + "silence_duration_ms": { "type": "number", "description": "Milliseconds of silence to detect end of speech." } + }, + "required": ["type"] + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-sonioxOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-sonioxOptions.schema.json new file mode 100644 index 0000000..fe6dced --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-sonioxOptions.schema.json @@ -0,0 +1,46 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-sonioxOptions", + "title": "Soniox Recognizer Options", + "description": "Soniox-specific STT options. 
Only applies when recognizer vendor is 'soniox'.", + "type": "object", + "properties": { + "apiKey": { + "type": "string", + "description": "Soniox API key." + }, + "model": { + "type": "string", + "description": "Soniox recognition model." + }, + "endpointDetection": { + "type": "boolean", + "description": "Enable endpoint detection." + }, + "profanityFilter": { + "type": "boolean", + "description": "Filter profanity from results." + }, + "speechContext": { + "type": "string", + "description": "Speech context for improved recognition." + }, + "clientRequestReference": { + "type": "string", + "description": "Client request reference for tracking." + }, + "storage": { + "type": "object", + "description": "Soniox storage configuration for persisting transcripts.", + "properties": { + "id": { "type": "string", "description": "Storage ID." }, + "title": { "type": "string", "description": "Storage title." }, + "disableStoreAudio": { "type": "boolean", "description": "Disable audio storage." }, + "disableStoreTranscript": { "type": "boolean", "description": "Disable transcript storage." }, + "disableSearch": { "type": "boolean", "description": "Disable search indexing." }, + "metadata": { "type": "object", "description": "Custom metadata.", "additionalProperties": true } + } + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-speechmaticsOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-speechmaticsOptions.schema.json new file mode 100644 index 0000000..9fb193f --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-speechmaticsOptions.schema.json @@ -0,0 +1,100 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-speechmaticsOptions", + "title": "Speechmatics Recognizer Options", + "description": "Speechmatics-specific STT options. 
Only applies when recognizer vendor is 'speechmatics'.", + "type": "object", + "properties": { + "host": { + "type": "string", + "description": "Speechmatics host URL." + }, + "profile": { + "type": "string", + "enum": ["adaptive", "agile", "smart", "external"], + "description": "Speechmatics profile." + }, + "transcription_config": { + "type": "object", + "description": "Speechmatics transcription configuration.", + "properties": { + "language": { "type": "string", "description": "Language code." }, + "domain": { "type": "string", "description": "Domain model." }, + "additional_vocab": { "type": "array", "description": "Additional vocabulary entries." }, + "diarization": { "type": "string", "description": "Diarization mode." }, + "speaker_diarization_config": { + "type": "object", + "properties": { + "speaker_sensitivity": { "type": "number" }, + "max_speakers": { "type": "number" } + } + }, + "conversation_config": { + "type": "object", + "properties": { + "end_of_utterance_silence_trigger": { "type": "number" } + } + }, + "enable_partials": { "type": "boolean", "description": "Enable partial transcripts." }, + "max_delay": { "type": "number", "description": "Maximum delay in seconds." }, + "max_delay_mode": { + "type": "string", + "enum": ["fixed", "flexible"], + "description": "Delay mode." + }, + "output_locale": { "type": "string", "description": "Output locale for formatting." }, + "punctuation_overrides": { + "type": "object", + "properties": { + "permitted_marks": { "type": "array", "items": { "type": "string" } }, + "sensitivity": { "type": "number" } + } + }, + "operating_point": { "type": "string", "description": "Operating point (standard or enhanced)." }, + "enable_entities": { "type": "boolean", "description": "Enable entity detection." 
}, + "audio_filtering_config": { + "type": "object", + "properties": { + "volume_threshold": { "type": "number" } + }, + "required": ["volume_threshold"] + }, + "transcript_filtering_config": { + "type": "object", + "properties": { + "remove_disfluencies": { "type": "boolean" } + }, + "required": ["remove_disfluencies"] + } + } + }, + "translation_config": { + "type": "object", + "description": "Speechmatics translation configuration.", + "properties": { + "target_languages": { + "type": "array", + "items": { "type": "string" }, + "description": "Target languages for translation." + }, + "enable_partials": { "type": "boolean", "description": "Enable partial translations." } + }, + "required": ["target_languages"] + }, + "audio_events_config": { + "type": "object", + "description": "Audio event detection configuration.", + "properties": { + "types": { + "type": "array", + "items": { + "type": "string", + "enum": ["applause", "music", "laughter"] + }, + "description": "Audio event types to detect." + } + } + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer-verbioOptions.schema.json b/src/jambonz_sdk/schema/components/recognizer-verbioOptions.schema.json new file mode 100644 index 0000000..2b0c883 --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer-verbioOptions.schema.json @@ -0,0 +1,46 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer-verbioOptions", + "title": "Verbio Recognizer Options", + "description": "Verbio-specific STT options. Only applies when recognizer vendor is 'verbio'.", + "type": "object", + "properties": { + "enable_formatting": { + "type": "boolean", + "description": "Enable text formatting of results." + }, + "enable_diarization": { + "type": "boolean", + "description": "Enable speaker diarization." + }, + "topic": { + "type": "number", + "description": "Topic ID for domain-specific recognition." 
+ }, + "inline_grammar": { + "type": "string", + "description": "Inline SRGS grammar for constrained recognition." + }, + "grammar_uri": { + "type": "string", + "description": "URI of an external grammar resource." + }, + "label": { + "type": "string", + "description": "Label for the recognition session." + }, + "recognition_timeout": { + "type": "number", + "description": "Maximum recognition duration in seconds." + }, + "speech_complete_timeout": { + "type": "number", + "description": "Silence duration in seconds after complete speech." + }, + "speech_incomplete_timeout": { + "type": "number", + "description": "Silence duration in seconds after incomplete speech." + } + }, + "additionalProperties": false +} diff --git a/src/jambonz_sdk/schema/components/recognizer.schema.json b/src/jambonz_sdk/schema/components/recognizer.schema.json new file mode 100644 index 0000000..f7f3084 --- /dev/null +++ b/src/jambonz_sdk/schema/components/recognizer.schema.json @@ -0,0 +1,216 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/recognizer", + "title": "Recognizer", + "description": "Configuration for speech-to-text recognition. Specifies the STT vendor, language, and vendor-specific options. Can be set at the session level via the 'config' verb or overridden per-verb (e.g. on 'gather').", + "type": "object", + "properties": { + "vendor": { + "type": "string", + "description": "The STT vendor to use. Must match a vendor configured in the jambonz platform.", + "examples": ["google", "aws", "microsoft", "deepgram", "nuance", "ibm", "nvidia", "soniox", "cobalt", "assemblyai", "speechmatics", "openai", "houndify", "gladia", "elevenlabs", "verbio", "custom"] + }, + "label": { + "type": "string", + "description": "An optional label identifying a specific credential set for this vendor. Used when multiple credentials are configured for the same vendor." 
+ }, + "language": { + "type": "string", + "description": "The language code for speech recognition, in BCP-47 format.", + "examples": ["en-US", "en-GB", "es-ES", "fr-FR"] + }, + "fallbackVendor": { + "type": "string", + "description": "A backup STT vendor to use if the primary vendor fails or is unavailable." + }, + "fallbackLabel": { + "type": "string", + "description": "Credential label for the fallback vendor." + }, + "fallbackLanguage": { + "type": "string", + "description": "Language code to use with the fallback vendor." + }, + "vad": { + "$ref": "vad", + "description": "Voice activity detection settings for this recognizer." + }, + "autogeneratePrompt": { + "type": "boolean", + "description": "If true, automatically generate a prompt for the STT vendor based on context (e.g. TTS voice, language). Supported by vendors that accept prompts for recognition guidance." + }, + "hints": { + "type": "array", + "items": { "type": "string" }, + "description": "An array of words or phrases that the recognizer should favor. Use this to improve accuracy for domain-specific terminology, product names, or proper nouns.", + "examples": [["jambonz", "drachtio", "SIP", "WebRTC"]] + }, + "hintsBoost": { + "type": "number", + "description": "A boost factor for hint words. Higher values increase the likelihood of recognizing hinted words. Vendor-specific range." + }, + "altLanguages": { + "type": "array", + "items": { "type": "string" }, + "description": "Additional languages the recognizer should listen for simultaneously. Enables multilingual recognition.", + "examples": [["es-ES", "fr-FR"]] + }, + "profanityFilter": { + "type": "boolean", + "description": "If true, the vendor will attempt to filter profanity from transcription results." + }, + "interim": { + "type": "boolean", + "description": "If true, return interim (partial) transcription results as they become available, before the utterance is complete." 
+ }, + "singleUtterance": { + "type": "boolean", + "description": "If true, recognition stops after the first complete utterance is detected." + }, + "dualChannel": { + "type": "boolean", + "description": "If true, send separate audio channels for each call leg (caller and callee) to the recognizer." + }, + "separateRecognitionPerChannel": { + "type": "boolean", + "description": "If true, perform independent recognition on each audio channel. Requires dualChannel." + }, + "punctuation": { + "type": "boolean", + "description": "If true, enable automatic punctuation in transcription results." + }, + "enhancedModel": { + "type": "boolean", + "description": "If true, use an enhanced (premium) recognition model if available from the vendor." + }, + "words": { + "type": "boolean", + "description": "If true, include word-level timing information in transcription results." + }, + "diarization": { + "type": "boolean", + "description": "If true, enable speaker diarization to identify different speakers in the audio." + }, + "diarizationMinSpeakers": { + "type": "number", + "description": "Minimum number of speakers expected. Used to guide the diarization algorithm." + }, + "diarizationMaxSpeakers": { + "type": "number", + "description": "Maximum number of speakers expected. Used to guide the diarization algorithm." + }, + "interactionType": { + "type": "string", + "description": "A hint to the recognizer about the type of interaction, which can improve accuracy.", + "enum": ["unspecified", "discussion", "presentation", "phone_call", "voicemail", "voice_search", "voice_command", "dictation"] + }, + "naicsCode": { + "type": "number", + "description": "North American Industry Classification System code. Some vendors use this to improve domain-specific accuracy." + }, + "identifyChannels": { + "type": "boolean", + "description": "If true, identify and label which channel each transcription segment came from." 
+ }, + "vocabularyName": { + "type": "string", + "description": "Name of a custom vocabulary resource configured at the vendor for improved recognition of specialized terms." + }, + "vocabularyFilterName": { + "type": "string", + "description": "Name of a vocabulary filter configured at the vendor for masking or removing specific words." + }, + "filterMethod": { + "type": "string", + "description": "How filtered vocabulary words should be handled in the transcript.", + "enum": ["remove", "mask", "tag"] + }, + "model": { + "type": "string", + "description": "The specific recognition model to use. Model names are vendor-specific.", + "examples": ["latest_long", "phone_call", "nova-2", "chirp"] + }, + "outputFormat": { + "type": "string", + "description": "The level of detail in recognition results.", + "enum": ["simple", "detailed"] + }, + "profanityOption": { + "type": "string", + "description": "How profanity should be handled in results.", + "enum": ["masked", "removed", "raw"] + }, + "requestSnr": { + "type": "boolean", + "description": "If true, request signal-to-noise ratio information in results." + }, + "initialSpeechTimeoutMs": { + "type": "number", + "description": "Time in milliseconds to wait for initial speech before timing out.", + "examples": [5000] + }, + "azureServiceEndpoint": { + "type": "string", + "description": "Custom Azure Speech Services endpoint URL. Only applies when vendor is 'microsoft'." + }, + "azureSttEndpointId": { + "type": "string", + "description": "Azure custom speech endpoint ID for using a custom-trained model. Only applies when vendor is 'microsoft'." + }, + "asrDtmfTerminationDigit": { + "type": "string", + "description": "A DTMF digit that terminates speech recognition when pressed.", + "examples": ["#"] + }, + "asrTimeout": { + "type": "number", + "description": "Maximum time in seconds to wait for a complete recognition result." 
+ }, + "fastRecognitionTimeout": { + "type": "number", + "description": "Timeout in seconds for fast recognition mode. Shorter timeout for quick responses." + }, + "minConfidence": { + "type": "number", + "description": "Minimum confidence score (0-1) required to accept a recognition result. Results below this threshold are discarded.", + "minimum": 0, + "maximum": 1 + }, + "deepgramOptions": { "$ref": "recognizer-deepgramOptions" }, + "googleOptions": { "$ref": "recognizer-googleOptions" }, + "awsOptions": { "$ref": "recognizer-awsOptions" }, + "azureOptions": { "$ref": "recognizer-azureOptions" }, + "nuanceOptions": { "$ref": "recognizer-nuanceOptions" }, + "ibmOptions": { "$ref": "recognizer-ibmOptions" }, + "nvidiaOptions": { "$ref": "recognizer-nvidiaOptions" }, + "sonioxOptions": { "$ref": "recognizer-sonioxOptions" }, + "cobaltOptions": { "$ref": "recognizer-cobaltOptions" }, + "assemblyAiOptions": { "$ref": "recognizer-assemblyAiOptions" }, + "speechmaticsOptions": { "$ref": "recognizer-speechmaticsOptions" }, + "openaiOptions": { "$ref": "recognizer-openaiOptions" }, + "houndifyOptions": { "$ref": "recognizer-houndifyOptions" }, + "gladiaOptions": { "$ref": "recognizer-gladiaOptions" }, + "elevenlabsOptions": { "$ref": "recognizer-elevenlabsOptions" }, + "verbioOptions": { "$ref": "recognizer-verbioOptions" }, + "customOptions": { "$ref": "recognizer-customOptions" } + }, + "required": ["vendor"], + "examples": [ + { + "vendor": "deepgram", + "language": "en-US", + "deepgramOptions": { + "model": "nova-2", + "smartFormatting": true, + "endpointing": 500 + } + }, + { + "vendor": "google", + "language": "en-US", + "hints": ["jambonz", "drachtio"], + "punctuation": true, + "enhancedModel": true + } + ] +} diff --git a/src/jambonz_sdk/schema/components/synthesizer.schema.json b/src/jambonz_sdk/schema/components/synthesizer.schema.json new file mode 100644 index 0000000..c938efc --- /dev/null +++ b/src/jambonz_sdk/schema/components/synthesizer.schema.json @@ 
-0,0 +1,82 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/synthesizer", + "title": "Synthesizer", + "description": "Configuration for text-to-speech synthesis. Specifies the TTS vendor, voice, language, and vendor-specific options. Can be set at the session level via the 'config' verb or overridden per-verb (e.g. on 'say').", + "type": "object", + "properties": { + "vendor": { + "type": "string", + "description": "The TTS vendor to use. Must match a vendor configured in the jambonz platform.", + "examples": ["google", "aws", "microsoft", "elevenlabs", "cartesia", "deepgram", "ibm", "nuance", "nvidia", "wellsaid", "whisper", "verbio", "custom"] + }, + "label": { + "type": "string", + "description": "An optional label identifying a specific credential set for this vendor. Used when multiple credentials are configured for the same vendor on the jambonz platform." + }, + "language": { + "type": "string", + "description": "The language code for speech synthesis, in BCP-47 format.", + "examples": ["en-US", "en-GB", "es-ES", "fr-FR", "de-DE"] + }, + "voice": { + "oneOf": [ + { "type": "string" }, + { "type": "object", "additionalProperties": true } + ], + "description": "The voice to use for synthesis. Format varies by vendor: Google uses voice names like 'en-US-Wavenet-D', AWS Polly uses names like 'Joanna', but ElevenLabs and Cartesia require voice IDs (alphanumeric strings like 'EXAVITQu4vr4xnSDxMaL'), not human-readable names. Some vendors accept an object for more complex voice configuration.", + "examples": ["en-US-Wavenet-D", "Joanna", "EXAVITQu4vr4xnSDxMaL"] + }, + "fallbackVendor": { + "type": "string", + "description": "A backup TTS vendor to use if the primary vendor fails or is unavailable." + }, + "fallbackLabel": { + "type": "string", + "description": "Credential label for the fallback vendor." 
+ }, + "fallbackLanguage": { + "type": "string", + "description": "Language code to use with the fallback vendor." + }, + "fallbackVoice": { + "oneOf": [ + { "type": "string" }, + { "type": "object", "additionalProperties": true } + ], + "description": "Voice to use with the fallback vendor." + }, + "engine": { + "type": "string", + "description": "The synthesis engine tier to use. Availability depends on the vendor.", + "enum": ["standard", "neural", "generative", "long-form"] + }, + "gender": { + "type": "string", + "description": "Preferred voice gender. Used by some vendors (e.g. Google) when a specific voice is not specified.", + "enum": ["MALE", "FEMALE", "NEUTRAL"] + }, + "options": { + "type": "object", + "description": "Vendor-specific options passed through to the TTS provider. The structure depends on the vendor being used.", + "additionalProperties": true + } + }, + "required": ["vendor"], + "examples": [ + { + "vendor": "google", + "language": "en-US", + "voice": "en-US-Wavenet-D" + }, + { + "vendor": "elevenlabs", + "voice": "EXAVITQu4vr4xnSDxMaL", + "options": { + "model_id": "eleven_turbo_v2", + "stability": 0.5, + "similarity_boost": 0.75 + } + } + ] +} diff --git a/src/jambonz_sdk/schema/components/target.schema.json b/src/jambonz_sdk/schema/components/target.schema.json new file mode 100644 index 0000000..6268cc6 --- /dev/null +++ b/src/jambonz_sdk/schema/components/target.schema.json @@ -0,0 +1,105 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/target", + "title": "Target", + "description": "A call target for the 'dial' verb. 
Specifies who or what to connect the call to: a phone number (PSTN), a SIP endpoint, a registered user, or a Microsoft Teams user.", + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The type of target to dial.", + "enum": ["phone", "sip", "user", "teams"] + }, + "number": { + "type": "string", + "description": "The phone number to dial. Required when type is 'phone'. Use E.164 format.", + "examples": ["+15085551212"] + }, + "sipUri": { + "type": "string", + "description": "The SIP URI to dial. Required when type is 'sip'.", + "examples": ["sip:alice@example.com"] + }, + "name": { + "type": "string", + "description": "The registered user name to dial. Required when type is 'user'. Also used as the display name for SIP targets." + }, + "tenant": { + "type": "string", + "description": "The Microsoft Teams tenant ID. Required when type is 'teams'." + }, + "trunk": { + "type": "string", + "description": "The SIP trunk to use for the outbound call. When specified, overrides the default carrier routing." + }, + "confirmHook": { + "oneOf": [ + { "type": "string", "format": "uri" }, + { "$ref": "actionHook" } + ], + "description": "A webhook to invoke when the target answers, before connecting the call. Use this to screen calls, play a whisper prompt, or require the target to press a key to accept." + }, + "method": { + "type": "string", + "description": "The HTTP method to use when invoking the confirmHook.", + "enum": ["GET", "POST"], + "default": "POST" + }, + "headers": { + "type": "object", + "description": "Custom SIP headers to include on the outbound INVITE. Keys are header names, values are header values.", + "additionalProperties": { "type": "string" } + }, + "from": { + "type": "object", + "description": "Override the From header on the outbound SIP INVITE.", + "properties": { + "user": { + "type": "string", + "description": "The user part of the SIP From URI." 
+ }, + "host": { + "type": "string", + "description": "The host part of the SIP From URI." + } + } + }, + "auth": { + "$ref": "auth", + "description": "SIP authentication credentials for the outbound call, if the far end requires digest auth." + }, + "vmail": { + "type": "boolean", + "description": "If true, follow the call into voicemail if the target does not answer." + }, + "overrideTo": { + "type": "string", + "description": "Override the Request-URI on the outbound SIP INVITE. Useful when the Request-URI needs to differ from the To header." + }, + "proxy": { + "type": "string", + "description": "A SIP proxy to route the outbound call through, specified as a SIP URI.", + "examples": ["sip:proxy.example.com"] + } + }, + "required": ["type"], + "examples": [ + { + "type": "phone", + "number": "+15085551212" + }, + { + "type": "sip", + "sipUri": "sip:alice@example.com" + }, + { + "type": "user", + "name": "bob" + }, + { + "type": "teams", + "number": "+15085551212", + "tenant": "a-b-c-d-e" + } + ] +} diff --git a/src/jambonz_sdk/schema/components/vad.schema.json b/src/jambonz_sdk/schema/components/vad.schema.json new file mode 100644 index 0000000..d33c75a --- /dev/null +++ b/src/jambonz_sdk/schema/components/vad.schema.json @@ -0,0 +1,48 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/components/vad", + "title": "VAD", + "description": "Voice Activity Detection configuration. Controls how jambonz detects the presence or absence of speech on the audio channel. Used to determine speech start/end boundaries for recognition and barge-in.", + "type": "object", + "properties": { + "enable": { + "type": "boolean", + "description": "Whether to enable voice activity detection." 
+ }, + "voiceMs": { + "type": "number", + "description": "Duration of voice activity (in milliseconds) required before speech is considered to have started.", + "examples": [250] + }, + "silenceMs": { + "type": "number", + "description": "Duration of silence (in milliseconds) required before speech is considered to have ended.", + "examples": [1000] + }, + "strategy": { + "type": "string", + "description": "The VAD strategy to use." + }, + "mode": { + "type": "number", + "description": "WebRTC VAD aggressiveness mode (0-3). Higher values are more aggressive at filtering non-speech. Only applies when vendor is 'webrtc'.", + "minimum": 0, + "maximum": 3 + }, + "vendor": { + "type": "string", + "description": "The VAD engine to use.", + "enum": ["webrtc", "silero"] + }, + "threshold": { + "type": "number", + "description": "Speech detection confidence threshold for Silero VAD. Value between 0 and 1, where higher values require greater confidence. Only applies when vendor is 'silero'.", + "minimum": 0, + "maximum": 1 + }, + "speechPadMs": { + "type": "number", + "description": "Padding in milliseconds added before and after detected speech segments. Prevents clipping utterance boundaries. Only applies when vendor is 'silero'." + } + } +} diff --git a/src/jambonz_sdk/schema/jambonz-app.schema.json b/src/jambonz_sdk/schema/jambonz-app.schema.json new file mode 100644 index 0000000..323da09 --- /dev/null +++ b/src/jambonz_sdk/schema/jambonz-app.schema.json @@ -0,0 +1,112 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/jambonz-app", + "title": "jambonz Application", + "description": "A jambonz application is an array of verbs that are executed sequentially to control a phone call. Each verb performs an action: speaking text, playing audio, collecting input, dialing a number, connecting to an AI model, etc. 
When a webhook (actionHook) is invoked, it must return a new verb array to continue call processing.\n\nThe execution model is simple: verbs execute one after another, top to bottom. When a verb with an actionHook completes (e.g. gather collects input), the actionHook is called and its response replaces the remaining verb stack. If the verb array is exhausted without a hangup, the call is terminated.\n\nThere are two transport modes for delivering verb arrays to jambonz:\n- **Webhook**: Your HTTP server receives POST/GET requests with call data and returns JSON verb arrays in the response body.\n- **WebSocket**: Your server maintains a persistent websocket connection with jambonz and sends/receives verb arrays as JSON messages. Required for real-time features like LLM conversations.\n\nThe verb schemas and JSON structure are identical regardless of transport mode.", + "type": "array", + "items": { + "$ref": "#/$defs/Verb" + }, + "minItems": 1, + "$defs": { + "Verb": { + "oneOf": [ + { "$ref": "verbs/answer" }, + { "$ref": "verbs/alert" }, + { "$ref": "verbs/config" }, + { "$ref": "verbs/say" }, + { "$ref": "verbs/play" }, + { "$ref": "verbs/gather" }, + { "$ref": "verbs/dial" }, + { "$ref": "verbs/listen" }, + { "$ref": "verbs/stream" }, + { "$ref": "verbs/llm" }, + { "$ref": "verbs/s2s" }, + { "$ref": "verbs/openai_s2s" }, + { "$ref": "verbs/google_s2s" }, + { "$ref": "verbs/elevenlabs_s2s" }, + { "$ref": "verbs/deepgram_s2s" }, + { "$ref": "verbs/ultravox_s2s" }, + { "$ref": "verbs/dialogflow" }, + { "$ref": "verbs/pipeline" }, + { "$ref": "verbs/conference" }, + { "$ref": "verbs/transcribe" }, + { "$ref": "verbs/enqueue" }, + { "$ref": "verbs/dequeue" }, + { "$ref": "verbs/dtmf" }, + { "$ref": "verbs/dub" }, + { "$ref": "verbs/hangup" }, + { "$ref": "verbs/leave" }, + { "$ref": "verbs/message" }, + { "$ref": "verbs/pause" }, + { "$ref": "verbs/redirect" }, + { "$ref": "verbs/tag" }, + { "$ref": "verbs/sip:decline" }, + { "$ref": "verbs/sip:request" }, + { 
"$ref": "verbs/sip:refer" } + ], + "discriminator": { + "propertyName": "verb" + } + } + }, + "examples": [ + [ + { + "verb": "config", + "synthesizer": { "vendor": "elevenlabs", "voice": "EXAVITQu4vr4xnSDxMaL", "language": "en-US" }, + "recognizer": { "vendor": "deepgram", "language": "en-US" } + }, + { + "verb": "say", + "text": "Hello! Welcome to Acme Corp. How can I help you today?" + }, + { + "verb": "gather", + "input": ["speech"], + "actionHook": "/process-input", + "timeout": 15, + "say": { "text": "I'm listening." } + } + ], + [ + { + "verb": "say", + "text": "Please hold while I connect you to an agent." + }, + { + "verb": "dial", + "target": [{ "type": "phone", "number": "+15085551212" }], + "answerOnBridge": true, + "timeout": 30, + "actionHook": "/dial-complete" + }, + { + "verb": "say", + "text": "Sorry, the agent is not available. Please try again later." + }, + { + "verb": "hangup" + } + ], + [ + { + "verb": "config", + "synthesizer": { "vendor": "cartesia", "voice": "sonic-english" }, + "recognizer": { "vendor": "deepgram", "language": "en-US" } + }, + { + "verb": "openai_s2s", + "model": "gpt-4o", + "llmOptions": { + "messages": [ + { "role": "system", "content": "You are a helpful customer service agent for Acme Corp. Be concise and friendly." } + ], + "temperature": 0.7 + }, + "actionHook": "/llm-complete", + "toolHook": "/llm-tool" + } + ] + ] +} diff --git a/src/jambonz_sdk/schema/verbs/alert.schema.json b/src/jambonz_sdk/schema/verbs/alert.schema.json new file mode 100644 index 0000000..3ae3832 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/alert.schema.json @@ -0,0 +1,34 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/alert", + "minVersion": "0.9.6", + "title": "Alert", + "description": "Sends a 180 Ringing provisional response with an Alert-Info header. 
Used to trigger a specific ring tone or alert behavior on the caller's device before the call is answered.", + "type": "object", + "properties": { + "verb": { + "const": "alert" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "message": { + "type": "string", + "description": "The value to include in the Alert-Info header.", + "examples": [ + "info=alert-internal", + "http://example.com/ringtone.wav" + ] + } + }, + "required": [ + "message" + ], + "examples": [ + { + "verb": "alert", + "message": "info=alert-internal" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/answer.schema.json b/src/jambonz_sdk/schema/verbs/answer.schema.json new file mode 100644 index 0000000..dea0b61 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/answer.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/answer", + "minVersion": "0.9.6", + "title": "Answer", + "description": "Answers an incoming call (sends a 200 OK to the SIP INVITE). Most verbs implicitly answer the call, so this verb is only needed when you want to explicitly control when the call is answered — for example, to play early media before answering.", + "type": "object", + "properties": { + "verb": { + "const": "answer" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + } + }, + "examples": [ + { + "verb": "answer" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/conference.schema.json b/src/jambonz_sdk/schema/verbs/conference.schema.json new file mode 100644 index 0000000..55654d3 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/conference.schema.json @@ -0,0 +1,107 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/conference", + "minVersion": "0.9.6", + "title": "Conference", + "description": "Places the caller into a multi-party conference room. 
Multiple callers in the same named conference can speak to each other. Supports features like muting, recording, waiting rooms, and participant limits.", + "type": "object", + "properties": { + "verb": { + "const": "conference" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "name": { + "type": "string", + "description": "The name of the conference room. All callers joining the same named conference are connected together.", + "examples": [ + "team-standup", + "customer-call-12345" + ] + }, + "beep": { + "type": "boolean", + "description": "If true, play a beep when participants join or leave." + }, + "memberTag": { + "type": "string", + "description": "A tag to identify this participant. Can be used to target specific members for actions like muting or whispering." + }, + "speakOnlyTo": { + "type": "string", + "description": "If set, this participant's audio is only heard by the member with the specified memberTag. Creates a private whisper channel." + }, + "startConferenceOnEnter": { + "type": "boolean", + "description": "If true (default), the conference starts when this participant joins. If false, this participant waits silently until a participant with startConferenceOnEnter=true joins." + }, + "endConferenceOnExit": { + "type": "boolean", + "description": "If true, the conference ends for all participants when this participant leaves." + }, + "endConferenceDuration": { + "type": "number", + "description": "Maximum duration of the conference in seconds." + }, + "maxParticipants": { + "type": "number", + "description": "Maximum number of participants allowed in the conference." + }, + "joinMuted": { + "type": "boolean", + "description": "If true, this participant joins the conference muted." + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when this participant leaves the conference." 
+ }, + "waitHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked while this participant is waiting for the conference to start. Should return verbs to play (e.g. hold music)." + }, + "statusEvents": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of conference events to receive via the statusHook." + }, + "statusHook": { + "$ref": "../components/actionHook", + "description": "A webhook to receive conference status events (joins, leaves, etc.)." + }, + "enterHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when this participant first enters the conference." + }, + "record": { + "type": "object", + "description": "Recording configuration for the conference.", + "additionalProperties": true + }, + "listen": { + "type": "object", + "description": "Audio streaming configuration for the conference.", + "additionalProperties": true + }, + "distributeDtmf": { + "type": "boolean", + "description": "If true, DTMF events from this participant are distributed to all other participants." + } + }, + "required": [ + "name" + ], + "examples": [ + { + "verb": "conference", + "name": "team-standup", + "beep": true, + "startConferenceOnEnter": true, + "endConferenceOnExit": false, + "statusHook": "/conference-events" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/config.schema.json b/src/jambonz_sdk/schema/verbs/config.schema.json new file mode 100644 index 0000000..0e40c87 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/config.schema.json @@ -0,0 +1,218 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/config", + "minVersion": "0.9.6", + "title": "Config", + "description": "Sets session-level defaults for the call. Configures default TTS, STT, VAD, recording, streaming, and other session-wide settings. These defaults apply to all subsequent verbs unless overridden at the verb level. 
Typically the first verb in an application. Can be used multiple times during a call to change settings.", + "type": "object", + "properties": { + "verb": { + "const": "config", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "synthesizer": { + "$ref": "../components/synthesizer", + "description": "Default TTS configuration for the session." + }, + "recognizer": { + "$ref": "../components/recognizer", + "description": "Default STT configuration for the session." + }, + "bargeIn": { + "type": "object", + "description": "Default barge-in configuration. When enabled, callers can interrupt playing prompts with speech or DTMF.", + "properties": { + "enable": { + "type": "boolean" + }, + "sticky": { + "type": "boolean", + "description": "If true, barge-in settings persist across verbs rather than resetting after each verb." + }, + "actionHook": { + "$ref": "../components/actionHook" + }, + "input": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "speech", + "digits" + ] + } + }, + "minBargeinWordCount": { + "type": "number" + } + } + }, + "ttsStream": { + "type": "object", + "description": "Default TTS streaming configuration for the session.", + "properties": { + "enable": { + "type": "boolean" + }, + "synthesizer": { + "$ref": "../components/synthesizer" + } + } + }, + "record": { + "type": "object", + "description": "Session-level call recording configuration.", + "additionalProperties": true + }, + "listen": { + "$ref": "listen", + "description": "Nested listen verb — session-level audio streaming configuration." + }, + "stream": { + "$ref": "stream", + "description": "Nested stream verb — session-level audio streaming configuration. Alias for 'listen'." + }, + "transcribe": { + "$ref": "transcribe", + "description": "Nested transcribe verb — session-level real-time transcription configuration." 
+ }, + "amd": { + "$ref": "../components/amd", + "description": "Session-level answering machine detection configuration." + }, + "fillerNoise": { + "$ref": "../components/fillerNoise", + "description": "Default filler noise configuration for the session." + }, + "vad": { + "$ref": "../components/vad", + "description": "Default voice activity detection configuration for the session." + }, + "notifyEvents": { + "type": "boolean", + "description": "If true, send call events (e.g. DTMF, call status changes) to the application via the status webhook." + }, + "notifySttLatency": { + "type": "boolean", + "description": "If true, include STT latency measurements in webhook payloads." + }, + "reset": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Reset specific session-level settings to their defaults. Pass a setting name or array of setting names to reset." + }, + "onHoldMusic": { + "type": "string", + "format": "uri", + "description": "URL of an audio file to play when the call is placed on hold." + }, + "actionHookDelayAction": { + "$ref": "../components/actionHookDelayAction", + "description": "Default configuration for handling slow webhook responses." + }, + "sipRequestWithinDialogHook": { + "$ref": "../components/actionHook", + "description": "A webhook to invoke when a SIP request (e.g. INFO, NOTIFY) is received within the dialog." + }, + "boostAudioSignal": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "description": "Boost (or attenuate) the audio signal in dB for the session." + }, + "referHook": { + "$ref": "../components/actionHook", + "description": "A webhook to invoke when a SIP REFER request is received." + }, + "earlyMedia": { + "type": "boolean", + "description": "If true, allow early media (audio before call answer) for the session." 
+ }, + "autoStreamTts": { + "type": "boolean", + "description": "If true, automatically use streaming TTS for all 'say' verbs in the session." + }, + "disableTtsCache": { + "type": "boolean", + "description": "If true, disable TTS caching for the session." + }, + "trackTtsPlayout": { + "type": "boolean", + "description": "If true, report the actual text spoken via TTS. Requires a TTS vendor that supports alignment data (e.g. ElevenLabs). On each utterance completion or interruption, a tts_spoken event is sent to the '/streaming-event' endpoint with fields: 'text' (string — the text actually spoken) and 'bargein' (boolean — true if the user interrupted before TTS finished). See the tts-streaming-event callback schema for full details." + }, + "noiseIsolation": { + "type": "object", + "description": "Noise isolation configuration to reduce background noise on call audio. Defaults to filtering inbound (caller) audio; can also filter outbound audio via the direction option.", + "properties": { + "enable": { + "type": "boolean" + }, + "vendor": { + "type": "string" + }, + "level": { + "type": "number" + }, + "model": { + "type": "string" + } + } + }, + "turnTaking": { + "type": "object", + "description": "Turn-taking detection configuration for conversational AI applications.", + "properties": { + "enable": { + "type": "boolean" + }, + "vendor": { + "type": "string" + }, + "threshold": { + "type": "number" + }, + "model": { + "type": "string" + } + } + } + }, + "required": [], + "examples": [ + { + "verb": "config", + "synthesizer": { + "vendor": "elevenlabs", + "voice": "EXAVITQu4vr4xnSDxMaL", + "language": "en-US" + }, + "recognizer": { + "vendor": "deepgram", + "language": "en-US" + }, + "fillerNoise": { + "enable": true, + "url": "https://example.com/sounds/typing.wav", + "startDelaySecs": 2 + } + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/deepgram_s2s.schema.json b/src/jambonz_sdk/schema/verbs/deepgram_s2s.schema.json new file mode 100644 index 0000000..ffd4054 --- 
/dev/null +++ b/src/jambonz_sdk/schema/verbs/deepgram_s2s.schema.json @@ -0,0 +1,81 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/deepgram_s2s", + "minVersion": "10.1.0", + "title": "Deepgram S2S", + "description": "Shortcut for 'llm' with vendor automatically set to 'deepgram'. Connects the caller to a Deepgram model for real-time speech-to-speech voice conversation.", + "type": "object", + "allOf": [ + { + "$ref": "../components/llm-base" + } + ], + "properties": { + "verb": { + "const": "deepgram_s2s", + "description": "The verb name." + }, + "vendor": { + "type": "string", + "const": "deepgram", + "description": "The LLM vendor (always 'deepgram' for this shortcut)." + }, + "llmOptions": { + "type": "object", + "description": "IMPORTANT: Deepgram does NOT use a 'messages' array. The llmOptions must contain a 'Settings' object with 'agent.think' (LLM provider, model, and prompt) and 'agent.speak' (TTS provider and voice model). The system prompt goes in Settings.agent.think.prompt, NOT in messages.", + "additionalProperties": true, + "examples": [ + { + "Settings": { + "agent": { + "think": { + "provider": { + "type": "open_ai", + "model": "gpt-4o" + }, + "prompt": "You are a helpful voice assistant." + }, + "speak": { + "provider": { + "type": "deepgram", + "model": "aura-2-thalia-en" + } + } + } + } + } + ] + } + }, + "required": [ + "llmOptions" + ], + "examples": [ + { + "verb": "deepgram_s2s", + "auth": { + "apiKey": "your-deepgram-api-key" + }, + "llmOptions": { + "Settings": { + "agent": { + "think": { + "provider": { + "type": "open_ai", + "model": "gpt-4o" + }, + "prompt": "You are a helpful voice assistant." 
+ }, + "speak": { + "provider": { + "type": "deepgram", + "model": "aura-2-thalia-en" + } + } + } + } + }, + "actionHook": "/s2s-complete" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/dequeue.schema.json b/src/jambonz_sdk/schema/verbs/dequeue.schema.json new file mode 100644 index 0000000..b58efe8 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/dequeue.schema.json @@ -0,0 +1,51 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/dequeue", + "minVersion": "0.9.6", + "title": "Dequeue", + "description": "Removes a caller from a named queue and bridges them to the current call. Typically used by an agent or operator call flow to connect with the next waiting caller.", + "type": "object", + "properties": { + "verb": { + "const": "dequeue" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "name": { + "type": "string", + "description": "The name of the queue to dequeue from.", + "examples": [ + "support", + "sales" + ] + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the dequeued call ends." + }, + "timeout": { + "type": "number", + "description": "Time in seconds to wait for a caller to be available in the queue." + }, + "beep": { + "type": "boolean", + "description": "If true, play a beep when the calls are connected." + }, + "callSid": { + "type": "string", + "description": "Dequeue a specific call by its call SID, rather than the next caller in line." 
+ } + }, + "required": [ + "name" + ], + "examples": [ + { + "verb": "dequeue", + "name": "support", + "beep": true + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/dial.schema.json b/src/jambonz_sdk/schema/verbs/dial.schema.json new file mode 100644 index 0000000..54a85af --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/dial.schema.json @@ -0,0 +1,187 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/dial", + "minVersion": "0.9.6", + "title": "Dial", + "description": "Initiates an outbound call to one or more targets and bridges the caller to the first target that answers. Targets can be phone numbers (PSTN), SIP endpoints, registered users, or Microsoft Teams users. Supports simultaneous ringing, call screening, recording, and DTMF capture during the bridged call.", + "type": "object", + "properties": { + "verb": { + "const": "dial", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "target": { + "type": "array", + "items": { + "$ref": "../components/target" + }, + "description": "One or more call targets to dial. If multiple targets are specified, they are rung simultaneously and the first to answer is connected. The rest are canceled.", + "minItems": 1 + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the dialed call ends. Receives call disposition details (duration, who hung up, etc.) and should return the next verbs to execute." + }, + "onHoldHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the call is placed on hold. Should return verbs to execute (e.g. play hold music) while the caller is holding." + }, + "answerOnBridge": { + "type": "boolean", + "description": "If true, delay answering the inbound call until the outbound leg is answered. 
This allows the caller to hear ringing until the target picks up, and avoids billing the caller for unanswered outbound attempts." + }, + "callerId": { + "type": "string", + "description": "The caller ID (phone number) to present on the outbound call. Overrides the default caller ID.", + "examples": [ + "+15085551212" + ] + }, + "callerName": { + "type": "string", + "description": "The caller display name to present on the outbound call." + }, + "confirmHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when a target answers, before the call is bridged. Used for call screening — the webhook can return verbs (e.g. a 'say' prompt and 'gather') to confirm the callee wants to accept the call." + }, + "referHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when a SIP REFER is received on the bridged call. Allows handling call transfers initiated by the far end." + }, + "dialMusic": { + "type": "string", + "format": "uri", + "description": "URL of an audio file to play to the caller while the outbound call is ringing. Replaces the default ringback tone." + }, + "dtmfCapture": { + "type": "object", + "description": "Configuration for capturing DTMF digits during the bridged call. Keys are DTMF patterns to capture, values are configuration for each.", + "additionalProperties": true + }, + "dtmfHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when a captured DTMF pattern is detected during the bridged call." + }, + "headers": { + "type": "object", + "description": "Custom SIP headers to include on the outbound INVITE.", + "additionalProperties": { + "type": "string" + } + }, + "anchorMedia": { + "type": "boolean", + "description": "If true, keep media anchored through the jambonz media server even if a direct media path is possible. Required for features like recording, listen, and DTMF capture during bridged calls." 
+ }, + "exitMediaPath": { + "type": "boolean", + "description": "If true, remove jambonz from the media path after the call is bridged. Reduces latency but disables mid-call features like recording and DTMF capture." + }, + "boostAudioSignal": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "description": "Boost (or attenuate) the audio signal in dB. Positive values increase volume, negative values decrease it.", + "examples": [ + 6, + -3 + ] + }, + "listen": { + "$ref": "listen", + "description": "Nested listen verb — streams audio of the bridged call to a websocket endpoint." + }, + "stream": { + "$ref": "stream", + "description": "Nested stream verb — streams audio of the bridged call. Alias for 'listen'." + }, + "transcribe": { + "$ref": "transcribe", + "description": "Nested transcribe verb — enables real-time transcription of the bridged call." + }, + "timeLimit": { + "type": "number", + "description": "Maximum duration in seconds for the bridged call. The call is automatically hung up when this limit is reached.", + "examples": [ + 3600 + ] + }, + "timeout": { + "type": "number", + "description": "Time in seconds to wait for the target to answer before giving up.", + "examples": [ + 30, + 60 + ] + }, + "proxy": { + "type": "string", + "description": "A SIP proxy to route the outbound call through.", + "examples": [ + "sip:proxy.example.com" + ] + }, + "amd": { + "$ref": "../components/amd", + "description": "Answering machine detection configuration. When enabled, jambonz attempts to determine whether the call was answered by a human or a machine." + }, + "dub": { + "type": "array", + "items": { + "$ref": "dub" + }, + "description": "Nested dub verbs — audio dubbing configuration for mixing additional audio tracks into the bridged call." + }, + "tag": { + "type": "object", + "description": "Arbitrary metadata to attach to this call leg. 
Included in subsequent webhook invocations and CDRs.", + "additionalProperties": true + }, + "forwardPAI": { + "type": "boolean", + "description": "If true, forward the P-Asserted-Identity header from the inbound call to the outbound call." + } + }, + "required": [ + "target" + ], + "examples": [ + { + "verb": "dial", + "target": [ + { + "type": "phone", + "number": "+15085551212" + } + ], + "answerOnBridge": true, + "timeout": 30, + "actionHook": "/dial-complete" + }, + { + "verb": "dial", + "target": [ + { + "type": "sip", + "sipUri": "sip:alice@example.com" + }, + { + "type": "sip", + "sipUri": "sip:bob@example.com" + } + ], + "confirmHook": "/screen-call", + "timeLimit": 3600 + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/dialogflow.schema.json b/src/jambonz_sdk/schema/verbs/dialogflow.schema.json new file mode 100644 index 0000000..a02b347 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/dialogflow.schema.json @@ -0,0 +1,148 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/dialogflow", + "minVersion": "0.9.6", + "title": "Dialogflow", + "description": "Connects the caller to a Google Dialogflow agent for a voice conversation. Supports Dialogflow ES, CX, and CES models. The caller speaks and Dialogflow handles intent detection and response generation.", + "type": "object", + "properties": { + "verb": { + "const": "dialogflow", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "credentials": { + "oneOf": [ + { + "type": "object", + "additionalProperties": true + }, + { + "type": "string" + } + ], + "description": "Google service account credentials as a JSON object or stringified JSON." + }, + "project": { + "type": "string", + "description": "The Google Cloud project ID." + }, + "agent": { + "type": "string", + "description": "The Dialogflow agent ID. Required for CX agents." 
+ }, + "environment": { + "type": "string", + "description": "The Dialogflow environment to use." + }, + "region": { + "type": "string", + "description": "The Google Cloud region for the Dialogflow API endpoint." + }, + "model": { + "type": "string", + "enum": [ + "es", + "cx", + "ces" + ], + "description": "The Dialogflow model type: 'es' for Dialogflow ES, 'cx' for Dialogflow CX, 'ces' for Dialogflow CES." + }, + "lang": { + "type": "string", + "description": "The language code for the conversation (e.g. 'en-US')." + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the Dialogflow session ends." + }, + "eventHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked for Dialogflow events during the conversation." + }, + "events": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of event types to receive via the eventHook." + }, + "welcomeEvent": { + "type": "string", + "description": "A Dialogflow event to trigger at the start of the conversation (e.g. 'welcome')." + }, + "welcomeEventParams": { + "type": "object", + "description": "Parameters to pass with the welcome event.", + "additionalProperties": true + }, + "noInputTimeout": { + "type": "number", + "description": "Seconds to wait for caller input before triggering the no-input event." + }, + "noInputEvent": { + "type": "string", + "description": "Dialogflow event to trigger when no input is received within the timeout." + }, + "passDtmfAsTextInput": { + "type": "boolean", + "description": "If true, pass DTMF digits to Dialogflow as text input." + }, + "thinkingMusic": { + "type": "string", + "description": "URL of an audio file to play while waiting for Dialogflow to respond." + }, + "tts": { + "$ref": "../components/synthesizer", + "description": "TTS configuration for Dialogflow responses." 
+ }, + "bargein": { + "type": "boolean", + "description": "If true, allow the caller to interrupt Dialogflow responses with speech." + }, + "queryInput": { + "type": "object", + "description": "Initial query input to send to Dialogflow.", + "properties": { + "text": { + "type": "string", + "description": "Text input." + }, + "intent": { + "type": "string", + "description": "Intent to trigger." + }, + "event": { + "type": "string", + "description": "Event to trigger." + }, + "dtmf": { + "type": "string", + "description": "DTMF input." + } + }, + "additionalProperties": false + } + }, + "required": [ + "project", + "credentials", + "lang" + ], + "examples": [ + { + "verb": "dialogflow", + "project": "my-gcp-project", + "credentials": "{\"type\": \"service_account\", \"project_id\": \"my-gcp-project\"}", + "lang": "en-US", + "model": "cx", + "agent": "my-agent-id", + "welcomeEvent": "welcome", + "actionHook": "/dialogflow-action", + "eventHook": "/dialogflow-event" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/dtmf.schema.json b/src/jambonz_sdk/schema/verbs/dtmf.schema.json new file mode 100644 index 0000000..fe0ea13 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/dtmf.schema.json @@ -0,0 +1,49 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/dtmf", + "minVersion": "0.9.6", + "title": "DTMF", + "description": "Sends DTMF tones on the call. Used to interact with IVR systems on the far end, or to signal systems that respond to DTMF.", + "type": "object", + "properties": { + "verb": { + "const": "dtmf" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "dtmf": { + "type": "string", + "description": "The DTMF digits to send. Valid characters are 0-9, *, #, and A-D. 
Use 'w' for a 500ms pause between digits.", + "examples": [ + "1234#", + "1w2w3", + "5551212" + ] + }, + "duration": { + "type": "number", + "description": "Duration in milliseconds for each DTMF tone.", + "default": 500, + "examples": [ + 250, + 500 + ] + } + }, + "required": [ + "dtmf" + ], + "examples": [ + { + "verb": "dtmf", + "dtmf": "1234#" + }, + { + "verb": "dtmf", + "dtmf": "1w2w3w4", + "duration": 250 + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/dub.schema.json b/src/jambonz_sdk/schema/verbs/dub.schema.json new file mode 100644 index 0000000..ca83414 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/dub.schema.json @@ -0,0 +1,103 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/dub", + "minVersion": "0.9.6", + "title": "Dub", + "description": "Manages audio dubbing tracks on a call. Allows adding, removing, and controlling auxiliary audio tracks that are mixed into the call audio. Used for background music, coaching whispers, or injecting audio from external sources.", + "type": "object", + "properties": { + "verb": { + "const": "dub" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "action": { + "type": "string", + "description": "The dubbing action to perform.", + "enum": [ + "addTrack", + "removeTrack", + "silenceTrack", + "playOnTrack", + "sayOnTrack" + ] + }, + "track": { + "type": "string", + "description": "The name of the audio track. Used to reference the track in subsequent dub actions.", + "examples": [ + "background-music", + "coach-whisper" + ] + }, + "play": { + "type": "string", + "format": "uri", + "description": "URL of an audio file to play on the track. Used with 'playOnTrack' action." + }, + "say": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": true + } + ], + "description": "Text to synthesize and play on the track. Used with 'sayOnTrack' action. 
Can be a string or a say configuration object." + }, + "loop": { + "type": "boolean", + "description": "If true, loop the audio on the track continuously." + }, + "gain": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "description": "Audio gain for the track in dB. Use negative values to reduce volume.", + "examples": [ + -10, + 0, + 6 + ] + } + }, + "required": [ + "action", + "track" + ], + "examples": [ + { + "verb": "dub", + "action": "addTrack", + "track": "bgm" + }, + { + "verb": "dub", + "action": "playOnTrack", + "track": "bgm", + "play": "https://example.com/music.mp3", + "loop": true, + "gain": -15 + }, + { + "verb": "dub", + "action": "sayOnTrack", + "track": "coach", + "say": "Ask about their budget" + }, + { + "verb": "dub", + "action": "removeTrack", + "track": "bgm" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/elevenlabs_s2s.schema.json b/src/jambonz_sdk/schema/verbs/elevenlabs_s2s.schema.json new file mode 100644 index 0000000..c20bbc6 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/elevenlabs_s2s.schema.json @@ -0,0 +1,81 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/elevenlabs_s2s", + "minVersion": "10.1.0", + "title": "ElevenLabs S2S", + "description": "Shortcut for 'llm' with vendor automatically set to 'elevenlabs'. Connects the caller to an ElevenLabs Conversational AI agent for real-time speech-to-speech voice conversation. Unlike other s2s vendors, ElevenLabs requires a pre-configured agent_id rather than a model and messages.", + "type": "object", + "allOf": [ + { + "$ref": "../components/llm-base" + } + ], + "properties": { + "verb": { + "const": "elevenlabs_s2s", + "description": "The verb name." + }, + "vendor": { + "type": "string", + "const": "elevenlabs", + "description": "The LLM vendor (always 'elevenlabs' for this shortcut)." + }, + "auth": { + "type": "object", + "description": "Authentication credentials for ElevenLabs. 
Requires agent_id; api_key is optional (if not provided, an unsigned URL is used).", + "properties": { + "agent_id": { + "type": "string", + "description": "The ElevenLabs Conversational AI agent ID. Required." + }, + "api_key": { + "type": "string", + "description": "The ElevenLabs API key. Optional; when provided, a signed URL is used for the WebSocket connection." + } + }, + "required": [ + "agent_id" + ] + }, + "llmOptions": { + "type": "object", + "description": "Options for the ElevenLabs conversation session.", + "properties": { + "conversation_initiation_client_data": { + "type": "object", + "description": "Optional data sent to the agent when the conversation starts.", + "additionalProperties": true + }, + "input_sample_rate": { + "type": "integer", + "description": "Audio input sample rate in Hz.", + "default": 16000 + }, + "output_sample_rate": { + "type": "integer", + "description": "Audio output sample rate in Hz.", + "default": 16000 + } + }, + "additionalProperties": true + } + }, + "required": [ + "auth" + ], + "examples": [ + { + "verb": "elevenlabs_s2s", + "auth": { + "agent_id": "your-elevenlabs-agent-id", + "api_key": "your-elevenlabs-api-key" + }, + "llmOptions": {}, + "actionHook": "/s2s-complete", + "eventHook": "/event", + "events": [ + "all" + ] + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/enqueue.schema.json b/src/jambonz_sdk/schema/verbs/enqueue.schema.json new file mode 100644 index 0000000..c0cf5df --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/enqueue.schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/enqueue", + "minVersion": "0.9.6", + "title": "Enqueue", + "description": "Places the caller into a named call queue. While in the queue, the caller hears content returned by the waitHook (typically hold music or position announcements). 
The caller remains in the queue until dequeued by another call or process.", + "type": "object", + "properties": { + "verb": { + "const": "enqueue" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "name": { + "type": "string", + "description": "The name of the queue to place the caller in. Queues are created implicitly when first referenced.", + "examples": [ + "support", + "sales" + ] + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the caller leaves the queue (either dequeued or hung up). Should return the next verbs to execute." + }, + "waitHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked immediately when the caller enters the queue and periodically while waiting. Should return verbs to play to the caller (e.g. hold music, queue position announcements)." + }, + "priority": { + "type": "number", + "description": "The priority of this caller in the queue. Lower numbers are higher priority and are dequeued first.", + "examples": [ + 1, + 5, + 10 + ] + } + }, + "required": [ + "name" + ], + "examples": [ + { + "verb": "enqueue", + "name": "support", + "waitHook": "/queue-wait", + "actionHook": "/queue-exit" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/gather.schema.json b/src/jambonz_sdk/schema/verbs/gather.schema.json new file mode 100644 index 0000000..cc08fd5 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/gather.schema.json @@ -0,0 +1,188 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/gather", + "minVersion": "0.9.6", + "title": "Gather", + "description": "Collects user input via speech (STT) and/or DTMF digits. Optionally plays a prompt (using nested 'say' or 'play') while listening. When input is received, the result is sent to the actionHook which should return the next set of verbs. 
This is the primary verb for building interactive voice menus and conversational flows.", + "type": "object", + "properties": { + "verb": { + "const": "gather", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "Invoked when the gather completes. The payload includes 'reason' ('speechDetected', 'dtmfDetected', or 'timeout'), 'speech' (object with alternatives[].transcript and alternatives[].confidence when reason is speechDetected), and 'digits' (string when reason is dtmfDetected). In webhook mode this is a URL that receives an HTTP POST. In WebSocket mode this is an event name — use session.on('/hookName', (evt) => {...}) and respond with session.reply()." + }, + "input": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "speech", + "digits" + ] + }, + "description": "The types of input to accept. Can include 'speech' (STT), 'digits' (DTMF), or both.", + "default": [ + "digits" + ], + "examples": [ + [ + "speech", + "digits" + ], + [ + "speech" + ], + [ + "digits" + ] + ] + }, + "finishOnKey": { + "type": "string", + "description": "A DTMF key that signals the end of digit input. The key itself is not included in the collected digits.", + "examples": [ + "#", + "*" + ] + }, + "numDigits": { + "type": "number", + "description": "Exact number of DTMF digits to collect. Gather completes automatically when this many digits are received." + }, + "minDigits": { + "type": "number", + "description": "Minimum number of DTMF digits required." + }, + "maxDigits": { + "type": "number", + "description": "Maximum number of DTMF digits to collect." 
+ }, + "interDigitTimeout": { + "type": "number", + "description": "Time in seconds to wait between DTMF digits before considering input complete.", + "examples": [ + 5 + ] + }, + "speechTimeout": { + "type": "number", + "description": "Time in seconds of silence after speech before considering the utterance complete.", + "examples": [ + 2, + 3 + ] + }, + "timeout": { + "type": "number", + "description": "Overall timeout in seconds. If no input is received within this time, the gather completes with no input and the actionHook is invoked.", + "examples": [ + 10, + 30 + ] + }, + "partialResultHook": { + "$ref": "../components/actionHook", + "description": "A webhook to invoke with interim (partial) speech recognition results. Useful for providing real-time feedback or early processing." + }, + "listenDuringPrompt": { + "type": "boolean", + "description": "If true, listen for input while the prompt is playing. If false, only start listening after the prompt finishes.", + "default": true + }, + "dtmfBargein": { + "type": "boolean", + "description": "If true, DTMF input interrupts (barges in on) any playing prompt." + }, + "bargein": { + "type": "boolean", + "description": "If true, speech input interrupts (barges in on) any playing prompt." + }, + "minBargeinWordCount": { + "type": "number", + "description": "Minimum number of words that must be recognized before barge-in is triggered. Prevents brief noises from interrupting prompts.", + "examples": [ + 1, + 2 + ] + }, + "recognizer": { + "$ref": "../components/recognizer", + "description": "Override the session-level STT configuration for this gather." + }, + "say": { + "$ref": "say", + "description": "A nested 'say' verb to use as the prompt. Played to the caller while listening for input." + }, + "play": { + "$ref": "play", + "description": "A nested 'play' verb to use as the prompt. Played to the caller while listening for input." 
+ }, + "fillerNoise": { + "$ref": "../components/fillerNoise", + "description": "Filler noise configuration while waiting for the actionHook to respond." + }, + "actionHookDelayAction": { + "$ref": "../components/actionHookDelayAction", + "description": "Configuration for interim actions while the actionHook is processing." + } + }, + "examples": [ + { + "verb": "gather", + "input": [ + "speech", + "digits" + ], + "actionHook": "/gather-result", + "timeout": 15, + "say": { + "text": "Please say or enter your account number." + } + }, + { + "verb": "gather", + "input": [ + "digits" + ], + "actionHook": "/menu-selection", + "numDigits": 1, + "say": { + "text": "Press 1 for sales, 2 for support, or 3 for billing." + } + }, + { + "verb": "gather", + "input": [ + "speech" + ], + "actionHook": "/process-speech", + "timeout": 20, + "bargein": true, + "recognizer": { + "vendor": "deepgram", + "language": "en-US", + "hints": [ + "account", + "balance", + "transfer", + "payment" + ] + }, + "say": { + "text": "How can I help you today?" + }, + "fillerNoise": { + "enable": true, + "url": "https://example.com/sounds/typing.wav", + "startDelaySecs": 2 + } + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/google_s2s.schema.json b/src/jambonz_sdk/schema/verbs/google_s2s.schema.json new file mode 100644 index 0000000..2558539 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/google_s2s.schema.json @@ -0,0 +1,42 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/google_s2s", + "minVersion": "10.1.0", + "title": "Google S2S", + "description": "Shortcut for 'llm' with vendor automatically set to 'google'. Connects the caller to a Google model for real-time speech-to-speech voice conversation.", + "type": "object", + "allOf": [ + { + "$ref": "../components/llm-base" + } + ], + "properties": { + "verb": { + "const": "google_s2s", + "description": "The verb name." 
+ }, + "vendor": { + "type": "string", + "const": "google", + "description": "The LLM vendor (always 'google' for this shortcut)." + } + }, + "required": [ + "llmOptions" + ], + "examples": [ + { + "verb": "google_s2s", + "model": "gemini-2.0-flash", + "llmOptions": { + "messages": [ + { + "role": "system", + "content": "You are a helpful voice assistant." + } + ] + }, + "actionHook": "/s2s-complete" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/hangup.schema.json b/src/jambonz_sdk/schema/verbs/hangup.schema.json new file mode 100644 index 0000000..ef96d03 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/hangup.schema.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/hangup", + "minVersion": "0.9.6", + "title": "Hangup", + "description": "Terminates the call. Optionally includes custom SIP headers on the BYE request.", + "type": "object", + "properties": { + "verb": { + "const": "hangup", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "headers": { + "type": "object", + "description": "Custom SIP headers to include on the BYE request.", + "additionalProperties": { + "type": "string" + } + } + }, + "examples": [ + { + "verb": "hangup" + }, + { + "verb": "hangup", + "headers": { + "X-Reason": "call-complete" + } + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/leave.schema.json b/src/jambonz_sdk/schema/verbs/leave.schema.json new file mode 100644 index 0000000..523a743 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/leave.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/leave", + "minVersion": "0.9.6", + "title": "Leave", + "description": "Removes the caller from a conference or queue that they are currently in. 
Execution continues with the next verb in the application.", + "type": "object", + "properties": { + "verb": { + "const": "leave" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + } + }, + "examples": [ + { + "verb": "leave" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/listen.schema.json b/src/jambonz_sdk/schema/verbs/listen.schema.json new file mode 100644 index 0000000..863fadc --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/listen.schema.json @@ -0,0 +1,127 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/listen", + "minVersion": "0.9.6", + "title": "Listen", + "description": "Streams real-time call audio to an external websocket endpoint. The remote endpoint receives raw audio and can optionally send audio back (bidirectional). Used for custom speech processing, real-time analysis, AI agent integration, and recording to external systems.", + "type": "object", + "properties": { + "verb": { + "const": "listen", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "url": { + "type": "string", + "format": "uri", + "description": "The websocket URL to stream audio to.", + "examples": [ + "wss://myapp.example.com/audio-stream" + ] + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the listen session ends. Should return the next verbs to execute." + }, + "wsAuth": { + "$ref": "../components/auth", + "description": "Authentication credentials for the websocket connection." + }, + "mixType": { + "type": "string", + "description": "How to mix the audio channels when streaming. 
'mono' sends a single mixed channel, 'stereo' sends caller and callee as separate left/right channels, 'mixed' sends both as a single mixed stream.", + "enum": [ + "mono", + "stereo", + "mixed" + ], + "default": "mono" + }, + "metadata": { + "type": "object", + "description": "Arbitrary metadata to send to the websocket endpoint in the initial connection message.", + "additionalProperties": true + }, + "sampleRate": { + "type": "number", + "description": "The audio sample rate in Hz.", + "examples": [ + 8000, + 16000, + 24000 + ], + "default": 8000 + }, + "finishOnKey": { + "type": "string", + "description": "A DTMF key that ends the listen session when pressed.", + "examples": [ + "#" + ] + }, + "maxLength": { + "type": "number", + "description": "Maximum duration in seconds for the listen session." + }, + "passDtmf": { + "type": "boolean", + "description": "If true, forward DTMF events to the websocket endpoint." + }, + "playBeep": { + "type": "boolean", + "description": "If true, play a beep tone before streaming begins." + }, + "disableBidirectionalAudio": { + "type": "boolean", + "description": "If true, disable receiving audio from the websocket endpoint. Audio flows only from the call to the websocket, not back." + }, + "bidirectionalAudio": { + "$ref": "../components/bidirectionalAudio", + "description": "Fine-grained configuration for bidirectional audio." + }, + "timeout": { + "type": "number", + "description": "Time in seconds to wait for audio activity before ending the listen session." + }, + "transcribe": { + "$ref": "transcribe", + "description": "Nested transcribe verb — enables simultaneous real-time transcription of the audio being streamed." + }, + "earlyMedia": { + "type": "boolean", + "description": "If true, begin streaming audio before the call is formally answered." + }, + "channel": { + "type": "number", + "description": "Specific audio channel to stream. Used when streaming a single channel of a multi-channel call." 
+ } + }, + "required": [ + "url" + ], + "examples": [ + { + "verb": "listen", + "url": "wss://myapp.example.com/audio-stream", + "actionHook": "/listen-complete", + "sampleRate": 16000, + "mixType": "stereo" + }, + { + "verb": "listen", + "url": "wss://myapp.example.com/ai-agent", + "bidirectionalAudio": { + "enabled": true, + "streaming": true, + "sampleRate": 24000 + }, + "metadata": { + "callType": "support", + "language": "en-US" + } + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/llm.schema.json b/src/jambonz_sdk/schema/verbs/llm.schema.json new file mode 100644 index 0000000..e539ba1 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/llm.schema.json @@ -0,0 +1,44 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/llm", + "minVersion": "0.9.6", + "title": "LLM", + "description": "Connects the caller to a large language model for a real-time voice conversation. Handles the complete STT → LLM → TTS pipeline, including turn detection, interruption handling, and tool/function calling. The caller speaks naturally and the LLM responds via synthesized speech. This is the primary verb for building AI voice agents on jambonz.", + "type": "object", + "allOf": [ + { + "$ref": "../components/llm-base" + } + ], + "properties": { + "verb": { + "const": "llm", + "description": "The verb name." + } + }, + "required": [ + "vendor", + "llmOptions" + ], + "examples": [ + { + "verb": "llm", + "vendor": "openai", + "model": "gpt-4o", + "auth": { + "apiKey": "sk-..." + }, + "llmOptions": { + "messages": [ + { + "role": "system", + "content": "You are a helpful customer service agent. Be concise and friendly." 
+ } + ], + "temperature": 0.7 + }, + "actionHook": "/llm-complete", + "toolHook": "/llm-tool-call" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/message.schema.json b/src/jambonz_sdk/schema/verbs/message.schema.json new file mode 100644 index 0000000..3ad1e7e --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/message.schema.json @@ -0,0 +1,82 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/message", + "minVersion": "0.9.6", + "title": "Message", + "description": "Sends an SMS or MMS message. Can be used during a voice call to send a text message to the caller or another party, or as a standalone action.", + "type": "object", + "properties": { + "verb": { + "const": "message" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "to": { + "type": "string", + "description": "The destination phone number in E.164 format.", + "examples": [ + "+15085551212" + ] + }, + "from": { + "type": "string", + "description": "The sender phone number in E.164 format. Must be a number provisioned on the jambonz platform.", + "examples": [ + "+15085559876" + ] + }, + "text": { + "type": "string", + "description": "The text content of the message." + }, + "media": { + "oneOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "array", + "items": { + "type": "string", + "format": "uri" + } + } + ], + "description": "URL(s) of media to attach to the message (MMS). Can be images, audio, or video.", + "examples": [ + "https://example.com/images/receipt.png" + ] + }, + "carrier": { + "type": "string", + "description": "The messaging carrier to use. If not specified, the default carrier is used." + }, + "account_sid": { + "type": "string", + "description": "The account SID to use for sending. Defaults to the current account." + }, + "message_sid": { + "type": "string", + "description": "An optional message SID for tracking." 
+ }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the message send completes or fails." + } + }, + "required": [ + "to", + "from" + ], + "examples": [ + { + "verb": "message", + "to": "+15085551212", + "from": "+15085559876", + "text": "Your order has been confirmed. Order #12345." + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/openai_s2s.schema.json b/src/jambonz_sdk/schema/verbs/openai_s2s.schema.json new file mode 100644 index 0000000..a9fed5f --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/openai_s2s.schema.json @@ -0,0 +1,42 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/openai_s2s", + "minVersion": "10.1.0", + "title": "OpenAI S2S", + "description": "Shortcut for 'llm' with vendor automatically set to 'openai'. Connects the caller to an OpenAI model for real-time speech-to-speech voice conversation.", + "type": "object", + "allOf": [ + { + "$ref": "../components/llm-base" + } + ], + "properties": { + "verb": { + "const": "openai_s2s", + "description": "The verb name." + }, + "vendor": { + "type": "string", + "const": "openai", + "description": "The LLM vendor (always 'openai' for this shortcut)." + } + }, + "required": [ + "llmOptions" + ], + "examples": [ + { + "verb": "openai_s2s", + "model": "gpt-4o-realtime", + "llmOptions": { + "messages": [ + { + "role": "system", + "content": "You are a helpful voice assistant." + } + ] + }, + "actionHook": "/s2s-complete" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/pause.schema.json b/src/jambonz_sdk/schema/verbs/pause.schema.json new file mode 100644 index 0000000..bc2c50c --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/pause.schema.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/pause", + "minVersion": "0.9.6", + "title": "Pause", + "description": "Pauses execution for a specified number of seconds. 
The caller hears silence during the pause. Useful for adding delays between verbs.", + "type": "object", + "properties": { + "verb": { + "const": "pause", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "length": { + "type": "number", + "description": "The duration of the pause in seconds.", + "examples": [ + 1, + 2, + 5 + ] + } + }, + "required": [ + "length" + ], + "examples": [ + { + "verb": "pause", + "length": 2 + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/pipeline.schema.json b/src/jambonz_sdk/schema/verbs/pipeline.schema.json new file mode 100644 index 0000000..50f478b --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/pipeline.schema.json @@ -0,0 +1,240 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/pipeline", + "minVersion": "10.1.0", + "title": "Pipeline", + "description": "Configures a complete STT → LLM → TTS voice AI pipeline with integrated turn detection. Provides a higher-level abstraction than manually orchestrating the individual components. Optimized for building voice AI agents with proper turn-taking behavior.", + "type": "object", + "properties": { + "verb": { + "const": "pipeline" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "stt": { + "$ref": "../components/recognizer", + "description": "Speech-to-text configuration for the pipeline." + }, + "tts": { + "$ref": "../components/synthesizer", + "description": "Text-to-speech configuration for the pipeline." + }, + "turnDetection": { + "oneOf": [ + { + "type": "string", + "enum": ["stt", "krisp"], + "description": "Turn detection strategy shorthand. 'stt' uses the STT vendor's native signals (silence-based for most vendors; acoustic+semantic for deepgramflux, assemblyai, speechmatics). 'krisp' uses the Krisp acoustic end-of-turn model with default settings." 
+ }, + { + "type": "object", + "description": "Turn detection configuration with tunable parameters.", + "properties": { + "mode": { + "type": "string", + "enum": ["krisp"], + "description": "Turn detection mode. Currently only 'krisp' supports object-form tuning." + }, + "threshold": { + "type": "number", + "minimum": 0, + "maximum": 1, + "description": "Krisp end-of-turn confidence threshold (0.0–1.0). Lower values trigger earlier turn transitions. Default: 0.5" + }, + "model": { + "type": "string", + "description": "Optional Krisp model name override." + } + }, + "required": ["mode"], + "additionalProperties": false + } + ], + "default": "stt", + "description": "Turn detection strategy. Controls when the pipeline decides the user has finished speaking. STT vendors with native turn-taking (deepgramflux, assemblyai, speechmatics) always use their built-in detection regardless of this setting." + }, + "bargeIn": { + "type": "object", + "description": "Controls whether and how the user can interrupt the assistant while it is speaking.", + "properties": { + "enable": { + "type": "boolean", + "description": "Allow the user to interrupt the assistant while it is speaking. Default: true.", + "default": true + }, + "minSpeechDuration": { + "type": "number", + "minimum": 0, + "description": "Seconds of detected speech required before confirming an interruption. Prevents brief noises from cutting off the assistant. Default: 0.5", + "default": 0.5 + }, + "sticky": { + "type": "boolean", + "description": "If true, once the user interrupts the assistant does not resume speaking. Default: false.", + "default": false + } + }, + "additionalProperties": false + }, + "noResponseTimeout": { + "type": "number", + "minimum": 0, + "description": "Seconds to wait after the assistant finishes speaking before prompting the user to respond. 0 disables. Default: 0.", + "default": 0 + }, + "llm": { + "type": "object", + "description": "LLM configuration for the pipeline. 
See the 'llm' verb schema for details.", + "additionalProperties": true + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the pipeline ends." + }, + "eventHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked for pipeline events. Receives event types: 'user_transcript' (user speech recognized), 'agent_response' (assistant reply), 'user_interruption' (barge-in detected), and 'turn_end' (end-of-turn summary with transcript, response, and latency metrics)." + }, + "toolHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the LLM requests a tool/function call. The payload includes the tool name and arguments; the response provides the tool result." + }, + "greeting": { + "type": "boolean", + "description": "Whether the LLM should generate an initial greeting before the user speaks. Default: true.", + "default": true + }, + "earlyGeneration": { + "type": "boolean", + "description": "Enable speculative LLM prompting before end-of-turn is confirmed. When using Krisp turn detection, set this to true to speculatively prompt the LLM before Krisp confirms the turn has ended. If the transcript matches when turn ends, buffered tokens are released immediately — reducing response latency. Note: Deepgram Flux performs early generation automatically via its native EagerEndOfTurn signal regardless of this setting. Default: false.", + "default": false + }, + "noiseIsolation": { + "oneOf": [ + { + "type": "string", + "enum": ["krisp", "rnnoise"], + "description": "Shorthand — enable noise isolation with the specified vendor using default settings." + }, + { + "type": "object", + "description": "Detailed noise isolation configuration.", + "properties": { + "mode": { + "type": "string", + "description": "Noise isolation vendor/mode (e.g. 'krisp')." + }, + "level": { + "type": "number", + "minimum": 0, + "maximum": 100, + "description": "Suppression level 0–100. 
Default: 100." + }, + "direction": { + "type": "string", + "enum": ["read", "write"], + "description": "Audio direction to apply noise isolation. 'read' filters caller audio, 'write' filters outbound audio. Default: 'read'." + }, + "model": { + "type": "string", + "description": "Optional model name override." + } + }, + "required": ["mode"], + "additionalProperties": false + } + ], + "description": "Enable server-side noise isolation to reduce background noise on call audio. Defaults to filtering inbound (caller) audio; set direction to 'write' for outbound. Useful for improving STT accuracy in noisy environments." + }, + "mcpServers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL of the MCP server." + }, + "auth": { + "type": "object", + "description": "Authentication for the MCP server.", + "additionalProperties": true + }, + "roots": { + "type": "array", + "items": { "type": "object" }, + "description": "MCP root definitions." + } + }, + "required": ["url"] + }, + "description": "External MCP servers that provide tools to the LLM. The pipeline connects at startup via SSE, discovers available tools, and makes them callable by the LLM." + } + }, + "required": [ + "llm" + ], + "examples": [ + { + "verb": "pipeline", + "stt": { + "vendor": "deepgram", + "language": "en-US" + }, + "tts": { + "vendor": "cartesia", + "voice": "sonic-english" + }, + "llm": { + "vendor": "openai", + "model": "gpt-4o", + "llmOptions": { + "messages": [ + { + "role": "system", + "content": "You are a helpful voice assistant." 
+ } + ] + } + }, + "turnDetection": "stt", + "actionHook": "/pipeline-complete" + }, + { + "verb": "pipeline", + "stt": { + "vendor": "deepgram", + "language": "en-US" + }, + "tts": { + "vendor": "cartesia", + "voice": "sonic-english" + }, + "llm": { + "vendor": "anthropic", + "model": "claude-opus-4-6", + "llmOptions": { + "messages": [ + { + "role": "user", + "content": "You are a helpful voice assistant." + } + ] + } + }, + "turnDetection": { + "mode": "krisp", + "threshold": 0.3 + }, + "bargeIn": { + "enable": true, + "minSpeechDuration": 0.3, + "sticky": false + }, + "actionHook": "/pipeline-complete" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/play.schema.json b/src/jambonz_sdk/schema/verbs/play.schema.json new file mode 100644 index 0000000..b193b22 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/play.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/play", + "minVersion": "0.9.6", + "title": "Play", + "description": "Plays an audio file to the caller. Supports WAV and MP3 formats hosted at a URL. Can play a single file or cycle through a list of files.", + "type": "object", + "properties": { + "verb": { + "const": "play", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "url": { + "oneOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "array", + "items": { + "type": "string", + "format": "uri" + } + } + ], + "description": "The URL(s) of the audio file(s) to play. Supports WAV and MP3. If an array, files are played in sequence.", + "examples": [ + "https://example.com/sounds/greeting.wav", + [ + "https://example.com/sounds/part1.wav", + "https://example.com/sounds/part2.wav" + ] + ] + }, + "loop": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "description": "Number of times to repeat playback. 
Use 0 or 'forever' to loop indefinitely until interrupted.", + "examples": [ + 3, + "forever" + ] + }, + "earlyMedia": { + "type": "boolean", + "description": "If true, play the audio as early media before the call is answered." + }, + "seekOffset": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "description": "Start playback at this offset in seconds from the beginning of the file." + }, + "timeoutSecs": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "description": "Maximum time in seconds to play the audio. Playback stops after this duration even if the file has not finished." + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook to invoke when playback completes." + } + }, + "required": [ + "url" + ], + "examples": [ + { + "verb": "play", + "url": "https://example.com/sounds/hold-music.mp3", + "loop": "forever" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/redirect.schema.json b/src/jambonz_sdk/schema/verbs/redirect.schema.json new file mode 100644 index 0000000..2cd461c --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/redirect.schema.json @@ -0,0 +1,34 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/redirect", + "minVersion": "0.9.6", + "title": "Redirect", + "description": "Transfers call control to a different webhook URL. The current verb stack is abandoned and the new webhook's response becomes the active application. Useful for modular application design where different URLs handle different phases of a call.", + "type": "object", + "properties": { + "verb": { + "const": "redirect" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "The webhook to transfer control to. Must return a new array of verbs." 
+ }, + "statusHook": { + "$ref": "../components/actionHook", + "description": "A webhook to receive call status events after the redirect." + } + }, + "required": [ + "actionHook" + ], + "examples": [ + { + "verb": "redirect", + "actionHook": "/new-handler" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/s2s.schema.json b/src/jambonz_sdk/schema/verbs/s2s.schema.json new file mode 100644 index 0000000..935dc13 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/s2s.schema.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/s2s", + "minVersion": "10.1.0", + "title": "S2S", + "description": "Synonym for 'llm'. Connects the caller to a large language model for a real-time speech-to-speech voice conversation. Requires 'vendor' to be specified explicitly.", + "type": "object", + "allOf": [ + { + "$ref": "../components/llm-base" + } + ], + "properties": { + "verb": { + "const": "s2s", + "description": "The verb name." + } + }, + "required": [ + "vendor", + "llmOptions" + ], + "examples": [ + { + "verb": "s2s", + "vendor": "openai", + "model": "gpt-4o-realtime", + "llmOptions": { + "messages": [ + { + "role": "system", + "content": "You are a helpful voice assistant." + } + ] + }, + "actionHook": "/s2s-complete" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/say.schema.json b/src/jambonz_sdk/schema/verbs/say.schema.json new file mode 100644 index 0000000..dac059b --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/say.schema.json @@ -0,0 +1,107 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/say", + "minVersion": "0.9.6", + "title": "Say", + "description": "Speaks text to the caller using text-to-speech. The text can be plain text or SSML. Optionally streams TTS output incrementally for lower latency. 
This is one of the most commonly used verbs in jambonz applications.", + "type": "object", + "properties": { + "verb": { + "const": "say", + "description": "The verb name." + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance. Can be used to reference it in other contexts." + }, + "text": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "The text to speak. Can be plain text or SSML markup. If an array is provided, one entry is selected at random (useful for variety in prompts).", + "examples": [ + "Hello, welcome to our service.", + "Hello, welcome.", + [ + "Hello!", + "Hi there!", + "Welcome!" + ] + ] + }, + "instructions": { + "type": "string", + "description": "Natural language instructions to guide TTS expression and delivery. Supported by vendors that offer instruction-based synthesis (e.g. ElevenLabs, some OpenAI models).", + "examples": [ + "Speak in a warm, friendly tone", + "Sound excited and energetic" + ] + }, + "stream": { + "type": "boolean", + "description": "If true, stream TTS audio to the caller incrementally as it is generated, rather than waiting for the complete audio. Reduces time-to-first-byte for long utterances. Requires a vendor that supports streaming synthesis." + }, + "loop": { + "oneOf": [ + { + "type": "number" + }, + { + "type": "string" + } + ], + "description": "Number of times to repeat the speech. Use 0 or 'forever' to loop indefinitely until interrupted.", + "examples": [ + 2, + "forever" + ] + }, + "synthesizer": { + "$ref": "../components/synthesizer", + "description": "Override the session-level TTS configuration for this specific utterance." + }, + "earlyMedia": { + "type": "boolean", + "description": "If true, play the audio as early media (before the call is answered). Used for playing announcements or prompts to the caller before the call is formally connected." 
+ }, + "disableTtsCache": { + "type": "boolean", + "description": "If true, bypass the TTS cache and always generate fresh audio. Useful when the same text should be re-synthesized (e.g. with different SSML or when the voice has been updated)." + }, + "closeStreamOnEmpty": { + "type": "boolean", + "description": "If true, close the TTS stream when an empty text string is received. Only applies when stream is true." + } + }, + "examples": [ + { + "verb": "say", + "text": "Hello, welcome to Acme Corp. How can I help you today?" + }, + { + "verb": "say", + "text": "Please hold while I transfer your call.", + "synthesizer": { + "vendor": "elevenlabs", + "voice": "Rachel" + } + }, + { + "verb": "say", + "text": [ + "Hello!", + "Hi there!", + "Welcome!" + ], + "loop": 1 + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/sip-decline.schema.json b/src/jambonz_sdk/schema/verbs/sip-decline.schema.json new file mode 100644 index 0000000..2bea669 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/sip-decline.schema.json @@ -0,0 +1,58 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/sip:decline", + "minVersion": "0.9.6", + "title": "SIP Decline", + "description": "Rejects an incoming call with a SIP error response. Used to decline calls with a specific status code and reason (e.g. 486 Busy Here, 603 Decline).", + "type": "object", + "properties": { + "verb": { + "const": "sip:decline" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." 
+ }, + "status": { + "type": "number", + "description": "The SIP response status code to send.", + "examples": [ + 486, + 603, + 404, + 480 + ] + }, + "reason": { + "type": "string", + "description": "The SIP reason phrase to include in the response.", + "examples": [ + "Busy Here", + "Decline", + "Not Found" + ] + }, + "headers": { + "type": "object", + "description": "Custom SIP headers to include in the response.", + "additionalProperties": { + "type": "string" + } + } + }, + "required": [ + "status" + ], + "examples": [ + { + "verb": "sip:decline", + "status": 486, + "reason": "Busy Here" + }, + { + "verb": "sip:decline", + "status": 603, + "reason": "Decline" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/sip-refer.schema.json b/src/jambonz_sdk/schema/verbs/sip-refer.schema.json new file mode 100644 index 0000000..1909519 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/sip-refer.schema.json @@ -0,0 +1,58 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/sip:refer", + "minVersion": "0.9.6", + "title": "SIP Refer", + "description": "Sends a SIP REFER request to transfer the call to another party. Initiates an attended or unattended (blind) transfer.", + "type": "object", + "properties": { + "verb": { + "const": "sip:refer" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "referTo": { + "type": "string", + "description": "The SIP URI or phone number to transfer the call to.", + "examples": [ + "sip:alice@example.com", + "+15085551212" + ] + }, + "referredBy": { + "type": "string", + "description": "The SIP URI to use in the Referred-By header." + }, + "referredByDisplayName": { + "type": "string", + "description": "The display name to use in the Referred-By header." 
+ }, + "headers": { + "type": "object", + "description": "Custom SIP headers to include in the REFER request.", + "additionalProperties": { + "type": "string" + } + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the REFER completes (or fails)." + }, + "eventHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked for NOTIFY events during the REFER process, providing transfer progress updates." + } + }, + "required": [ + "referTo" + ], + "examples": [ + { + "verb": "sip:refer", + "referTo": "sip:alice@example.com", + "actionHook": "/refer-complete" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/sip-request.schema.json b/src/jambonz_sdk/schema/verbs/sip-request.schema.json new file mode 100644 index 0000000..20fc013 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/sip-request.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/sip:request", + "minVersion": "0.9.6", + "title": "SIP Request", + "description": "Sends a SIP request within the current dialog. Used to send INFO, NOTIFY, or other SIP methods to the remote party during an active call.", + "type": "object", + "properties": { + "verb": { + "const": "sip:request" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "method": { + "type": "string", + "description": "The SIP method to send.", + "examples": [ + "INFO", + "NOTIFY", + "MESSAGE" + ] + }, + "body": { + "type": "string", + "description": "The body of the SIP request." + }, + "headers": { + "type": "object", + "description": "Custom SIP headers to include in the request.", + "additionalProperties": { + "type": "string" + } + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the response to the SIP request is received." 
+ } + }, + "required": [ + "method" + ], + "examples": [ + { + "verb": "sip:request", + "method": "INFO", + "body": "Signal=1\nDuration=250", + "headers": { + "Content-Type": "application/dtmf-relay" + } + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/stream.schema.json b/src/jambonz_sdk/schema/verbs/stream.schema.json new file mode 100644 index 0000000..9252b00 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/stream.schema.json @@ -0,0 +1,103 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/stream", + "minVersion": "0.9.6", + "title": "Stream", + "description": "Streams real-time call audio to an external websocket endpoint. Functionally equivalent to 'listen' — this is an alias provided for naming clarity when the intent is audio streaming rather than recording.", + "type": "object", + "properties": { + "verb": { + "const": "stream" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "url": { + "type": "string", + "format": "uri", + "description": "The websocket URL to stream audio to." + }, + "actionHook": { + "$ref": "../components/actionHook", + "description": "A webhook invoked when the stream ends." + }, + "wsAuth": { + "$ref": "../components/auth", + "description": "Authentication credentials for the websocket connection." + }, + "mixType": { + "type": "string", + "enum": [ + "mono", + "stereo", + "mixed" + ], + "description": "How to mix audio channels." + }, + "metadata": { + "type": "object", + "description": "Metadata to send with the initial connection.", + "additionalProperties": true + }, + "sampleRate": { + "type": "number", + "description": "Audio sample rate in Hz.", + "examples": [ + 8000, + 16000 + ] + }, + "finishOnKey": { + "type": "string", + "description": "DTMF key that ends the stream." + }, + "maxLength": { + "type": "number", + "description": "Maximum duration in seconds." 
+ }, + "passDtmf": { + "type": "boolean", + "description": "Forward DTMF events to the websocket." + }, + "playBeep": { + "type": "boolean", + "description": "Play a beep before streaming begins." + }, + "disableBidirectionalAudio": { + "type": "boolean", + "description": "Disable receiving audio from the websocket." + }, + "bidirectionalAudio": { + "$ref": "../components/bidirectionalAudio", + "description": "Bidirectional audio configuration." + }, + "timeout": { + "type": "number", + "description": "Inactivity timeout in seconds." + }, + "transcribe": { + "$ref": "transcribe", + "description": "Nested transcribe verb — enables simultaneous real-time transcription of the streamed audio." + }, + "earlyMedia": { + "type": "boolean", + "description": "Stream audio before the call is answered." + }, + "channel": { + "type": "number", + "description": "Specific audio channel to stream. Used when streaming a single channel of a multi-channel call." + } + }, + "required": [ + "url" + ], + "examples": [ + { + "verb": "stream", + "url": "wss://myapp.example.com/audio", + "sampleRate": 16000, + "mixType": "stereo" + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/tag.schema.json b/src/jambonz_sdk/schema/verbs/tag.schema.json new file mode 100644 index 0000000..610adac --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/tag.schema.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/tag", + "minVersion": "0.9.6", + "title": "Tag", + "description": "Attaches arbitrary metadata to the current call. Tagged data is included in all subsequent webhook requests and in the call detail record (CDR). Useful for tracking business context, routing decisions, or analytics data through the call lifecycle.", + "type": "object", + "properties": { + "verb": { + "const": "tag" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." 
+ }, + "data": { + "type": "object", + "description": "An object containing the metadata to attach to the call. Keys and values are application-defined.", + "additionalProperties": true, + "examples": [ + { + "customerId": "12345", + "department": "support", + "priority": "high" + } + ] + } + }, + "required": [ + "data" + ], + "examples": [ + { + "verb": "tag", + "data": { + "customerId": "12345", + "intent": "billing-inquiry" + } + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/transcribe.schema.json b/src/jambonz_sdk/schema/verbs/transcribe.schema.json new file mode 100644 index 0000000..48f1bbf --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/transcribe.schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/transcribe", + "minVersion": "0.9.6", + "title": "Transcribe", + "description": "Enables real-time transcription of the call audio. Transcription results are sent to the transcriptionHook as they are produced. Runs as a background process — subsequent verbs execute immediately while transcription continues.", + "type": "object", + "properties": { + "verb": { + "const": "transcribe" + }, + "id": { + "type": "string", + "description": "An optional unique identifier for this verb instance." + }, + "enable": { + "type": "boolean", + "description": "Enable or disable transcription. Used when transcribe is nested inside a config or dial verb to start or stop background transcription." + }, + "transcriptionHook": { + "type": "string", + "format": "uri", + "description": "The webhook URL to receive transcription results." + }, + "translationHook": { + "type": "string", + "format": "uri", + "description": "The webhook URL to receive translated transcription results." + }, + "recognizer": { + "$ref": "../components/recognizer", + "description": "STT configuration for the transcription." 
+ }, + "earlyMedia": { + "type": "boolean", + "description": "If true, begin transcribing before the call is answered." + }, + "channel": { + "type": "number", + "description": "Specific audio channel to transcribe." + } + }, + "examples": [ + { + "verb": "transcribe", + "transcriptionHook": "https://myapp.example.com/transcription", + "recognizer": { + "vendor": "deepgram", + "language": "en-US", + "deepgramOptions": { + "model": "nova-2", + "smartFormatting": true + } + } + } + ] +} diff --git a/src/jambonz_sdk/schema/verbs/ultravox_s2s.schema.json b/src/jambonz_sdk/schema/verbs/ultravox_s2s.schema.json new file mode 100644 index 0000000..308f7c5 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/ultravox_s2s.schema.json @@ -0,0 +1,41 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/ultravox_s2s", + "minVersion": "10.1.0", + "title": "Ultravox S2S", + "description": "Shortcut for 'llm' with vendor automatically set to 'ultravox'. Connects the caller to an Ultravox model for real-time speech-to-speech voice conversation.", + "type": "object", + "allOf": [ + { + "$ref": "../components/llm-base" + } + ], + "properties": { + "verb": { + "const": "ultravox_s2s", + "description": "The verb name." + }, + "vendor": { + "type": "string", + "const": "ultravox", + "description": "The LLM vendor (always 'ultravox' for this shortcut)." + } + }, + "required": [ + "llmOptions" + ], + "examples": [ + { + "verb": "ultravox_s2s", + "llmOptions": { + "messages": [ + { + "role": "system", + "content": "You are a helpful voice assistant." 
+ } + ] + }, + "actionHook": "/s2s-complete" + } + ] +} diff --git a/src/jambonz_sdk/specs.json b/src/jambonz_sdk/specs.json deleted file mode 100644 index 8b47edd..0000000 --- a/src/jambonz_sdk/specs.json +++ /dev/null @@ -1,1423 +0,0 @@ -{ - "alert" : { - "properties": { - "id": "string", - "message": "string" - }, - "required": [ - "message" - ] - }, - "answer": { - "properties": { - "id": "string" - }, - "required": [ - ] - }, - "sip:decline": { - "properties": { - "id": "string", - "status": "number", - "reason": "string", - "headers": "object" - }, - "required": [ - "status" - ] - }, - "sip:request": { - "properties": { - "id": "string", - "method": "string", - "body": "string", - "headers": "object", - "actionHook": "object|string" - }, - "required": [ - "method" - ] - }, - "sip:refer": { - "properties": { - "id": "string", - "referTo": "string", - "referredBy": "string", - "referredByDisplayName": "string", - "headers": "object", - "actionHook": "object|string", - "eventHook": "object|string" - }, - "required": [ - "referTo" - ] - }, - "config": { - "properties": { - "id": "string", - "synthesizer": "#synthesizer", - "recognizer": "#recognizer", - "bargeIn": "#bargeIn", - "ttsStream": "#ttsStream", - "record": "#recordOptions", - "listen": "#listenOptions", - "stream": "#listenOptions", - "transcribe": "#transcribeOptions", - "amd": "#amd", - "fillerNoise": "#fillerNoise", - "notifyEvents": "boolean", - "notifySttLatency": "boolean", - "reset": "string|array", - "onHoldMusic": "string", - "actionHookDelayAction": "#actionHookDelayAction", - "sipRequestWithinDialogHook": "object|string", - "boostAudioSignal": "number|string", - "vad":"#vad", - "referHook": "object|string", - "earlyMedia": "boolean", - "autoStreamTts": "boolean", - "disableTtsCache": "boolean", - "trackTtsPlayout": "boolean", - "noiseIsolation": "#noiseIsolation", - "turnTaking": "#turnTaking" - }, - "required": [] - }, - "fillerNoise": { - "properties": { - "enable": "boolean", - "url": 
"string", - "startDelaySecs": "number" - }, - "required": [ - "enable" - ] - }, - "listenOptions": { - "properties": { - "enable": "boolean", - "url": "string", - "sampleRate": "number", - "wsAuth": "#auth", - "mixType": { - "type": "string", - "enum": ["mono", "stereo", "mixed"] - }, - "metadata": "object", - "maxLength": "number", - "passDtmf": "boolean", - "playBeep": "boolean", - "disableBidirectionalAudio": "boolean", - "bidirectionalAudio": "#bidirectionalAudio", - "timeout": "number" - }, - "required": [ - "enable" - ] - }, - "ttsStream": { - "properties": { - "enable": "boolean", - "synthesizer": "#synthesizer" - }, - "required": [ - "enable" - ] - }, - "bargeIn": { - "properties": { - "enable": "boolean", - "sticky": "boolean", - "actionHook": "object|string", - "partialResultHook": "object|string", - "input": "array", - "finishOnKey": "string", - "numDigits": "number", - "minDigits": "number", - "maxDigits": "number", - "interDigitTimeout": "number", - "dtmfBargein": "boolean", - "minBargeinWordCount": "number" - }, - "required": [ - "enable" - ] - }, - "transcribeOptions": { - "properties": { - "enable": "boolean", - "transcriptionHook": "string", - "recognizer": "#recognizer" - }, - "required": [ - "enable" - ] - }, - "dub": { - "properties": { - "id": "string", - "action": { - "type": "string", - "enum": ["addTrack", "removeTrack", "silenceTrack", "playOnTrack", "sayOnTrack"] - }, - "track": "string", - "play": "string", - "say": "string|object", - "loop": "boolean", - "gain": "number|string" - }, - "required": [ - "action", - "track" - ] - }, - "dequeue": { - "properties": { - "id": "string", - "name": "string", - "actionHook": "object|string", - "timeout": "number", - "beep": "boolean", - "callSid": "string" - }, - "required": [ - "name" - ] - }, - "enqueue": { - "properties": { - "id": "string", - "name": "string", - "actionHook": "object|string", - "waitHook": "object|string", - "priority": "number", - "_": "object" - }, - "required": [ - "name" - 
] - }, - "leave": { - "properties": { - "id": "string" - } - }, - "hangup": { - "properties": { - "id": "string", - "headers": "object" - }, - "required": [ - ] - }, - "play": { - "properties": { - "id": "string", - "url": "string|array", - "loop": "number|string", - "earlyMedia": "boolean", - "seekOffset": "number|string", - "timeoutSecs": "number|string", - "actionHook": "object|string" - }, - "required": [ - "url" - ] - }, - "say": { - "properties": { - "id": "string", - "text": "string|array", - "instructions": "string", - "stream": "boolean", - "loop": "number|string", - "synthesizer": "#synthesizer", - "earlyMedia": "boolean", - "disableTtsCache": "boolean", - "closeStreamOnEmpty": "boolean" - }, - "required": [ - ] - }, - "gather": { - "properties": { - "id": "string", - "actionHook": "object|string", - "finishOnKey": "string", - "input": "array", - "numDigits": "number", - "minDigits": "number", - "maxDigits": "number", - "interDigitTimeout": "number", - "partialResultHook": "object|string", - "speechTimeout": "number", - "listenDuringPrompt": "boolean", - "dtmfBargein": "boolean", - "bargein": "boolean", - "minBargeinWordCount": "number", - "timeout": "number", - "recognizer": "#recognizer", - "play": "#play", - "say": "#say", - "fillerNoise": "#fillerNoise", - "actionHookDelayAction": "#actionHookDelayAction" - }, - "required": [ - ] - }, - "conference": { - "properties": { - "id": "string", - "name": "string", - "beep": "boolean", - "memberTag": "string", - "speakOnlyTo": "string", - "startConferenceOnEnter": "boolean", - "endConferenceOnExit": "boolean", - "endConferenceDuration": "number", - "maxParticipants": "number", - "joinMuted": "boolean", - "actionHook": "object|string", - "waitHook": "object|string", - "statusEvents": "array", - "statusHook": "object|string", - "enterHook": "object|string", - "record": "#record", - "listen": "#listen", - "distributeDtmf": "boolean" - }, - "required": [ - "name" - ] - }, - "dial": { - "properties": { - "id": 
"string", - "actionHook": "object|string", - "onHoldHook": "object|string", - "answerOnBridge": "boolean", - "callerId": "string", - "callerName": "string", - "confirmHook": "object|string", - "referHook": "object|string", - "dialMusic": "string", - "dtmfCapture": "object", - "dtmfHook": "object|string", - "headers": "object", - "anchorMedia": "boolean", - "exitMediaPath": "boolean", - "boostAudioSignal": "number|string", - "listen": "#listen", - "stream": "#listen", - "target": ["#target"], - "timeLimit": "number", - "timeout": "number", - "proxy": "string", - "transcribe": "#transcribe", - "amd": "#amd", - "dub": ["#dub"], - "tag": "object", - "forwardPAI": "boolean" - }, - "required": [ - "target" - ] - }, - "dialogflow": { - "properties": { - "id": "string", - "credentials": "object|string", - "project": "string", - "agent": "string", - "environment": "string", - "region": "string", - "model": { - "type": "string", - "enum": ["es", "cx", "ces"] - }, - "lang": "string", - "actionHook": "object|string", - "eventHook": "object|string", - "events": "array", - "welcomeEvent": "string", - "welcomeEventParams": "object", - "noInputTimeout": "number", - "noInputEvent": "string", - "passDtmfAsTextInput": "boolean", - "thinkingMusic": "string", - "tts": "#synthesizer", - "bargein": "boolean", - "queryInput": "#queryInput" - }, - "required": [ - "project", - "credentials", - "lang" - ] - }, - "queryInput": { - "properties": { - "text": "string", - "intent": "string", - "event": "string", - "dtmf": "string" - }, - "required": [ - ] - }, - "dtmf": { - "properties": { - "id": "string", - "dtmf": "string", - "duration": "number" - }, - "required": [ - "dtmf" - ] - }, - "lex": { - "properties": { - "id": "string", - "botId": "string", - "botAlias": "string", - "credentials": "object", - "region": "string", - "locale": "string", - "intent": "#lexIntent", - "welcomeMessage": "string", - "metadata": "object", - "bargein": "boolean", - "passDtmf": "boolean", - "actionHook": 
"object|string", - "eventHook": "object|string", - "noInputTimeout": "number", - "tts": "#synthesizer" - }, - "required": [ - "botId", - "botAlias", - "region", - "credentials" - ] - }, - "listen": { - "properties": { - "id": "string", - "actionHook": "object|string", - "auth": "#auth", - "finishOnKey": "string", - "maxLength": "number", - "metadata": "object", - "mixType": { - "type": "string", - "enum": ["mono", "stereo", "mixed"] - }, - "passDtmf": "boolean", - "playBeep": "boolean", - "disableBidirectionalAudio": "boolean", - "bidirectionalAudio": "#bidirectionalAudio", - "sampleRate": "number", - "timeout": "number", - "transcribe": "#transcribe", - "url": "string", - "wsAuth": "#auth", - "earlyMedia": "boolean", - "channel": "number" - }, - "required": [ - "url" - ] - }, - "llm": { - "properties": { - "id": "string", - "vendor": "string", - "model": "string", - "auth": "object", - "connectOptions": "object", - "mcpServers": ["#mcpServer"], - "actionHook": "object|string", - "eventHook": "object|string", - "toolHook": "object|string", - "events": "array", - "llmOptions": "object" - }, - "required": [ - "vendor", - "llmOptions" - ] - }, - "mcpServer": { - "properties": { - "url": "string", - "auth": "object", - "roots": ["#root"] - }, - "required": [ - "url" - ] - }, - "message": { - "properties": { - "id": "string", - "carrier": "string", - "account_sid": "string", - "message_sid": "string", - "to": "string", - "from": "string", - "text": "string", - "media": "string|array", - "actionHook": "object|string" - }, - "required": [ - "to", - "from" - ] - }, - "pause": { - "properties": { - "id": "string", - "length": "number" - }, - "required": [ - "length" - ] - }, - "rasa": { - "properties": { - "id": "string", - "url": "string", - "recognizer": "#recognizer", - "tts": "#synthesizer", - "prompt": "string", - "actionHook": "object|string", - "eventHook": "object|string" - }, - "required": [ - "url" - ] - }, - "record": { - "properties": { - "path": "string" - }, - 
"required": [ - "path" - ] - }, - "recordOptions": { - "properties": { - "action": { - "type": "string", - "enum": ["startCallRecording", "stopCallRecording", "pauseCallRecording", "resumeCallRecording"] - }, - "type" : { - "type" : "string", - "enum" : ["cloud", "siprec"] - - }, - "recordingID": "string", - "siprecServerURL": "string|array", - "headers": "object" - }, - "required": [ - "action" - ] - }, - "redirect": { - "properties": { - "id": "string", - "actionHook": "object|string", - "statusHook": "object|string" - }, - "required": [ - "actionHook" - ] - }, - "rest:dial": { - "properties": { - "id": "string", - "account_sid": "string", - "application_sid": "string", - "call_hook": "object|string", - "call_status_hook": "object|string", - "from": "string", - "callerName": "string", - "fromHost": "string", - "speech_synthesis_vendor": "string", - "speech_synthesis_voice": "string", - "speech_synthesis_language": "string", - "speech_recognizer_vendor": "string", - "speech_recognizer_language": "string", - "tag": "object", - "to": "#target", - "headers": "object", - "timeout": "number", - "amd": "#amd", - "dual_streams": "boolean", - "sipRequestWithinDialogHook": "string", - "referHook": "object|string", - "timeLimit": "number" - }, - "required": [ - "call_hook", - "from", - "to" - ] - }, - "tag": { - "properties": { - "id": "string", - "data": "object" - }, - "required": [ - "data" - ] - }, - "transcribe": { - "properties": { - "id": "string", - "transcriptionHook": "string", - "translationHook": "string", - "recognizer": "#recognizer", - "earlyMedia": "boolean", - "channel": "number" - }, - "required": [ - ] - }, - "target": { - "properties": { - "type": { - "type": "string", - "enum": ["phone", "sip", "user", "teams"] - }, - "confirmHook": "object|string", - "method": { - "type": "string", - "enum": ["GET", "POST"] - }, - "headers": "object", - "from": "#dialFrom", - "name": "string", - "number": "string", - "sipUri": "string", - "auth": "#auth", - "vmail": 
"boolean", - "tenant": "string", - "trunk": "string", - "overrideTo": "string", - "proxy": "string" - }, - "required": [ - "type" - ] - }, - "dialFrom": { - "properties": { - "user": "string", - "host": "string" - }, - "required": [ - ] - }, - "auth": { - "properties": { - "username": "string", - "password": "string" - }, - "required": [ - "username", - "password" - ] - }, - "synthesizer": { - "properties": { - "vendor": "string", - "label": "string", - "language": "string", - "voice": "string|object", - "fallbackVendor": "string", - "fallbackLabel": "string", - "fallbackLanguage": "string", - "fallbackVoice": "string|object", - "engine": { - "type": "string", - "enum": ["standard", "neural", "generative", "long-form"] - }, - "gender": { - "type": "string", - "enum": ["MALE", "FEMALE", "NEUTRAL"] - }, - "options": "object" - }, - "required": [ - "vendor" - ] - }, - "recognizer": { - "properties": { - "vendor": "string", - "label": "string", - "language": "string", - "fallbackVendor": "string", - "fallbackLabel": "string", - "fallbackLanguage": "string", - "vad": "#vad", - "autogeneratePrompt": "boolean", - "hints": "array", - "hintsBoost": "number", - "altLanguages": "array", - "profanityFilter": "boolean", - "interim": "boolean", - "singleUtterance": "boolean", - "dualChannel": "boolean", - "separateRecognitionPerChannel": "boolean", - "punctuation": "boolean", - "enhancedModel": "boolean", - "words": "boolean", - "diarization": "boolean", - "diarizationMinSpeakers": "number", - "diarizationMaxSpeakers": "number", - "interactionType": { - "type": "string", - "enum": [ - "unspecified", - "discussion", - "presentation", - "phone_call", - "voicemail", - "voice_search", - "voice_command", - "dictation" - ] - }, - "naicsCode": "number", - "identifyChannels": "boolean", - "vocabularyName": "string", - "vocabularyFilterName": "string", - "filterMethod": { - "type": "string", - "enum": [ - "remove", - "mask", - "tag" - ] - }, - "model": "string", - "outputFormat": { - 
"type": "string", - "enum": [ - "simple", - "detailed" - ] - }, - "profanityOption": { - "type": "string", - "enum": [ - "masked", - "removed", - "raw" - ] - }, - "requestSnr": "boolean", - "initialSpeechTimeoutMs": "number", - "azureServiceEndpoint": "string", - "azureSttEndpointId": "string", - "asrDtmfTerminationDigit": "string", - "asrTimeout": "number", - "fastRecognitionTimeout": "number", - "minConfidence": "number", - "nuanceOptions": "#nuanceOptions", - "deepgramOptions": "#deepgramOptions", - "ibmOptions": "#ibmOptions", - "nvidiaOptions": "#nvidiaOptions", - "sonioxOptions": "#sonioxOptions", - "cobaltOptions": "#cobaltOptions", - "awsOptions": "#awsOptions", - "azureOptions": "#azureOptions", - "assemblyAiOptions": "#assemblyAiOptions", - "googleOptions": "#googleOptions", - "customOptions": "#customOptions", - "verbioOptions": "#verbioOptions", - "speechmaticsOptions": "#speechmaticsOptions", - "openaiOptions": "#openaiOptions", - "houndifyOptions": "#houndifyOptions", - "gladiaOptions": "object", - "elevenlabsOptions": "#elevenlabsOptions" - }, - "required": [ - "vendor" - ] - }, - "customOptions": { - "properties": { - "authToken": "string", - "uri": "string", - "sampleRate": "number", - "options": "object" - }, - "required": [ - ] - }, - "awsOptions": { - "properties": { - "accessKey": "string", - "secretKey": "string", - "securityToken": "string", - "region": "string", - "vocabularyName": "string", - "vocabularyFilterName": "string", - "vocabularyFilterMethod": { - "type": "string", - "enum": [ - "remove", - "mask", - "tag" - ] - }, - "languageModelName": "string", - "piiEntityTypes": "array", - "piiIdentifyEntities": "boolean" - }, - "required": [ - ] - }, - "azureOptions": { - "properties": { - "speechSegmentationSilenceTimeoutMs": "number", - "postProcessing" : "string", - "audioLogging" : "boolean", - "languageIdMode": { - "type": "string", - "enum": [ - "AtStart", - "Continuous" - ] - }, - "speechRecognitionMode": { - "type": "string", - 
"enum": [ - "CONVERSATION", - "DICTATION", - "INTERACTIVE" - ] - } - }, - "required": [ - ] - }, - "googleOptions" : { - "properties": { - "serviceVersion": { - "type": "string", - "enum": [ - "v1", - "v2" - ] - }, - "recognizerId": "string", - "speechStartTimeoutMs": "number", - "speechEndTimeoutMs": "number", - "enableVoiceActivityEvents": "boolean", - "transcriptNormalization": "array" - } - }, - "houndifyOptions": { - "properties": { - "requestInfo": "object", - "sampleRate": "number", - "latitude": "number", - "longitude": "number", - "city": "string", - "state": "string", - "country": "string", - "timeZone": "string", - "domain": "string", - "audioEndpoint": "string", - "maxSilenceSeconds": "number", - "maxSilenceAfterFullQuerySeconds": "number", - "maxSilenceAfterPartialQuerySeconds": "number", - "vadSensitivity": "number", - "vadTimeout": "number", - "vadMode": "string", - "vadVoiceMs": "number", - "vadSilenceMs": "number", - "vadDebug": "boolean", - "audioFormat": "string", - "enableNoiseReduction": "boolean", - "enableProfanityFilter": "boolean", - "enablePunctuation": "boolean", - "enableCapitalization": "boolean", - "confidenceThreshold": "number", - "enableDisfluencyFilter": "boolean", - "maxResults": "number", - "enableWordTimestamps": "boolean", - "maxAlternatives": "number", - "partialTranscriptInterval": "number", - "sessionTimeout": "number", - "connectionTimeout": "number", - "customVocabulary": "array", - "languageModel": "string", - "audioQueryAbsoluteTimeout": "number" - } - }, - "elevenlabsOptions": { - "properties": { - "includeTimestamps": "boolean", - "commitStrategy": { - "type": "string", - "enum": [ - "manual", - "vad" - ] - }, - "vadSilenceThresholdSecs": "number", - "vadThreshold": "number", - "minSpeechDurationMs": "number", - "minSilenceDurationMs": "number", - "enableLogging": "boolean" - } - }, - "cobaltOptions": { - "properties": { - "serverUri": "string", - "enableConfusionNetwork": "boolean", - "metadata": "string", - 
"compiledContextData": "string", - "wordTimeOffsets": "boolean", - "contextToken": "string" - }, - "required": [ - ] - }, - "nvidiaOptions": { - "properties": { - "rivaUri": "string", - "maxAlternatives": "number", - "profanityFilter": "boolean", - "punctuation": "boolean", - "wordTimeOffsets": "boolean", - "verbatimTranscripts": "boolean", - "customConfiguration": "object" - }, - "required": [ - ] - }, - "ibmOptions": { - "properties": { - "sttApiKey": "string", - "sttRegion": "string", - "ttsApiKey": "string", - "ttsRegion": "string", - "instanceId": "string", - "model": "string", - "languageCustomizationId": "string", - "acousticCustomizationId": "string", - "baseModelVersion": "string", - "watsonMetadata": "string", - "watsonLearningOptOut": "boolean" - }, - "required": [ - ] - }, - "deepgramOptions": { - "properties": { - "deepgramSttUri": "string", - "deepgramSttUseTls": "boolean", - "apiKey": "string", - "tier": "string", - "model": "string", - "customModel": "string", - "version": "string", - "punctuate": "boolean", - "smartFormatting": "boolean", - "noDelay": "boolean", - "profanityFilter": "boolean", - "redact": { - "type": "string", - "enum": [ - "pci", - "numbers", - "true", - "ssn" - ] - }, - "diarize": "boolean", - "diarizeVersion": "string", - "ner": "boolean", - "multichannel": "boolean", - "alternatives": "number", - "numerals": "boolean", - "search": "array", - "replace": "array", - "keywords": "array", - "keyterms": "array", - "endpointing": "boolean | number", - "utteranceEndMs": "number", - "shortUtterance": "boolean", - "vadTurnoff": "number", - "tag": "string", - "fillerWords" : "boolean", - "eotThreshold": "number", - "eotTimeoutMs": "number", - "mipOptOut": "boolean", - "entityPrompt": "string", - "eagerEotThreshold":"number", - "languageHints": "array" - } - }, - "sonioxOptions": { - "properties": { - "apiKey": "string", - "model": "string", - "endpointDetection": "boolean", - "profanityFilter": "boolean", - "speechContext": "string", - 
"clientRequestReference": "string", - "storage": "#sonioxStorage" - }, - "required": [ - ] - }, - "verbioOptions": { - "properties": { - "enable_formatting": "boolean", - "enable_diarization": "boolean", - "topic": "number", - "inline_grammar": "string", - "grammar_uri": "string", - "label": "string", - "recognition_timeout": "number", - "speech_complete_timeout": "number", - "speech_incomplete_timeout": "number" - }, - "required": [ - ] - }, - "openaiOptions": { - "properties": { - "apiKey": "string", - "model": "string", - "prompt": "string", - "promptTemplates": "#promptTemplates", - "language": "string", - "input_audio_noise_reduction": { - "type": "string", - "enum": [ - "near_field", - "far_field" - ] - }, - "turn_detection": "#turnDetection" - }, - "required": [ - ] - }, - "promptTemplates": { - "properties": { - "hintsTemplate": "string", - "conversationHistoryTemplate": "string" - }, - "required": [ - ] - }, - "turnDetection": { - "properties": { - "type": { - "type": "string", - "enum": [ - "none", - "server_vad", - "semantic_vad" - ] - }, - "eagerness": { - "type": "string", - "enum": [ - "low", - "medium", - "high", - "auto" - ] - }, - "threshold": "number", - "prefix_padding_ms": "number", - "silence_duration_ms": "number" - }, - "required": [ - "type" - ] - }, - "speechmaticsOptions": { - "properties": { - "host": "string", - "profile": { - "type": "string", - "enum": [ - "adaptive", - "agile", - "smart", - "external" - ] - }, - "transcription_config": "#sm_transcriptionConfig", - "translation_config": "#sm_translationConfig", - "audio_events_config_config": "#sm_audioEventsConfig" - }, - "required": [ - ] - }, - "sm_transcriptionConfig": { - "properties": { - "language": "string", - "domain": "string", - "additional_vocab": "array", - "diarization": "string", - "speaker_diarization_config": "#sm_speakerDiarizationConfig", - "conversation_config": "#sm_conversationConfig", - "enable_partials": "boolean", - "max_delay": "number", - "max_delay_mode": { 
- "type": "string", - "enum": [ - "fixed", - "flexible" - ] - }, - "output_locale": "string", - "punctuation_overrides": "#sm_puctuationOverrides", - "operating_point": "string", - "enable_entities": "boolean", - "audio_filtering_config": "#sm_audioFilteringConfig", - "transcript_filtering_config": "#sm_transcriptFilteringConfig" - }, - "required": [ - ] - }, - "sm_speakerDiarizationConfig": { - "properties": { - "speaker_sensitivity": "number", - "max_speakers": "number" - }, - "required": [ - ] - }, - "sm_conversationConfig": { - "properties": { - "end_of_utterance_silence_trigger": "number" - }, - "required": [ - ] - }, - "sm_puctuationOverrides": { - "properties": { - "permitted_marks": "array", - "sensitivity": "number" - }, - "required": [ - ] - }, - "sm_audioFilteringConfig": { - "properties": { - "volume_threshold": "number" - }, - "required": [ - "volume_threshold" - ] - }, - "sm_transcriptFilteringConfig": { - "properties": { - "remove_disfluencies": "boolean" - }, - "required": [ - "remove_disfluencies" - ] - }, - "sm_translationConfig": { - "properties": { - "target_languages": "array", - "enable_partials": "boolean" - }, - "required": [ - "target_languages" - ] - }, - "sm_audioEventsConfig": { - "properties": { - "types": { - "type": "array", - "enum": [ - "applause", - "music", - "laughter" - ] - } - }, - "required": [ - ] - }, - "sonioxStorage": { - "properties": { - "id": "string", - "title": "string", - "disableStoreAudio": "boolean", - "disableStoreTranscript": "boolean", - "disableSearch": "boolean", - "metadata": "object" - }, - "required": [ - ] - }, - "nuanceOptions": { - "properties": { - "clientId": "string", - "secret": "string", - "kryptonEndpoint": "string", - "topic": "string", - "utteranceDetectionMode": { - "type": "string", - "enum": [ - "single", - "multiple", - "disabled" - ] - }, - "punctuation": "boolean", - "profanityFilter": "boolean", - "includeTokenization": "boolean", - "discardSpeakerAdaptation": "boolean", - 
"suppressCallRecording": "boolean", - "maskLoadFailures": "boolean", - "suppressInitialCapitalization": "boolean", - "allowZeroBaseLmWeight": "boolean", - "filterWakeupWord": "boolean", - "resultType": { - "type": "string", - "enum": [ - "final", - "partial", - "immutable_partial" - ] - }, - "noInputTimeoutMs": "number", - "recognitionTimeoutMs": "number", - "utteranceEndSilenceMs": "number", - "maxHypotheses": "number", - "speechDomain": "string", - "formatting": "#formatting", - "clientData": "object", - "userId": "string", - "speechDetectionSensitivity": "number", - "resources": ["#resource"] - }, - "required": [ - ] - }, - "assemblyAiOptions": { - "properties": { - "apiKey": "string", - "serviceVersion": { - "type": "string", - "enum": [ - "v2", - "v3" - ] - }, - "speechModel": "string", - "formatTurns": "boolean", - "endOfTurnConfidenceThreshold": "number", - "minEndOfTurnSilenceWhenConfident": "number", - "maxTurnSilence": "number", - "minTurnSilence": "number", - "keyterms": "array", - "prompt": "string", - "languageDetection": "boolean", - "vadThreshold": "number", - "inactivityTimeout": "number" - } - }, - "resource": { - "properties": { - "externalReference": "#resourceReference", - "inlineWordset": "string", - "builtin": "string", - "inlineGrammar": "string", - "wakeupWord": "array", - "weightName": { - "type": "string", - "enum": [ - "defaultWeight", - "lowest", - "low", - "medium", - "high", - "highest" - ] - }, - "weightValue": "number", - "reuse": { - "type": "string", - "enum": [ - "undefined_reuse", - "low_reuse", - "high_reuse" - ] - } - }, - "required": [ - ] - }, - "resourceReference": { - "properties": { - "type": { - "type": "string", - "enum": [ - "undefined_resource_type", - "wordset", - "compiled_wordset", - "domain_lm", - "speaker_profile", - "grammar", - "settings" - ] - }, - "uri": "string", - "maxLoadFailures": "boolean", - "requestTimeoutMs": "number", - "headers": "object" - }, - "required": [ - ] - }, - "formatting": { - 
"properties": { - "scheme": "string", - "options": "object" - }, - "required": [ - "scheme", - "options" - ] - }, - "lexIntent": { - "properties": { - "name": "string", - "slots": "object" - }, - "required": [ - "name" - ] - }, - "vad": { - "properties": { - "enable": "boolean", - "voiceMs": "number", - "silenceMs": "number", - "strategy": "string", - "mode": "number", - "vendor": { - "type": "string", - "enum": [ - "webrtc", - "silero" - ] - }, - "threshold": "number", - "speechPadMs": "number" - }, - "required": [ - ] - }, - "amd": { - "properties": { - "actionHook": "object|string", - "thresholdWordCount": "number", - "digitCount": "number", - "timers": "#amdTimers", - "recognizer": "#recognizer" - }, - "required": [ - "actionHook" - ] - }, - "amdTimers": { - "properties": { - "noSpeechTimeoutMs": "number", - "decisionTimeoutMs": "number", - "toneTimeoutMs": "number", - "greetingCompletionTimeoutMs": "number" - } - }, - "actionHookDelayAction" : { - "properties": { - "enabled": "boolean", - "noResponseTimeout": "number", - "noResponseGiveUpTimeout": "number", - "retries": "number", - "actions": "array", - "giveUpActions": "array" - } - }, - "bidirectionalAudio" : { - "properties": { - "enabled": "boolean", - "streaming": "boolean", - "sampleRate": "number" - } - }, - "pipeline": { - "properties": { - "id": "string", - "stt": "#recognizer", - "tts": "#synthesizer", - "llm": "#llm", - "turnDetection": "string|object", - "bargeIn": "#bargeInPipeline", - "actionHook": "object|string", - "eventHook": "object|string", - "toolHook": "object|string", - "greeting": "boolean", - "earlyGeneration": "boolean", - "noiseIsolation": "string|#noiseIsolationPipeline", - "mcpServers": ["#mcpServer"], - "noResponseTimeout": "number" - }, - "required": [ - "llm" - ] - }, - "bargeInPipeline": { - "properties": { - "enable": "boolean", - "minSpeechDuration": "number", - "sticky": "boolean" - } - }, - "noiseIsolationPipeline": { - "properties": { - "mode": "string", - "level": 
"number", - "direction": { - "enum": ["read", "write"] - }, - "model": "string" - } - }, - "noiseIsolation" : { - "properties": { - "enable": "boolean", - "vendor": "string", - "level": "number", - "model": "string" - } - }, - "turnTaking": { - "properties": { - "enable": "boolean", - "vendor": "string", - "threshold": "number", - "model": "string" - } - } -} diff --git a/src/jambonz_sdk/validator.py b/src/jambonz_sdk/validator.py new file mode 100644 index 0000000..e46b17d --- /dev/null +++ b/src/jambonz_sdk/validator.py @@ -0,0 +1,107 @@ +"""JSON Schema validation for jambonz verb applications. + +Uses the ``jsonschema`` library to validate verb dicts against the +bundled JSON Schema files from @jambonz/schema. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from jsonschema import Draft202012Validator, ValidationError +from referencing import Registry, Resource + + +def _load_schema(path: Path) -> dict[str, Any]: + with path.open() as f: + return json.load(f) + + +class JambonzValidator: + """Validates jambonz verb dicts against JSON Schema (draft 2020-12). + + Schemas are loaded once at construction time from the bundled + ``schema/`` directory. 
+ + Example:: + + validator = JambonzValidator() + errors = validator.validate_verb({"verb": "say", "text": "Hello"}) + assert errors == [] + """ + + def __init__(self, schema_dir: str | Path | None = None) -> None: + self._schema_dir = Path(schema_dir) if schema_dir else ( + Path(__file__).resolve().parent / "schema" + ) + + # Load the root app schema + app_schema_path = self._schema_dir / "jambonz-app.schema.json" + self._app_schema = _load_schema(app_schema_path) + + # Build a registry of all schemas for $ref resolution + resources: list[tuple[str, Resource]] = [] # type: ignore[type-arg] + self._store: dict[str, dict[str, Any]] = {} + + for subdir in ("components", "callbacks", "verbs"): + subdir_path = self._schema_dir / subdir + if not subdir_path.is_dir(): + continue + for schema_file in subdir_path.glob("*.schema.json"): + schema = _load_schema(schema_file) + if "$id" in schema: + self._store[schema["$id"]] = schema + resources.append(( + schema["$id"], + Resource.from_contents(schema), # type: ignore[arg-type] + )) + + # Add the root schema + self._store[self._app_schema["$id"]] = self._app_schema + resources.append(( + self._app_schema["$id"], + Resource.from_contents(self._app_schema), # type: ignore[arg-type] + )) + + self._registry: Registry = Registry().with_resources(resources) # type: ignore[assignment] + + # Pre-compile the app validator + self._app_validator = Draft202012Validator( + self._app_schema, + registry=self._registry, + ) + + def validate_app(self, verbs: list[dict[str, Any]]) -> list[str]: + """Validate a complete verb array against the root app schema. + + Returns a list of error messages (empty if valid). + """ + errors: list[str] = [] + for error in self._app_validator.iter_errors(verbs): + path = "/".join(str(p) for p in error.absolute_path) or "/" + errors.append(f"{path}: {error.message}") + return errors + + def validate_verb(self, verb: dict[str, Any]) -> list[str]: + """Validate a single verb dict against its schema. 
+ + Returns a list of error messages (empty if valid). + """ + verb_name = verb.get("verb") + if not verb_name: + return ["missing 'verb' property"] + + # Look up the verb schema by $id + schema_id = f"https://jambonz.org/schema/verbs/{verb_name}" + schema = self._store.get(schema_id) + if schema is None: + return [f"unknown verb: {verb_name}"] + + validator = Draft202012Validator(schema, registry=self._registry) + errors: list[str] = [] + for error in validator.iter_errors(verb): + path = "/".join(str(p) for p in error.absolute_path) or "/" + errors.append(f"{path}: {error.message}") + return errors diff --git a/src/jambonz_sdk/verb_builder.py b/src/jambonz_sdk/verb_builder.py index c3bfde3..7d5fca1 100644 --- a/src/jambonz_sdk/verb_builder.py +++ b/src/jambonz_sdk/verb_builder.py @@ -1,7 +1,7 @@ """VerbBuilder base class with auto-generated chainable verb methods. -Methods are generated at import time from ``specs.json`` + the verb registry. -When the spec changes, the SDK automatically picks up new parameters — +Methods are generated at import time from JSON Schema files + the verb registry. +When the schema changes, the SDK automatically picks up new parameters — no manual method signatures to maintain. Each generated method has a real ``inspect.Signature`` with typed parameters @@ -21,90 +21,167 @@ logger = logging.getLogger("jambonz_sdk.verb_builder") -# ── Spec type → Python type mapping ──────────────────────────────── -# Used for both docstrings (human-readable) and runtime annotations (IDE). 
+# ── JSON Schema type → Python type mapping ──────────────────────────── _TYPE_ANNOTATION_MAP: dict[str, type | object] = { "string": str, "number": Union[int, float], + "integer": int, "boolean": bool, "object": dict, "array": list, } +_TYPE_STR_MAP: dict[str, str] = { + "string": "str", + "number": "int | float", + "integer": "int", + "boolean": "bool", + "object": "dict[str, Any]", + "array": "list[Any]", +} + + +def _resolve_type(prop_schema: Any) -> type | object: + """Convert a JSON Schema property definition to a Python type annotation.""" + if isinstance(prop_schema, str): + # Backward compat: simple type string (shouldn't happen with JSON Schema) + return _TYPE_ANNOTATION_MAP.get(prop_schema, Any) + + if not isinstance(prop_schema, dict): + return Any + + # $ref → component reference → dict + if "$ref" in prop_schema: + return dict + + # const → the type of the const value + if "const" in prop_schema: + val = prop_schema["const"] + return type(val) + + # oneOf → union of the branch types + if "oneOf" in prop_schema: + parts: list[type | object] = [] + for branch in prop_schema["oneOf"]: + resolved = _resolve_type(branch) + if resolved is Union[int, float]: + parts.extend([int, float]) + elif resolved not in parts: + parts.append(resolved) + # Deduplicate while preserving order + seen: set[type | object] = set() + unique = [] + for p in parts: + if p not in seen: + seen.add(p) + unique.append(p) + if len(unique) == 1: + return unique[0] + return Union[tuple(unique)] + + # Simple type + schema_type = prop_schema.get("type") + if isinstance(schema_type, str): + return _TYPE_ANNOTATION_MAP.get(schema_type, Any) + + # Array of types + if isinstance(schema_type, list): + parts = [] + for t in schema_type: + resolved = _TYPE_ANNOTATION_MAP.get(t, Any) + if resolved is Union[int, float]: + parts.extend([int, float]) + elif resolved not in parts: + parts.append(resolved) + if len(parts) == 1: + return parts[0] + return Union[tuple(parts)] -def 
_resolve_type(spec_type: Any) -> type | object: - """Convert a specs.json type descriptor to a Python type annotation.""" - if isinstance(spec_type, str): - if spec_type.startswith("#"): - return dict - if "|" in spec_type: - parts = [] - for t in spec_type.split("|"): - t = t.strip() - if t.startswith("#"): - parts.append(dict) - else: - resolved = _TYPE_ANNOTATION_MAP.get(t) - if resolved is not None: - if resolved is Union[int, float]: - parts.extend([int, float]) - else: - parts.append(resolved) - else: - parts.append(Any) - # Deduplicate while preserving order - seen: set[type | object] = set() - unique = [] - for p in parts: - if p not in seen: - seen.add(p) - unique.append(p) - if len(unique) == 1: - return unique[0] - return Union[tuple(unique)] - return _TYPE_ANNOTATION_MAP.get(spec_type, Any) - if isinstance(spec_type, list): - return list - if isinstance(spec_type, dict): - return _TYPE_ANNOTATION_MAP.get(spec_type.get("type", ""), Any) return Any -def _python_type_str(spec_type: Any) -> str: - """Convert a specs.json type descriptor to a human-readable type string.""" - _str_map = { - "string": "str", - "number": "int | float", - "boolean": "bool", - "object": "dict[str, Any]", - "array": "list[Any]", - } - if isinstance(spec_type, str): - if spec_type.startswith("#"): - return "dict[str, Any]" - if "|" in spec_type: - parts = [_str_map.get(t.strip(), "Any") if not t.strip().startswith("#") else "dict[str, Any]" - for t in spec_type.split("|")] - return " | ".join(dict.fromkeys(parts)) - return _str_map.get(spec_type, "Any") - if isinstance(spec_type, list): - return "list[Any]" - if isinstance(spec_type, dict): - return _str_map.get(spec_type.get("type", ""), "Any") +def _python_type_str(prop_schema: Any) -> str: + """Convert a JSON Schema property definition to a human-readable type string.""" + if isinstance(prop_schema, str): + return _TYPE_STR_MAP.get(prop_schema, "Any") + + if not isinstance(prop_schema, dict): + return "Any" + + if "$ref" in 
prop_schema: + return "dict[str, Any]" + + if "const" in prop_schema: + return repr(type(prop_schema["const"]).__name__) + + if "oneOf" in prop_schema: + parts = [_python_type_str(branch) for branch in prop_schema["oneOf"]] + return " | ".join(dict.fromkeys(parts)) + + schema_type = prop_schema.get("type") + if isinstance(schema_type, str): + return _TYPE_STR_MAP.get(schema_type, "Any") + if isinstance(schema_type, list): + parts = [_TYPE_STR_MAP.get(t, "Any") for t in schema_type] + return " | ".join(dict.fromkeys(parts)) + return "Any" -# ── Load specs ────────────────────────────────────────────────────── +# ── Load JSON Schemas ────────────────────────────────────────────────── + +def _load_schemas() -> dict[str, Any]: + """Load verb JSON Schemas bundled alongside this package. + + Returns a dict mapping verb spec names (e.g. 'say', 'sip:decline') + to their schema dicts, with a 'properties' key and optionally 'required'. + """ + schema_dir = Path(__file__).resolve().parent / "schema" / "verbs" + schemas: dict[str, Any] = {} + + if not schema_dir.is_dir(): + logger.warning("Schema directory not found: %s", schema_dir) + return schemas + + for schema_file in sorted(schema_dir.glob("*.schema.json")): + with schema_file.open() as f: + schema = json.load(f) + + # Derive the spec name from the $id or filename + schema_id = schema.get("$id", "") + if schema_id: + # e.g. "https://jambonz.org/schema/verbs/say" → "say" + # e.g. 
"https://jambonz.org/schema/verbs/sip:decline" → "sip:decline" + spec_name = schema_id.rsplit("/", 1)[-1] + else: + # Fallback: filename without .schema.json + spec_name = schema_file.stem.replace(".schema", "") + + # Collect properties, skipping 'verb' (it's a const, not a user param) + properties = {} + for prop_name, prop_def in schema.get("properties", {}).items(): + if prop_name == "verb": + continue + properties[prop_name] = prop_def + + # Handle allOf (used by vendor-specific s2s verbs that extend llm-base) + for entry in schema.get("allOf", []): + if "properties" in entry: + for prop_name, prop_def in entry["properties"].items(): + if prop_name == "verb": + continue + properties[prop_name] = prop_def + + schemas[spec_name] = { + "properties": properties, + "required": schema.get("required", []), + } -def _load_specs() -> dict[str, Any]: - """Load specs.json bundled alongside this package.""" - specs_path = Path(__file__).resolve().parent / "specs.json" - with specs_path.open() as f: - return json.load(f) + return schemas -_SPECS: dict[str, Any] = _load_specs() +_SPECS: dict[str, Any] = _load_schemas() # ── Method factory ────────────────────────────────────────────────── @@ -186,7 +263,7 @@ def verb_method(self: VerbBuilder, **kwargs: Any) -> Self: class VerbBuilder: """Builds an ordered list of jambonz verbs using a fluent API. - All verb methods are auto-generated from ``specs.json`` and accept + All verb methods are auto-generated from JSON Schema files and accept keyword arguments matching the verb's specification. Methods return ``self`` for chaining. 
@@ -216,12 +293,12 @@ def to_list(self) -> list[AnyVerb]: # ── Attach generated methods to VerbBuilder ───────────────────────── def _build_methods() -> None: - """Generate and attach verb methods to VerbBuilder from specs + registry.""" + """Generate and attach verb methods to VerbBuilder from schemas + registry.""" for verb_def in VERB_DEFS: spec = _SPECS.get(verb_def.spec_name) if spec is None: logger.warning( - "Spec '%s' not found in specs.json for method '%s' — skipping", + "Schema for '%s' not found for method '%s' — skipping", verb_def.spec_name, verb_def.method_name, ) From be4248b2ca067aa893038b0256130e753017da5b Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Tue, 7 Apr 2026 11:10:01 -0400 Subject: [PATCH 2/5] fix workflow --- src/jambonz_sdk/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jambonz_sdk/validator.py b/src/jambonz_sdk/validator.py index e46b17d..a9d2747 100644 --- a/src/jambonz_sdk/validator.py +++ b/src/jambonz_sdk/validator.py @@ -10,7 +10,7 @@ from pathlib import Path from typing import Any -from jsonschema import Draft202012Validator, ValidationError +from jsonschema import Draft202012Validator from referencing import Registry, Resource From 9622cf377b867802c640cfcbb1d693efcc4e7ee4 Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Tue, 7 Apr 2026 11:13:58 -0400 Subject: [PATCH 3/5] wip --- .github/workflows/test.yml | 2 +- pyproject.toml | 2 ++ src/jambonz_sdk/verb_builder.py | 8 +++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e8cdcb5..b06594a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,7 @@ jobs: python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 diff --git a/pyproject.toml b/pyproject.toml index e904e20..01c5f4a 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,8 @@ classifiers = [ dependencies = [ "aiohttp>=3.9", "jsonschema>=4.20", + "referencing>=0.31", + "typing_extensions>=4.0; python_version < '3.11'", ] [project.urls] diff --git a/src/jambonz_sdk/verb_builder.py b/src/jambonz_sdk/verb_builder.py index 7d5fca1..ae55f9a 100644 --- a/src/jambonz_sdk/verb_builder.py +++ b/src/jambonz_sdk/verb_builder.py @@ -14,7 +14,13 @@ import json import logging from pathlib import Path -from typing import Any, Self, Union +import sys +from typing import Any, Union + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self from jambonz_sdk.types.verbs import AnyVerb from jambonz_sdk.verb_registry import VERB_DEFS, VerbDef From 3251c234d5872a45703eed4541a5a24cbcc09753 Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Tue, 7 Apr 2026 11:17:13 -0400 Subject: [PATCH 4/5] wip --- src/jambonz_sdk/verb_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jambonz_sdk/verb_builder.py b/src/jambonz_sdk/verb_builder.py index ae55f9a..aa0ff29 100644 --- a/src/jambonz_sdk/verb_builder.py +++ b/src/jambonz_sdk/verb_builder.py @@ -13,8 +13,8 @@ import inspect import json import logging -from pathlib import Path import sys +from pathlib import Path from typing import Any, Union if sys.version_info >= (3, 11): From c3ab0d0d9babb9e16cf640ff4a475b4140824b1b Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Tue, 7 Apr 2026 13:52:19 -0400 Subject: [PATCH 5/5] update to latest schema --- .../schema/jambonz-app.schema.json | 3 +- .../schema/verbs/config.schema.json | 15 ++- src/jambonz_sdk/schema/verbs/dial.schema.json | 33 +++-- .../schema/verbs/gather.schema.json | 10 +- .../schema/verbs/hangup.schema.json | 2 +- .../schema/verbs/pipeline.schema.json | 2 +- .../schema/verbs/rest_dial.schema.json | 113 ++++++++++++++++++ .../schema/verbs/sip-decline.schema.json | 2 +- .../schema/verbs/sip-refer.schema.json | 2 +- 
.../schema/verbs/sip-request.schema.json | 2 +- 10 files changed, 158 insertions(+), 26 deletions(-) create mode 100644 src/jambonz_sdk/schema/verbs/rest_dial.schema.json diff --git a/src/jambonz_sdk/schema/jambonz-app.schema.json b/src/jambonz_sdk/schema/jambonz-app.schema.json index 323da09..4e0ac4d 100644 --- a/src/jambonz_sdk/schema/jambonz-app.schema.json +++ b/src/jambonz_sdk/schema/jambonz-app.schema.json @@ -43,7 +43,8 @@ { "$ref": "verbs/tag" }, { "$ref": "verbs/sip:decline" }, { "$ref": "verbs/sip:request" }, - { "$ref": "verbs/sip:refer" } + { "$ref": "verbs/sip:refer" }, + { "$ref": "verbs/rest:dial" } ], "discriminator": { "propertyName": "verb" diff --git a/src/jambonz_sdk/schema/verbs/config.schema.json b/src/jambonz_sdk/schema/verbs/config.schema.json index 0e40c87..4b09ace 100644 --- a/src/jambonz_sdk/schema/verbs/config.schema.json +++ b/src/jambonz_sdk/schema/verbs/config.schema.json @@ -69,16 +69,19 @@ "additionalProperties": true }, "listen": { - "$ref": "listen", - "description": "Nested listen verb — session-level audio streaming configuration." + "type": "object", + "description": "Session-level audio streaming configuration defaults. Properties match the listen verb but no fields are required here.", + "additionalProperties": true }, "stream": { - "$ref": "stream", - "description": "Nested stream verb — session-level audio streaming configuration. Alias for 'listen'." + "type": "object", + "description": "Session-level audio streaming configuration defaults. Alias for 'listen'.", + "additionalProperties": true }, "transcribe": { - "$ref": "transcribe", - "description": "Nested transcribe verb — session-level real-time transcription configuration." 
+ "type": "object", + "description": "Session-level transcription configuration defaults.", + "additionalProperties": true }, "amd": { "$ref": "../components/amd", diff --git a/src/jambonz_sdk/schema/verbs/dial.schema.json b/src/jambonz_sdk/schema/verbs/dial.schema.json index 54a85af..8d03edc 100644 --- a/src/jambonz_sdk/schema/verbs/dial.schema.json +++ b/src/jambonz_sdk/schema/verbs/dial.schema.json @@ -59,9 +59,19 @@ "description": "URL of an audio file to play to the caller while the outbound call is ringing. Replaces the default ringback tone." }, "dtmfCapture": { - "type": "object", - "description": "Configuration for capturing DTMF digits during the bridged call. Keys are DTMF patterns to capture, values are configuration for each.", - "additionalProperties": true + "oneOf": [ + { + "type": "array", + "items": { "type": "string" }, + "description": "Array of DTMF patterns to capture on both call legs." + }, + { + "type": "object", + "description": "Per-leg DTMF capture configuration with childCall and/or parentCall arrays.", + "additionalProperties": true + } + ], + "description": "Configuration for capturing DTMF digits during the bridged call. Can be a simple array of patterns (applied to both legs) or an object with childCall/parentCall arrays." }, "dtmfHook": { "$ref": "../components/actionHook", @@ -71,7 +81,7 @@ "type": "object", "description": "Custom SIP headers to include on the outbound INVITE.", "additionalProperties": { - "type": "string" + "oneOf": [{ "type": "string" }, { "type": "number" }] } }, "anchorMedia": { @@ -98,16 +108,19 @@ ] }, "listen": { - "$ref": "listen", - "description": "Nested listen verb — streams audio of the bridged call to a websocket endpoint." + "type": "object", + "description": "Nested listen configuration for streaming audio of the bridged call.", + "additionalProperties": true }, "stream": { - "$ref": "stream", - "description": "Nested stream verb — streams audio of the bridged call. Alias for 'listen'." 
+ "type": "object", + "description": "Nested stream configuration for streaming audio of the bridged call. Alias for 'listen'.", + "additionalProperties": true }, "transcribe": { - "$ref": "transcribe", - "description": "Nested transcribe verb — enables real-time transcription of the bridged call." + "type": "object", + "description": "Nested transcribe configuration for real-time transcription of the bridged call.", + "additionalProperties": true }, "timeLimit": { "type": "number", diff --git a/src/jambonz_sdk/schema/verbs/gather.schema.json b/src/jambonz_sdk/schema/verbs/gather.schema.json index cc08fd5..a328623 100644 --- a/src/jambonz_sdk/schema/verbs/gather.schema.json +++ b/src/jambonz_sdk/schema/verbs/gather.schema.json @@ -117,12 +117,14 @@ "description": "Override the session-level STT configuration for this gather." }, "say": { - "$ref": "say", - "description": "A nested 'say' verb to use as the prompt. Played to the caller while listening for input." + "type": "object", + "description": "A nested say prompt played to the caller while listening for input. Accepts the same properties as the say verb (text, synthesizer, etc.) but no fields are required.", + "additionalProperties": true }, "play": { - "$ref": "play", - "description": "A nested 'play' verb to use as the prompt. Played to the caller while listening for input." + "type": "object", + "description": "A nested play prompt played to the caller while listening for input. Accepts the same properties as the play verb (url, etc.) 
but no fields are required.", + "additionalProperties": true }, "fillerNoise": { "$ref": "../components/fillerNoise", diff --git a/src/jambonz_sdk/schema/verbs/hangup.schema.json b/src/jambonz_sdk/schema/verbs/hangup.schema.json index ef96d03..f1202d1 100644 --- a/src/jambonz_sdk/schema/verbs/hangup.schema.json +++ b/src/jambonz_sdk/schema/verbs/hangup.schema.json @@ -18,7 +18,7 @@ "type": "object", "description": "Custom SIP headers to include on the BYE request.", "additionalProperties": { - "type": "string" + "oneOf": [{ "type": "string" }, { "type": "number" }] } } }, diff --git a/src/jambonz_sdk/schema/verbs/pipeline.schema.json b/src/jambonz_sdk/schema/verbs/pipeline.schema.json index 50f478b..9156cd7 100644 --- a/src/jambonz_sdk/schema/verbs/pipeline.schema.json +++ b/src/jambonz_sdk/schema/verbs/pipeline.schema.json @@ -76,7 +76,7 @@ "default": false } }, - "additionalProperties": false + "additionalProperties": true }, "noResponseTimeout": { "type": "number", diff --git a/src/jambonz_sdk/schema/verbs/rest_dial.schema.json b/src/jambonz_sdk/schema/verbs/rest_dial.schema.json new file mode 100644 index 0000000..8e31b59 --- /dev/null +++ b/src/jambonz_sdk/schema/verbs/rest_dial.schema.json @@ -0,0 +1,113 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://jambonz.org/schema/verbs/rest:dial", + "title": "REST Dial", + "description": "Internal verb used to originate an outbound call via the REST API. Not typically used directly in application verb arrays.", + "type": "object", + "properties": { + "verb": { + "const": "rest:dial" + }, + "id": { + "type": "string" + }, + "account_sid": { + "type": "string" + }, + "application_sid": { + "type": "string" + }, + "call_hook": { + "oneOf": [ + { "type": "string" }, + { "type": "object" } + ], + "description": "Webhook URL or object for call control." 
+ }, + "call_status_hook": { + "oneOf": [ + { "type": "string" }, + { "type": "object" } + ], + "description": "Webhook URL or object for call status notifications." + }, + "from": { + "type": "string", + "description": "The caller ID for the outbound call." + }, + "callerName": { + "type": "string", + "description": "Display name for the caller." + }, + "fromHost": { + "type": "string", + "description": "SIP host to use in the From header." + }, + "speech_synthesis_vendor": { + "type": "string" + }, + "speech_synthesis_voice": { + "type": "string" + }, + "speech_synthesis_language": { + "type": "string" + }, + "speech_recognizer_vendor": { + "type": "string" + }, + "speech_recognizer_language": { + "type": "string" + }, + "tag": { + "type": "object", + "description": "Arbitrary metadata to attach to the call.", + "additionalProperties": true + }, + "to": { + "$ref": "../components/target", + "description": "The call destination." + }, + "headers": { + "type": "object", + "description": "Custom SIP headers to include on the outbound INVITE.", + "additionalProperties": { + "oneOf": [ + { "type": "string" }, + { "type": "number" } + ] + } + }, + "timeout": { + "type": "number", + "description": "Ring timeout in seconds." + }, + "amd": { + "$ref": "../components/amd", + "description": "Answering machine detection configuration." + }, + "dual_streams": { + "type": "boolean", + "description": "If true, send separate audio streams for each call leg." + }, + "sipRequestWithinDialogHook": { + "type": "string", + "description": "Webhook for in-dialog SIP requests." + }, + "referHook": { + "oneOf": [ + { "type": "string" }, + { "type": "object" } + ], + "description": "Webhook for SIP REFER handling." + }, + "timeLimit": { + "type": "number", + "description": "Maximum call duration in seconds." 
+ } + }, + "required": [ + "call_hook", + "from", + "to" + ] +} diff --git a/src/jambonz_sdk/schema/verbs/sip-decline.schema.json b/src/jambonz_sdk/schema/verbs/sip-decline.schema.json index 2bea669..ab64984 100644 --- a/src/jambonz_sdk/schema/verbs/sip-decline.schema.json +++ b/src/jambonz_sdk/schema/verbs/sip-decline.schema.json @@ -36,7 +36,7 @@ "type": "object", "description": "Custom SIP headers to include in the response.", "additionalProperties": { - "type": "string" + "oneOf": [{ "type": "string" }, { "type": "number" }] } } }, diff --git a/src/jambonz_sdk/schema/verbs/sip-refer.schema.json b/src/jambonz_sdk/schema/verbs/sip-refer.schema.json index 1909519..51bd127 100644 --- a/src/jambonz_sdk/schema/verbs/sip-refer.schema.json +++ b/src/jambonz_sdk/schema/verbs/sip-refer.schema.json @@ -33,7 +33,7 @@ "type": "object", "description": "Custom SIP headers to include in the REFER request.", "additionalProperties": { - "type": "string" + "oneOf": [{ "type": "string" }, { "type": "number" }] } }, "actionHook": { diff --git a/src/jambonz_sdk/schema/verbs/sip-request.schema.json b/src/jambonz_sdk/schema/verbs/sip-request.schema.json index 20fc013..cf8c5f5 100644 --- a/src/jambonz_sdk/schema/verbs/sip-request.schema.json +++ b/src/jambonz_sdk/schema/verbs/sip-request.schema.json @@ -30,7 +30,7 @@ "type": "object", "description": "Custom SIP headers to include in the request.", "additionalProperties": { - "type": "string" + "oneOf": [{ "type": "string" }, { "type": "number" }] } }, "actionHook": {