From bc707850f2a19dc4ab387cddb57764e182b11abe Mon Sep 17 00:00:00 2001
From: atasoglu <ahmetatasoglu98@gmail.com>
Date: Sun, 9 Nov 2025 01:06:28 +0300
Subject: [PATCH 1/5] feat: add Hugging Face dataset integration and example
 setup for tool calling generation

- Introduces dataset_to_tools utility to load tools from Hugging Face datasets
- Adds configuration, example script, and README for simple tool calling v1
- Enables parallel processing, schema validation, and dataset generation for tool calling tasks
---
 examples/simple_tool_calling_v1/README.md     |  31 +++++
 examples/simple_tool_calling_v1/config.py     |  34 ++++++
 examples/simple_tool_calling_v1/example.py    |  31 +++++
 examples/simple_tool_calling_v1/utils.py      |  45 ++++++++
 examples/simple_tool_calling_v1/validation.py | 108 ++++++++++++++++++
 5 files changed, 249 insertions(+)
 create mode 100644 examples/simple_tool_calling_v1/README.md
 create mode 100644 examples/simple_tool_calling_v1/config.py
 create mode 100644 examples/simple_tool_calling_v1/example.py
 create mode 100644 examples/simple_tool_calling_v1/utils.py
 create mode 100644 examples/simple_tool_calling_v1/validation.py

diff --git a/examples/simple_tool_calling_v1/README.md b/examples/simple_tool_calling_v1/README.md
new file mode 100644
index 0000000..68cac98
--- /dev/null
+++ b/examples/simple_tool_calling_v1/README.md
@@ -0,0 +1,31 @@
+# Simple Tool Calling v1
+
+Generates 10K tool-calling samples from Hugging Face dataset with parallel processing and schema validation.
+
+## Setup
+
+```bash
+pip install toolsgen datasets python-dotenv
+echo "OPENAI_API_KEY=your-key-here" > .env
+python example.py
+```
+
+## Configuration
+
+- **Dataset**: `argilla-warehouse/python-seed-tools`
+- **Samples**: 10,000 (80% train / 20% val)
+- **Parallel**: 8 workers × 16 batch size
+- **Models**: GPT-5-mini (problem/caller), GPT-5 (judge)
+
+## Files
+
+- `example.py` - Main generation script
+- `config.py` - Generation and model settings
+- `utils.py` - HF dataset loader
+- `validation.py` - Schema validator (ensures arrays have `items`)
+
+## Output
+
+- `output/train.jsonl` - Training set
+- `output/val.jsonl` - Validation set
+- `output/manifest.json` - Metadata
diff --git a/examples/simple_tool_calling_v1/config.py b/examples/simple_tool_calling_v1/config.py
new file mode 100644
index 0000000..1a89c04
--- /dev/null
+++ b/examples/simple_tool_calling_v1/config.py
@@ -0,0 +1,34 @@
+from toolsgen import (
+    GenerationConfig,
+    ModelConfig,
+    RoleBasedModelConfig,
+)
+
+gen_config = GenerationConfig(
+    num_samples=10_000,
+    strategy="random",
+    seed=42,
+    train_split=0.8,
+    language="english",
+    max_attempts=3,
+    k_min=2,
+    k_max=8,
+    shuffle_tools=True,
+    num_workers=8,
+    worker_batch_size=16,
+)
+
+role_config = RoleBasedModelConfig(
+    problem_generator=ModelConfig(
+        model="gpt-5-mini",
+        temperature=1.0,
+    ),
+    tool_caller=ModelConfig(
+        model="gpt-5-mini",
+        temperature=0,
+    ),
+    judge=ModelConfig(
+        model="gpt-5",
+        temperature=0,
+    ),
+)
diff --git a/examples/simple_tool_calling_v1/example.py b/examples/simple_tool_calling_v1/example.py
new file mode 100644
index 0000000..ee6a607
--- /dev/null
+++ b/examples/simple_tool_calling_v1/example.py
@@ -0,0 +1,31 @@
+"""
+Hugging Face example - Using a dataset from Hugging Face
+
+Install datasets library by using `pip install datasets` and set the dataset id.
+"""
+
+from pathlib import Path
+from dotenv import load_dotenv
+from utils import dataset_to_tools
+from config import gen_config, role_config
+from toolsgen import generate_dataset
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Load dataset from Hugging Face
+dataset_id = "argilla-warehouse/python-seed-tools"
+tools = dataset_to_tools(dataset_id, dataset_kwargs={"split": "train"})
+output_dir = Path(__file__).parent / "output"
+
+# Generate dataset
+manifest = generate_dataset(output_dir, gen_config, role_config, tools=tools)
+
+# Print summary
+print(f"\n✓ Generated {manifest['num_generated']}/{manifest['num_requested']} records")
+if manifest["num_failed"] > 0:
+    print(f"  Failed: {manifest['num_failed']} attempts")
+print(f"  Problem Generator: {role_config.problem_generator.model}")
+print(f"  Tool Caller: {role_config.tool_caller.model}")
+print(f"  Judge: {role_config.judge.model}")
+print(f"  Output: {output_dir}")
diff --git a/examples/simple_tool_calling_v1/utils.py b/examples/simple_tool_calling_v1/utils.py
new file mode 100644
index 0000000..22b8ed7
--- /dev/null
+++ b/examples/simple_tool_calling_v1/utils.py
@@ -0,0 +1,45 @@
+import json
+from typing import List, Optional
+
+from datasets import load_dataset
+
+from toolsgen import (
+    ToolFunction,
+    ToolSpec,
+)
+
+from validation import validate_json_schema
+
+
+def dataset_to_tools(
+    dataset_id: str, dataset_kwargs: Optional[dict] = None
+) -> List[ToolSpec]:
+    """Load tools from a Hugging Face dataset.
+
+    Args:
+        dataset_id (str): The Hugging Face dataset identifier.
+        dataset_kwargs (Optional[dict]): Additional arguments for loading the dataset.
+    Returns:
+        List[ToolSpec]: A list of ToolSpec objects.
+    """
+    dataset = load_dataset(dataset_id, **(dataset_kwargs or {}))
+    # Each dataset row contains a list of tools in OpenAI format
+    # Flatten the nested lists: [[tool1], [tool2, tool3], ...] -> [tool1, tool2, tool3, ...]
+    all_tools = []
+    for tools_json in dataset["tools"]:
+        tools_list = json.loads(tools_json)
+        all_tools.extend(tools_list)
+
+    # Convert from OpenAI format to ToolSpec
+    # OpenAI format: {'type': 'function', 'function': {'name': ..., 'description': ..., 'parameters': ...}}
+    return [
+        ToolSpec(
+            function=ToolFunction(
+                name=tool["function"]["name"],
+                description=tool["function"]["description"],
+                parameters=tool["function"]["parameters"],
+            )
+        )
+        for tool in all_tools
+        if validate_json_schema(tool)
+    ]
diff --git a/examples/simple_tool_calling_v1/validation.py b/examples/simple_tool_calling_v1/validation.py
new file mode 100644
index 0000000..8261d15
--- /dev/null
+++ b/examples/simple_tool_calling_v1/validation.py
@@ -0,0 +1,108 @@
+from typing import Any
+
+
+def _validate_schema_recursively(schema: dict[str, Any]) -> bool:
+    """Recursively validate JSON Schema properties.
+
+    OpenAI requires that array types must have 'items' field defined.
+    """
+    if not isinstance(schema, dict):
+        return True
+
+    # Check if this is an array type
+    schema_type = schema.get("type")
+    if schema_type == "array":
+        # Array must have items field
+        if "items" not in schema:
+            return False
+        # Recursively validate items
+        if not _validate_schema_recursively(schema.get("items", {})):
+            return False
+    elif isinstance(schema_type, list) and "array" in schema_type:
+        # Handle union types like ["array", "null"]
+        if "items" not in schema:
+            return False
+        if not _validate_schema_recursively(schema.get("items", {})):
+            return False
+
+    # Check properties recursively
+    if "properties" in schema:
+        properties = schema.get("properties", {})
+        if isinstance(properties, dict):
+            for prop_schema in properties.values():
+                if not _validate_schema_recursively(prop_schema):
+                    return False
+
+    # Check items recursively (for nested arrays)
+    if "items" in schema:
+        items = schema.get("items")
+        if isinstance(items, dict):
+            if not _validate_schema_recursively(items):
+                return False
+
+    # Check additionalProperties if it's a schema
+    if "additionalProperties" in schema:
+        add_props = schema.get("additionalProperties")
+        if isinstance(add_props, dict):
+            if not _validate_schema_recursively(add_props):
+                return False
+
+    return True
+
+
+def validate_json_schema(tool: dict[str, Any]) -> bool:
+    """Validate OpenAI tool schema format.
+
+    Expected format:
+
+    ```json
+    {
+        "type": "function",
+        "function": {
+            "name": "function_name",
+            "description": "function description",
+            "parameters": {"type": "object", "properties": {...}}
+        }
+    }
+    ```
+    Also validates that all array types have 'items' field defined.
+    """
+    try:
+        # Check top-level structure
+        if not isinstance(tool, dict):
+            return False
+
+        if tool.get("type") != "function":
+            return False
+
+        function = tool.get("function")
+        if not isinstance(function, dict):
+            return False
+
+        # Check required function fields
+        if "name" not in function or not isinstance(function["name"], str):
+            return False
+
+        if "description" not in function or not isinstance(
+            function["description"], str
+        ):
+            return False
+
+        # Parameters are optional, but if present must be a dict
+        if "parameters" in function:
+            params = function["parameters"]
+            if not isinstance(params, dict):
+                return False
+
+            # If parameters exist, should have type: object
+            if params.get("type") != "object":
+                return False
+
+            # Recursively validate the schema for array types
+            if not _validate_schema_recursively(params):
+                return False
+
+        return True
+
+    except Exception:
+        return False

From 8e0e0026dc71c9954f2d701914bc3137d65f387f Mon Sep 17 00:00:00 2001
From: atasoglu <ahmetatasoglu98@gmail.com>
Date: Sun, 9 Nov 2025 09:49:21 +0300
Subject: [PATCH 2/5] refactor: rename example files and update configurations
 for Nano Tool Calling v1; add new example script and adjust batch sampling
 progress bar

---
 .../README.md                                 |  4 +-
 .../config.py                                 | 12 +++---
 examples/nano_tool_calling_v1/example.py      | 39 +++++++++++++++++++
 .../utils.py                                  |  0
 .../validation.py                             |  0
 examples/simple_tool_calling_v1/example.py    | 31 ---------------
 src/toolsgen/sampling/batch.py                |  4 +-
 7 files changed, 50 insertions(+), 40 deletions(-)
 rename examples/{simple_tool_calling_v1 => nano_tool_calling_v1}/README.md (89%)
 rename examples/{simple_tool_calling_v1 => nano_tool_calling_v1}/config.py (77%)
 create mode 100644 examples/nano_tool_calling_v1/example.py
 rename examples/{simple_tool_calling_v1 => nano_tool_calling_v1}/utils.py (100%)
 rename examples/{simple_tool_calling_v1 => nano_tool_calling_v1}/validation.py (100%)
 delete mode 100644 examples/simple_tool_calling_v1/example.py

diff --git a/examples/simple_tool_calling_v1/README.md b/examples/nano_tool_calling_v1/README.md
similarity index 89%
rename from examples/simple_tool_calling_v1/README.md
rename to examples/nano_tool_calling_v1/README.md
index 68cac98..268c303 100644
--- a/examples/simple_tool_calling_v1/README.md
+++ b/examples/nano_tool_calling_v1/README.md
@@ -1,4 +1,4 @@
-# Simple Tool Calling v1
+# Nano Tool Calling v1
 
 Generates 10K tool-calling samples from Hugging Face dataset with parallel processing and schema validation.
 
@@ -15,7 +15,7 @@ python example.py
 - **Dataset**: `argilla-warehouse/python-seed-tools`
 - **Samples**: 10,000 (80% train / 20% val)
 - **Parallel**: 8 workers × 16 batch size
-- **Models**: GPT-5-mini (problem/caller), GPT-5 (judge)
+- **Models**: GPT-4.1-nano
 
 ## Files
 
diff --git a/examples/simple_tool_calling_v1/config.py b/examples/nano_tool_calling_v1/config.py
similarity index 77%
rename from examples/simple_tool_calling_v1/config.py
rename to examples/nano_tool_calling_v1/config.py
index 1a89c04..f70cee0 100644
--- a/examples/simple_tool_calling_v1/config.py
+++ b/examples/nano_tool_calling_v1/config.py
@@ -12,23 +12,23 @@
     language="english",
     max_attempts=3,
     k_min=2,
-    k_max=8,
+    k_max=4,
     shuffle_tools=True,
-    num_workers=8,
-    worker_batch_size=16,
+    num_workers=4,
+    worker_batch_size=8,
 )
 
 role_config = RoleBasedModelConfig(
     problem_generator=ModelConfig(
-        model="gpt-5-mini",
+        model="gpt-4.1-nano",
         temperature=1.0,
     ),
     tool_caller=ModelConfig(
-        model="gpt-5-mini",
+        model="gpt-4.1-nano",
         temperature=0,
     ),
     judge=ModelConfig(
-        model="gpt-5",
+        model="gpt-4.1-nano",
         temperature=0,
     ),
 )
diff --git a/examples/nano_tool_calling_v1/example.py b/examples/nano_tool_calling_v1/example.py
new file mode 100644
index 0000000..6fd4d22
--- /dev/null
+++ b/examples/nano_tool_calling_v1/example.py
@@ -0,0 +1,39 @@
+"""
+Hugging Face example - Using a dataset from Hugging Face
+
+Install datasets library by using `pip install datasets` and set the dataset id.
+"""
+
+from pathlib import Path
+from dotenv import load_dotenv
+from utils import dataset_to_tools
+from config import gen_config, role_config
+from toolsgen import generate_dataset
+
+# Load environment variables from .env file
+load_dotenv()
+
+
+def main() -> None:
+    # Load dataset from Hugging Face
+    dataset_id = "argilla-warehouse/python-seed-tools"
+    tools = dataset_to_tools(dataset_id, dataset_kwargs={"split": "train"})
+    output_dir = Path(__file__).parent / "output"
+
+    # Generate dataset
+    manifest = generate_dataset(output_dir, gen_config, role_config, tools=tools)
+
+    # Print summary
+    print(
+        f"\n✓ Generated {manifest['num_generated']}/{manifest['num_requested']} records"
+    )
+    if manifest["num_failed"] > 0:
+        print(f"  Failed: {manifest['num_failed']} attempts")
+    print(f"  Problem Generator: {role_config.problem_generator.model}")
+    print(f"  Tool Caller: {role_config.tool_caller.model}")
+    print(f"  Judge: {role_config.judge.model}")
+    print(f"  Output: {output_dir}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/simple_tool_calling_v1/utils.py b/examples/nano_tool_calling_v1/utils.py
similarity index 100%
rename from examples/simple_tool_calling_v1/utils.py
rename to examples/nano_tool_calling_v1/utils.py
diff --git a/examples/simple_tool_calling_v1/validation.py b/examples/nano_tool_calling_v1/validation.py
similarity index 100%
rename from examples/simple_tool_calling_v1/validation.py
rename to examples/nano_tool_calling_v1/validation.py
diff --git a/examples/simple_tool_calling_v1/example.py b/examples/simple_tool_calling_v1/example.py
deleted file mode 100644
index ee6a607..0000000
--- a/examples/simple_tool_calling_v1/example.py
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-Hugging Face example - Using a dataset from Hugging Face
-
-Install datasets library by using `pip install datasets` and set the dataset id.
-"""
-
-from pathlib import Path
-from dotenv import load_dotenv
-from utils import dataset_to_tools
-from config import gen_config, role_config
-from toolsgen import generate_dataset
-
-# Load environment variables from .env file
-load_dotenv()
-
-# Load dataset from Hugging Face
-dataset_id = "argilla-warehouse/python-seed-tools"
-tools = dataset_to_tools(dataset_id, dataset_kwargs={"split": "train"})
-output_dir = Path(__file__).parent / "output"
-
-# Generate dataset
-manifest = generate_dataset(output_dir, gen_config, role_config, tools=tools)
-
-# Print summary
-print(f"\n✓ Generated {manifest['num_generated']}/{manifest['num_requested']} records")
-if manifest["num_failed"] > 0:
-    print(f"  Failed: {manifest['num_failed']} attempts")
-print(f"  Problem Generator: {role_config.problem_generator.model}")
-print(f"  Tool Caller: {role_config.tool_caller.model}")
-print(f"  Judge: {role_config.judge.model}")
-print(f"  Output: {output_dir}")
diff --git a/src/toolsgen/sampling/batch.py b/src/toolsgen/sampling/batch.py
index f71d939..0731528 100644
--- a/src/toolsgen/sampling/batch.py
+++ b/src/toolsgen/sampling/batch.py
@@ -1,6 +1,8 @@
 import random
 from typing import Callable, List, Optional, Sequence
 
+from tqdm import tqdm
+
 from .param_aware import sample_param_aware_subset
 from .random import sample_random_subset
 from .semantic import sample_semantic_subset
@@ -84,7 +86,7 @@ def batched_subsets(
     using_chunks = batch_size is not None and batch_size > 0
 
     subsets: List[List[ToolSpec]] = []
-    for i in range(total):
+    for i in tqdm(range(total), desc="Preparing tool subsets", total=total):
         batch = batches[i % len(batches)]
         if using_chunks:
             k = len(batch)

From e6fce67df4bed6d8dfdb848e4a536c41f29654fd Mon Sep 17 00:00:00 2001
From: atasoglu <ahmetatasoglu98@gmail.com>
Date: Sun, 9 Nov 2025 23:16:20 +0300
Subject: [PATCH 3/5] chore: update README with dataset details, usage, and
 licensing information

---
 examples/nano_tool_calling_v1/README.md       | 141 +++++++++++++++---
 .../nano_tool_calling_v1/analyze_functions.py |  15 ++
 examples/nano_tool_calling_v1/config.py       |  11 +-
 examples/nano_tool_calling_v1/push_to_hf.py   |  64 ++++++++
 src/toolsgen/core/generator.py                |   4 +
 src/toolsgen/core/parallel.py                 |  25 ++--
 6 files changed, 225 insertions(+), 35 deletions(-)
 create mode 100644 examples/nano_tool_calling_v1/analyze_functions.py
 create mode 100644 examples/nano_tool_calling_v1/push_to_hf.py

diff --git a/examples/nano_tool_calling_v1/README.md b/examples/nano_tool_calling_v1/README.md
index 268c303..f62b642 100644
--- a/examples/nano_tool_calling_v1/README.md
+++ b/examples/nano_tool_calling_v1/README.md
@@ -1,31 +1,132 @@
+---
+license: mit
+task_categories:
+- text-generation
+language:
+- en
+tags:
+- function-calling
+- tool-calling
+- synthetic
+- openai
+size_categories:
+- n<1K
+---
+
 # Nano Tool Calling v1
 
-Generates 10K tool-calling samples from Hugging Face dataset with parallel processing and schema validation.
+A synthetic tool-calling dataset generated using [ToolsGen](https://github.com/atasoglu/toolsgen), with GPT-4.1-nano as the problem generator and tool caller and GPT-4.1-mini as the judge.
+
+## Dataset Details
+
+- **Generated with**: ToolsGen v0.1.0
+- **Source Tools**: [argilla-warehouse/python-seed-tools](https://huggingface.co/datasets/argilla-warehouse/python-seed-tools)
+- **Total Samples**: 989
+- **Language**: English
+- **Format**: Single-turn conversations with tool calls
+
+### Models Used
+
+- **Problem Generator**: gpt-4.1-nano (temp=1.0)
+- **Tool Caller**: gpt-4.1-nano (temp=0.0)
+- **Judge**: gpt-4.1-mini (temp=0.0)
+
+## Dataset Structure
 
-## Setup
+Each record contains:
 
-```bash
-pip install toolsgen datasets python-dotenv
-echo "OPENAI_API_KEY=your-key-here" > .env
-python example.py
+```json
+{
+  "id": "record_000000",
+  "language": "english",
+  "tools": [...],
+  "messages": [
+    {"role": "user", "content": "..."}
+  ],
+  "assistant_calls": [
+    {
+      "id": "call_...",
+      "type": "function",
+      "function": {
+        "name": "function_name",
+        "arguments": "{...}"
+      }
+    }
+  ],
+  "problem_metadata": {...},
+  "judge": {
+    "tool_relevance": 0.4,
+    "argument_quality": 0.38,
+    "clarity": 0.2,
+    "score": 0.98,
+    "verdict": "accept",
+    "rationale": "...",
+    "rubric_version": "0.1.0",
+    "model": "gpt-4.1-mini",
+    "temperature": 0.0
+  },
+  "quality_tags": [],
+  "tools_metadata": {"num_tools": 2}
+}
 ```
 
-## Configuration
+## Generation Details
+
+### Configuration
+
+- **Strategy**: Random tool sampling
+- **Tools per sample**: 1-4 (k_min=1, k_max=4)
+- **Parallel workers**: 16
+- **Worker batch size**: 16
+- **Max attempts**: 3
+- **Seed**: 42
+
+### Quality Control
+
+All samples passed through an LLM-as-a-judge evaluation with a multi-dimensional rubric:
+
+- **Tool Relevance** (40%): Are the selected tools appropriate?
+- **Argument Quality** (40%): Are arguments valid and plausible?
+- **Clarity** (20%): Is the response complete and clear?
 
-- **Dataset**: `argilla-warehouse/python-seed-tools`
-- **Samples**: 10,000 (80% train / 20% val)
-- **Parallel**: 8 workers × 16 batch size
-- **Models**: GPT-4.1-nano
+Samples with `score >= 0.7` and `verdict == "accept"` are included.
 
-## Files
+## Usage
 
-- `example.py` - Main generation script
-- `config.py` - Generation and model settings
-- `utils.py` - HF dataset loader
-- `validation.py` - Schema validator (ensures arrays have `items`)
+```python
+from datasets import load_dataset
+
+dataset = load_dataset("atasoglu/nano-tool-calling-v1")
+
+# Access a sample
+sample = dataset["train"][0]
+print(sample["messages"])
+print(sample["assistant_calls"])
+```
+
+## Source Tools
+
+The dataset uses 38,420 Python function definitions from the [python-seed-tools](https://huggingface.co/datasets/argilla-warehouse/python-seed-tools) dataset, covering diverse programming tasks and domains.
+
+## Limitations
+
+- Single-turn conversations only
+- English language only
+- Synthetic data generated by LLMs (may contain artifacts)
+- No actual tool execution or validation
+- Judge scores are model-based assessments
+
+## Citation
+
+```bibtex
+@software{toolsgen2025,
+  title = {ToolsGen: Synthetic Tool-Calling Dataset Generator},
+  author = {Ataşoğlu, Ahmet},
+  year = {2025},
+  url = {https://github.com/atasoglu/toolsgen}
+}
+```
 
-## Output
+## License
 
-- `output/train.jsonl` - Training set
-- `output/val.jsonl` - Validation set
-- `output/manifest.json` - Metadata
+MIT License
diff --git a/examples/nano_tool_calling_v1/analyze_functions.py b/examples/nano_tool_calling_v1/analyze_functions.py
new file mode 100644
index 0000000..ad38ea9
--- /dev/null
+++ b/examples/nano_tool_calling_v1/analyze_functions.py
@@ -0,0 +1,15 @@
+import json
+from collections import Counter
+
+with open("output/train.jsonl", "r", encoding="utf-8") as f:
+    function_counts = Counter()
+
+    for line in f:
+        record = json.loads(line)
+        for tool in record.get("tools", []):
+            func_name = tool.get("function", {}).get("name")
+            if func_name:
+                function_counts[func_name] += 1
+
+for func, count in function_counts.most_common():
+    print(f"{func}: {count}")
diff --git a/examples/nano_tool_calling_v1/config.py b/examples/nano_tool_calling_v1/config.py
index f70cee0..babd9bb 100644
--- a/examples/nano_tool_calling_v1/config.py
+++ b/examples/nano_tool_calling_v1/config.py
@@ -5,17 +5,16 @@
 )
 
 gen_config = GenerationConfig(
-    num_samples=10_000,
+    num_samples=1_000,
     strategy="random",
     seed=42,
-    train_split=0.8,
     language="english",
     max_attempts=3,
-    k_min=2,
+    k_min=1,
     k_max=4,
     shuffle_tools=True,
-    num_workers=4,
-    worker_batch_size=8,
+    num_workers=16,
+    worker_batch_size=16,
 )
 
 role_config = RoleBasedModelConfig(
@@ -28,7 +27,7 @@
         temperature=0,
     ),
     judge=ModelConfig(
-        model="gpt-4.1-nano",
+        model="gpt-4.1-mini",
         temperature=0,
     ),
 )
diff --git a/examples/nano_tool_calling_v1/push_to_hf.py b/examples/nano_tool_calling_v1/push_to_hf.py
new file mode 100644
index 0000000..aa9486e
--- /dev/null
+++ b/examples/nano_tool_calling_v1/push_to_hf.py
@@ -0,0 +1,64 @@
+import json
+import os
+from pathlib import Path
+from datasets import Dataset, DatasetDict
+from huggingface_hub import DatasetCard
+from dotenv import load_dotenv
+
+
+def load_jsonl(path: Path) -> list[dict]:
+    return [json.loads(line) for line in open(path, encoding="utf-8")]
+
+
+def push_to_hub(
+    dataset_path: Path,
+    repo_id: str,
+    token: str | None = None,
+    private: bool = False,
+    readme_path: Path | None = None,
+):
+    train = load_jsonl(dataset_path / "train.jsonl")
+    val_path = dataset_path / "val.jsonl"
+
+    # Convert to JSON strings to avoid schema issues
+    for record in train:
+        record["tools"] = json.dumps(record["tools"])
+        record["messages"] = json.dumps(record["messages"])
+        record["assistant_calls"] = json.dumps(record["assistant_calls"])
+        record["problem_metadata"] = json.dumps(record["problem_metadata"])
+        record["judge"] = json.dumps(record["judge"])
+        record["quality_tags"] = json.dumps(record["quality_tags"])
+        record["tools_metadata"] = json.dumps(record["tools_metadata"])
+
+    dataset = Dataset.from_list(train)
+
+    if val_path.exists():
+        val = load_jsonl(val_path)
+        for record in val:
+            record["tools"] = json.dumps(record["tools"])
+            record["messages"] = json.dumps(record["messages"])
+            record["assistant_calls"] = json.dumps(record["assistant_calls"])
+            record["problem_metadata"] = json.dumps(record["problem_metadata"])
+            record["judge"] = json.dumps(record["judge"])
+            record["quality_tags"] = json.dumps(record["quality_tags"])
+            record["tools_metadata"] = json.dumps(record["tools_metadata"])
+        dataset = DatasetDict({"train": dataset, "validation": Dataset.from_list(val)})
+
+    dataset.push_to_hub(repo_id, token=token, private=private)
+
+    if readme_path and readme_path.exists():
+        card = DatasetCard(open(readme_path, encoding="utf-8").read())
+        card.push_to_hub(repo_id, token=token)
+
+    print(f"✓ Pushed to https://huggingface.co/datasets/{repo_id}")
+
+
+if __name__ == "__main__":
+    load_dotenv()
+    base_path = Path(__file__).parent
+    push_to_hub(
+        base_path / "output",
+        "atasoglu/nano-tool-calling-v1",
+        os.getenv("HF_TOKEN"),
+        readme_path=base_path / "README.md",
+    )
diff --git a/src/toolsgen/core/generator.py b/src/toolsgen/core/generator.py
index 911ddce..171f0a6 100644
--- a/src/toolsgen/core/generator.py
+++ b/src/toolsgen/core/generator.py
@@ -58,6 +58,10 @@ def _split_records(
             "val": shuffled[split_idx:],
         }
 
+        temp_train = output_dir / "train.jsonl"
+        if temp_train.exists():
+            temp_train.unlink()
+
         for split_name, split_records in splits.items():
             if split_records:
                 split_path = output_dir / f"{split_name}.jsonl"
diff --git a/src/toolsgen/core/parallel.py b/src/toolsgen/core/parallel.py
index 3ad6017..da8c00e 100644
--- a/src/toolsgen/core/parallel.py
+++ b/src/toolsgen/core/parallel.py
@@ -132,6 +132,8 @@ def generate_records_parallel(
         return [], 0
 
     results_by_index: Dict[int, Record] = {}
+    failed_indices: set[int] = set()
+    written_records: List[Record] = []
     failed = 0
     next_id_to_write = 0
 
@@ -157,14 +159,8 @@ def generate_records_parallel(
                     if sample_result.record:
                         record = Record.model_validate(sample_result.record)
                         results_by_index[sample_result.sample_index] = record
-
-                        while next_id_to_write in results_by_index:
-                            rec = results_by_index[next_id_to_write]
-                            rec.id = f"record_{next_id_to_write:06d}"
-                            append_record_jsonl(rec, jsonl_path)
-                            del results_by_index[next_id_to_write]
-                            next_id_to_write += 1
                     else:
+                        failed_indices.add(sample_result.sample_index)
                         tqdm.write(
                             "Warning: Failed to generate sample "
                             f"{sample_result.sample_index} after {gen_config.max_attempts} attempts"
@@ -175,7 +171,18 @@ def generate_records_parallel(
                             )
                         )
 
+                    while (
+                        next_id_to_write in results_by_index
+                        or next_id_to_write in failed_indices
+                    ):
+                        if next_id_to_write in results_by_index:
+                            rec = results_by_index[next_id_to_write]
+                            rec.id = f"record_{next_id_to_write:06d}"
+                            append_record_jsonl(rec, jsonl_path)
+                            written_records.append(rec)
+                            del results_by_index[next_id_to_write]
+                        next_id_to_write += 1
+
                     pbar.update(1)
 
-    all_records = [results_by_index[i] for i in sorted(results_by_index.keys())]
-    return all_records, failed
+    return written_records, failed

From 98a485ac04caeeb9802105b4e0609ef3801f2acf Mon Sep 17 00:00:00 2001
From: atasoglu <ahmetatasoglu98@gmail.com>
Date: Sun, 9 Nov 2025 23:18:57 +0300
Subject: [PATCH 4/5] chore(release): update to version 0.3.0 with new Hugging
 Face integration and workflow enhancements

- Added Hugging Face dataset utilities and dataset upload script
- Included complete Nano Tool Calling v1 dataset generation example
- Improved progress bar display and parallel processing record handling
- Updated project version to 0.3.0
---
 CHANGELOG.md   | 15 +++++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 28028aa..ecb55ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,21 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve
 
 Nothing yet.
 
+## [0.3.0] - 2025-11-09
+### Added
+- Hugging Face dataset integration utilities in `examples/nano_tool_calling_v1/`
+  - `dataset_to_tools()` function to load tools from Hugging Face datasets
+  - `validate_json_schema()` for OpenAI tool schema validation with recursive array type checking
+  - `push_to_hf.py` script for uploading generated datasets to Hugging Face Hub
+- Complete example workflow for Nano Tool Calling v1 dataset generation
+  - Configuration, generation, validation, and publishing pipeline
+  - Analysis utilities for function inspection
+  - Comprehensive README with dataset card format
+
+### Changed
+- Added a `tqdm` progress bar ("Preparing tool subsets") to batch subset sampling
+- Parallel generation no longer stalls JSONL writes behind a failed sample index: failed indices are skipped and the returned records are exactly those written
+
 ## [0.2.2] - 2025-01-09
 ### Changed
 - Records are now written to JSONL file immediately as they complete in parallel mode, rather than waiting for all generation to finish
diff --git a/pyproject.toml b/pyproject.toml
index d0d9b93..502c02d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ toolsgen = ["prompts/*.txt"]
 
 [project]
 name = "toolsgen"
-version = "0.2.2"
+version = "0.3.0"
 description = "Generate tool-calling datasets from OpenAI-compatible tool specs"
 readme = "README.md"
 requires-python = ">=3.9"

From e0437ecbd933e76ee13f15ab877b816a3529ad6a Mon Sep 17 00:00:00 2001
From: atasoglu <ahmetatasoglu98@gmail.com>
Date: Sun, 9 Nov 2025 23:19:19 +0300
Subject: [PATCH 5/5] chore: update toolsgen to version 0.3.0 for latest
 features and fixes

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index 252874b..3ed0922 100644
--- a/uv.lock
+++ b/uv.lock
@@ -396,7 +396,7 @@ wheels = [
 
 [[package]]
 name = "toolsgen"
-version = "0.1.3"
+version = "0.3.0"
 source = { editable = "." }
 dependencies = [
     { name = "openai" },