validmind · juanmleng · Feb 28, 2026 · Feb 27, 2026 · Feb 28, 2026
diff --git a/notebooks/tutorials/model_validation/4-finalize_validation_reporting.ipynb b/notebooks/tutorials/model_validation/4-finalize_validation_reporting.ipynb
@@ -967,7 +967,7 @@
     "    },\n",
     "    'validmind.data_validation.MissingValues:raw_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset'},\n",
-    "        'params': {'min_threshold': 1}\n",
+    "        'params': {'min_percentage_threshold': 1}\n",
     "    },\n",
     "    'validmind.data_validation.ClassImbalance:raw_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset'},\n",
@@ -1010,7 +1010,7 @@
     "    },\n",
     "    'validmind.data_validation.MissingValues:preprocessed_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset_preprocessed'},\n",
-    "        'params': {'min_threshold': 1}\n",
+    "        'params': {'min_percentage_threshold': 1}\n",
     "    },\n",
     "    'validmind.data_validation.TabularNumericalHistograms:preprocessed_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset_preprocessed'}\n",

diff --git a/notebooks/use_cases/credit_risk/application_scorecard_with_bias.ipynb b/notebooks/use_cases/credit_risk/application_scorecard_with_bias.ipynb
@@ -760,7 +760,7 @@
     "        \"dataset\": \"raw_dataset\",\n",
     "    },\n",
     "    params={\n",
-    "        \"min_threshold\": 1,\n",
+    "        \"min_percentage_threshold\": 1,\n",
     "    }\n",
     ")\n",
     "test.log()"

diff --git a/notebooks/use_cases/credit_risk/application_scorecard_with_ml.ipynb b/notebooks/use_cases/credit_risk/application_scorecard_with_ml.ipynb
@@ -785,7 +785,7 @@
     "        \"dataset\": vm_raw_dataset,\n",
     "    },\n",
     "    params={\n",
-    "        \"min_threshold\": 1\n",
+    "        \"min_percentage_threshold\": 1\n",
     "    }\n",
     ").log()"
    ]
@@ -819,7 +819,7 @@
     "        \"dataset\": vm_raw_dataset,\n",
     "    },\n",
     "    params={\n",
-    "        \"min_threshold\": 1\n",
+    "        \"min_percentage_threshold\": 1\n",
     "    }\n",
     ").log()"
    ]
@@ -1747,7 +1747,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [

diff --git a/notebooks/use_cases/credit_risk/document_excel_application_scorecard.ipynb b/notebooks/use_cases/credit_risk/document_excel_application_scorecard.ipynb
@@ -603,7 +603,7 @@
     "    },\n",
     "    'validmind.data_validation.MissingValues:raw_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset'},\n",
-    "        'params': {'min_threshold': 1}\n",
+    "        'params': {'min_percentage_threshold': 1}\n",
     "    },\n",
     "    'validmind.data_validation.ClassImbalance:raw_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset'},\n",
@@ -647,7 +647,7 @@
     "    },\n",
     "    'validmind.data_validation.MissingValues:preprocessed_data': {\n",
     "        'inputs': {'dataset': 'preprocess_dataset'},\n",
-    "        'params': {'min_threshold': 1}\n",
+    "        'params': {'min_percentage_threshold': 1}\n",
     "    },\n",
     "    'validmind.data_validation.TabularNumericalHistograms:preprocessed_data': {\n",
     "        'inputs': {'dataset': 'preprocess_dataset'}\n",

diff --git a/notebooks/use_cases/model_validation/validate_application_scorecard.ipynb b/notebooks/use_cases/model_validation/validate_application_scorecard.ipynb
@@ -1629,7 +1629,7 @@
     "    },\n",
     "    'validmind.data_validation.MissingValues:raw_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset'},\n",
-    "        'params': {'min_threshold': 1}\n",
+    "        'params': {'min_percentage_threshold': 1}\n",
     "    },\n",
     "    'validmind.data_validation.ClassImbalance:raw_data': {\n",
     "        'inputs': {'dataset': 'raw_dataset'},\n",
@@ -1672,7 +1672,7 @@
     "    },\n",
     "    'validmind.data_validation.MissingValues:preprocessed_data': {\n",
     "        'inputs': {'dataset': 'preprocess_dataset'},\n",
-    "        'params': {'min_threshold': 1}\n",
+    "        'params': {'min_percentage_threshold': 1}\n",
     "    },\n",
     "    'validmind.data_validation.TabularNumericalHistograms:preprocessed_data': {\n",
     "        'inputs': {'dataset': 'preprocess_dataset'}\n",

diff --git a/notebooks/use_cases/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb b/notebooks/use_cases/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb
@@ -623,7 +623,7 @@
     "        \"dataset\": vm_monitoring_ds,\n",
     "    },\n",
     "    params={\n",
-    "        \"min_threshold\": 1\n",
+    "        \"min_percentage_threshold\": 1\n",
     "    }\n",
     ").log()"
    ]

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "validmind"
-version = "2.12.0"
+version = "2.12.1"
 description = "ValidMind Library"
 readme = "README.pypi.md"
 requires-python = ">=3.9,<3.13"

diff --git a/tests/unit_tests/data_validation/test_MissingValues.py b/tests/unit_tests/data_validation/test_MissingValues.py
@@ -61,7 +61,7 @@ def test_missing_values_counts(self):
         self.assertEqual(some_missing["Percentage of Missing Values (%)"], 20.0)
         self.assertEqual(all_missing["Percentage of Missing Values (%)"], 100.0)
 
-        # Check Pass/Fail status (with default min_threshold=1)
+        # Check Pass/Fail status (with default min_percentage_threshold=1.0)
         self.assertEqual(no_missing["Pass/Fail"], "Pass")
         self.assertEqual(some_missing["Pass/Fail"], "Fail")
         self.assertEqual(all_missing["Pass/Fail"], "Fail")
@@ -71,7 +71,9 @@ def test_missing_values_counts(self):
 
     def test_threshold_parameter(self):
         # Test with higher threshold that allows some missing values
-        summary, passed, raw_data = MissingValues(self.vm_dataset, min_threshold=25)
+        summary, passed, raw_data = MissingValues(
+            self.vm_dataset, min_percentage_threshold=25
+        )
 
         # Get results
         some_missing = next(s for s in summary if s["Column"] == "some_missing")

diff --git a/validmind/datasets/credit_risk/lending_club.py b/validmind/datasets/credit_risk/lending_club.py
@@ -514,7 +514,7 @@ def get_demo_test_config(
         "inputs": {
             "dataset": "raw_dataset",
         },
-        "params": {"min_threshold": 1},
+        "params": {"min_percentage_threshold": 1},
     }
     default_config["validmind.data_validation.ClassImbalance:raw_data"] = {
         "inputs": {
@@ -582,7 +582,7 @@ def get_demo_test_config(
         "inputs": {
             "dataset": "preprocess_dataset",
         },
-        "params": {"min_threshold": 1},
+        "params": {"min_percentage_threshold": 1},
     }
     default_config[
         "validmind.data_validation.TabularNumericalHistograms:preprocessed_data"

diff --git a/validmind/tests/data_validation/MissingValues.py b/validmind/tests/data_validation/MissingValues.py
@@ -11,29 +11,30 @@
 @tags("tabular_data", "data_quality")
 @tasks("classification", "regression")
 def MissingValues(
-    dataset: VMDataset, min_threshold: int = 1
+    dataset: VMDataset,
+    min_percentage_threshold: float = 1.0,
 ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
-    Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold.
+    Evaluates dataset quality by ensuring missing value percentage across all features does not exceed a set threshold.
 
     ### Purpose
 
     The Missing Values test is designed to evaluate the quality of a dataset by measuring the number of missing values
     across all features. The objective is to ensure that the ratio of missing data to total data is less than a
-    predefined threshold, defaulting to 1, in order to maintain the data quality necessary for reliable predictive
-    strength in a machine learning model.
+    predefined threshold (a percentage on the 0-100 scale), defaulting to 1.0 (i.e. 1%), in order to maintain the data
+    quality necessary for reliable predictive strength in a machine learning model.
 
     ### Test Mechanism
 
     The mechanism for this test involves iterating through each column of the dataset, counting missing values
     (represented as NaNs), and calculating the percentage they represent against the total number of rows. The test
-    then checks if these missing value counts are less than the predefined `min_threshold`. The results are shown in a
-    table summarizing each column, the number of missing values, the percentage of missing values in each column, and a
-    Pass/Fail status based on the threshold comparison.
+    then checks if the missing value percentage is less than or equal to the predefined `min_percentage_threshold`.
+    The results are shown in a table summarizing each column, the number of missing values, the percentage of missing
+    values in each column, and a Pass/Fail status based on the threshold comparison.
 
     ### Signs of High Risk
 
-    - When the number of missing values in any column exceeds the `min_threshold` value.
+    - When the missing value percentage in any column exceeds the `min_percentage_threshold` value.
     - Presence of missing values across many columns, leading to multiple instances of failing the threshold.
 
     ### Strengths
@@ -45,24 +46,28 @@ def MissingValues(
     ### Limitations
 
     - Does not suggest the root causes of the missing values or recommend ways to impute or handle them.
-    - May overlook features with significant missing data but still less than the `min_threshold`, potentially
+    - May overlook features with significant missing data but still within the `min_percentage_threshold`, potentially
     impacting the model.
     - Does not account for data encoded as values like "-999" or "None," which might not technically classify as
     missing but could bear similar implications.
     """
     df = dataset.df
     missing = df.isna().sum()
+    n_rows = df.shape[0]
+    missing_pct = (missing / n_rows * 100) if n_rows else (missing * 0.0)
 
     return (
         [
             {
                 "Column": col,
                 "Number of Missing Values": missing[col],
-                "Percentage of Missing Values (%)": missing[col] / df.shape[0] * 100,
-                "Pass/Fail": "Pass" if missing[col] < min_threshold else "Fail",
+                "Percentage of Missing Values (%)": missing_pct[col],
+                "Pass/Fail": "Pass"
+                if missing_pct[col] <= min_percentage_threshold
+                else "Fail",
             }
             for col in missing.index
         ],
-        all(missing[col] < min_threshold for col in missing.index),
+        all(missing_pct[col] <= min_percentage_threshold for col in missing.index),
         RawData(missing_values=missing, dataset=dataset.input_id),
     )