nsv-format · namingbe · Mar 14, 2026 · Feb 18, 2026 · Mar 14, 2026 · Mar 14, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -24,7 +24,7 @@ jobs:
     - name: Install package
       run: |
         python -m pip install --upgrade pip
-        pip install -e .
+        pip install -e ".[pandas]"
 
     - name: Run tests
       run: python -m unittest discover -s tests -p 'test*.py' -v
diff --git a/nsv/__init__.py b/nsv/__init__.py
@@ -12,18 +12,66 @@ def patch_pandas():
     if 'pandas' not in sys.modules:
         return
     pd = sys.modules['pandas']
+    from pandas.io.parsers.readers import STR_NA_VALUES
 
-    def read_nsv(filepath_or_buffer, **kwargs):
+    bool_values = frozenset({'true', 'false'})
+
+    def _infer_column(col):
+        """Infer a dtype for one column of raw string cells.
+
+        Cells found in pandas' ``STR_NA_VALUES`` are masked as missing. The
+        column is returned as numeric if every remaining cell parses as a
+        number, as booleans if every remaining cell is 'true'/'false' in any
+        letter case, and otherwise unchanged apart from the masked cells.
+        """
+        na_mask = col.isin(STR_NA_VALUES)
+        col_na = col.where(~na_mask)
+
+        # Accept the numeric result only if coercion created no new NaNs,
+        # i.e. every cell that was not an NA marker parsed as a number.
+        converted = pd.to_numeric(col_na, errors='coerce')
+        if not (converted.isna() & col_na.notna()).any():
+            return converted
+
+        non_na = col_na.dropna()
+        if len(non_na) > 0 and non_na.str.lower().isin(bool_values).all():
+            as_bool = col_na.map(lambda x: x.lower() == 'true' if pd.notna(x) else x)
+            # With NA markers present keep the object column of bools and
+            # NaN; only an NA-free column is cast to a real bool dtype.
+            return as_bool if na_mask.any() else as_bool.astype(bool)
+
+        return col_na
+
+    def read_nsv(filepath_or_buffer, dtype=None, **kwargs):
+        """Load NSV data into a DataFrame.
+
+        A ``str`` argument is opened as a file path; anything else is passed
+        to ``load`` as-is. When ``dtype`` is given it is applied with
+        ``DataFrame.astype`` and no inference is done; otherwise each column
+        is run through ``_infer_column``.
+        """
         if isinstance(filepath_or_buffer, str):
             with open(filepath_or_buffer, 'r') as f:
                 data = load(f)
         else:
             data = load(filepath_or_buffer)
-        return pd.DataFrame(data)
+        df = pd.DataFrame(data)
+        if dtype is not None:
+            df = df.astype(dtype)
+        else:
+            for col in df.columns:
+                df[col] = _infer_column(df[col])
+        return df
 
     def to_nsv(self, path_or_buf=None, **kwargs):
-        # TODO: this is naive, pandas can have non-string values
-        data = self.values
+        # Iterate rows with itertuples rather than self.values: .values
+        # upcasts mixed-dtype frames to one common dtype, so an int column
+        # next to a float column would be written as '1.0' instead of '1'.
+        # Missing values are written as empty cells.
+        data = [
+            ['' if pd.isna(v) else str(v) for v in row]
+            for row in self.itertuples(index=False, name=None)
+        ]
 
         if path_or_buf is None:
             return dumps(data)

diff --git a/nsv/core.py b/nsv/core.py
@@ -30,7 +30,7 @@ def dump(data: Iterable[Iterable[str]], file_obj):
 def dumps(data: Iterable[Iterable[str]]) -> str:
     """Write elements to an NSV string."""
     lines = []
-    for i, row in enumerate(data):
+    for row in data:
         for cell in row:
             lines.append(Writer.escape(cell))
         lines.append('')

diff --git a/nsv/reader.py b/nsv/reader.py
@@ -63,7 +63,7 @@ def check(s: str):
             else:
                 col += 1
         if escaped:
-            sus.append(len(s) - 1)
+            sus.append((len(s) - 1, line, col))
         for pos, line, col in sus:
             print(f'WARNING: Unescaped backslash at position {pos} ({line}:{col})')
         if s[-1] != '\n':

diff --git a/tests/test_pandas.py b/tests/test_pandas.py
@@ -0,0 +1,159 @@
+import unittest
+from io import StringIO
+
+import pandas as pd
+import numpy as np
+
+import nsv
+
+
+def setUpModule():
+    nsv.patch_pandas()
+
+
+class _CsvParityMixin:
+    """Shared helper that checks read_nsv against read_csv on the same rows."""
+
+    def _compare_with_csv(self, rows):
+        """Assert that read_nsv produces the same dtypes and values as read_csv."""
+        nsv_str = nsv.dumps(rows)
+        csv_str = '\n'.join(','.join(row) for row in rows) + '\n'
+
+        nsv_df = pd.read_nsv(StringIO(nsv_str))
+        csv_df = pd.read_csv(StringIO(csv_str), header=None)
+
+        self.assertEqual(list(nsv_df.dtypes), list(csv_df.dtypes),
+                         f"dtype mismatch for rows={rows}")
+        pd.testing.assert_frame_equal(nsv_df, csv_df)
+
+
+class TestReadNsvTypeInference(_CsvParityMixin, unittest.TestCase):
+    """read_nsv should infer types the same way read_csv does."""
+
+    def test_integers(self):
+        self._compare_with_csv([['1', '2'], ['3', '4']])
+
+    def test_floats(self):
+        self._compare_with_csv([['1.5', '2.5'], ['3.5', '4.5']])
+
+    def test_mixed_int_float(self):
+        self._compare_with_csv([['1', '2.5'], ['3', '4.5']])
+
+    def test_strings(self):
+        self._compare_with_csv([['hello', 'world'], ['foo', 'bar']])
+
+    def test_mixed_numeric_and_string(self):
+        self._compare_with_csv([['123', 'abc'], ['456', 'def']])
+
+    def test_empty_fields_in_numeric_column(self):
+        self._compare_with_csv([['1', 'a'], ['', 'b'], ['3', 'c']])
+
+    def test_scientific_notation(self):
+        self._compare_with_csv([['1.23e5', '4.56e-2'], ['7.89e1', '0.12e3']])
+
+    def test_negative_numbers(self):
+        self._compare_with_csv([['-1', '-2.5'], ['3', '4.5']])
+
+    def test_all_empty(self):
+        self._compare_with_csv([['', ''], ['', '']])
+
+
+class TestReadNsvNullInference(_CsvParityMixin, unittest.TestCase):
+    """read_nsv should treat the same strings as NaN that read_csv does."""
+
+    def test_na_string_in_numeric_column(self):
+        self._compare_with_csv([['NA', '1'], ['2', '3']])
+
+    def test_nan_string_in_numeric_column(self):
+        self._compare_with_csv([['NaN', '1'], ['2', '3']])
+
+    def test_nan_lowercase_in_numeric_column(self):
+        self._compare_with_csv([['nan', '1'], ['2', '3']])
+
+    def test_null_string_in_numeric_column(self):
+        self._compare_with_csv([['null', '1'], ['2', '3']])
+
+    def test_none_string_in_numeric_column(self):
+        self._compare_with_csv([['None', '1'], ['2', '3']])
+
+    def test_na_string_in_string_column(self):
+        self._compare_with_csv([['hello', 'NA'], ['world', 'there']])
+
+    def test_all_na_column(self):
+        self._compare_with_csv([['NA', 'a'], ['NaN', 'b'], ['null', 'c']])
+
+
+class TestReadNsvBoolInference(_CsvParityMixin, unittest.TestCase):
+    """read_nsv should infer bool columns the same way read_csv does."""
+
+    def test_bool_true_false(self):
+        self._compare_with_csv([['True', 'False'], ['True', 'False']])
+
+    def test_bool_lowercase(self):
+        self._compare_with_csv([['true', 'false'], ['true', 'false']])
+
+    def test_bool_uppercase(self):
+        self._compare_with_csv([['TRUE', 'FALSE'], ['TRUE', 'FALSE']])
+
+    def test_bool_mixed_case(self):
+        self._compare_with_csv([['True', 'false'], ['FALSE', 'True']])
+
+    def test_bool_with_na(self):
+        # NA mixed in: read_csv returns object with Python bools and nan
+        self._compare_with_csv([['True', 'a'], ['NA', 'b'], ['False', 'c']])
+
+    def test_not_bool_T_F(self):
+        # 'T'/'F' are NOT inferred as bool by read_csv
+        self._compare_with_csv([['T', 'F'], ['T', 'F']])
+
+
+class TestReadNsvDtype(unittest.TestCase):
+    """read_nsv should support explicit dtype parameter."""
+
+    def test_dtype_str_suppresses_inference(self):
+        data = [['123', '456'], ['789', '012']]
+        nsv_str = nsv.dumps(data)
+        df = pd.read_nsv(StringIO(nsv_str), dtype=str)
+        for col in df.columns:
+            self.assertFalse(pd.api.types.is_numeric_dtype(df[col]))
+        self.assertEqual(df.iloc[0, 0], '123')
+
+    def test_dtype_per_column(self):
+        data = [['123', '4.5'], ['789', '6.7']]
+        nsv_str = nsv.dumps(data)
+        df = pd.read_nsv(StringIO(nsv_str), dtype={0: float, 1: str})
+        self.assertTrue(pd.api.types.is_float_dtype(df[0]))
+        self.assertFalse(pd.api.types.is_numeric_dtype(df[1]))
+
+
+class TestToNsv(unittest.TestCase):
+    """to_nsv should handle non-string types gracefully."""
+
+    def test_roundtrip_integers(self):
+        df = pd.DataFrame({0: [1, 2, 3], 1: [4, 5, 6]})
+        nsv_str = df.to_nsv()
+        self.assertIsInstance(nsv_str, str)
+        df2 = pd.read_nsv(StringIO(nsv_str))
+        pd.testing.assert_frame_equal(df, df2)
+
+    def test_roundtrip_floats(self):
+        df = pd.DataFrame({0: [1.5, 2.5], 1: [3.5, 4.5]})
+        nsv_str = df.to_nsv()
+        df2 = pd.read_nsv(StringIO(nsv_str))
+        pd.testing.assert_frame_equal(df, df2)
+
+    def test_roundtrip_mixed(self):
+        df = pd.DataFrame({0: [1, 2], 1: ['x', 'y']})
+        nsv_str = df.to_nsv()
+        df2 = pd.read_nsv(StringIO(nsv_str))
+        pd.testing.assert_frame_equal(df, df2)
+
+    def test_nan_becomes_empty(self):
+        df = pd.DataFrame({'a': [1.0, float('nan'), 3.0]})
+        nsv_str = df.to_nsv()
+        rows = nsv.loads(nsv_str)
+        self.assertEqual(rows[1], [''])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -42,7 +42,7 @@ def dump_then_load(data):
 
 
 def load_then_dump(s):
-    return nsv.dumps(*nsv.loads(s))
+    return nsv.dumps(nsv.loads(s))
 
 
 def load_sample(name):