From 90280900691dbbed8d7757ead16d254ff1ed0d5f Mon Sep 17 00:00:00 2001 From: Hiren Date: Wed, 25 Feb 2026 15:54:48 -0500 Subject: [PATCH 1/4] Fix: Escape ? and | characters in TokenEscaper Added ? and | to the escape patterns in TokenEscaper to comply with RediSearch query syntax requirements. Changes: - Updated DEFAULT_ESCAPED_CHARS regex to include ? and | - Updated ESCAPED_CHARS_NO_WILDCARD regex to include ? and | - Enabled previously commented-out test cases for pipe and question mark - Updated test expectations for symbols test case Fixes #490 --- redisvl/utils/token_escaper.py | 4 ++-- tests/unit/test_token_escaper.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/redisvl/utils/token_escaper.py b/redisvl/utils/token_escaper.py index 04e04cd2..e5aca607 100644 --- a/redisvl/utils/token_escaper.py +++ b/redisvl/utils/token_escaper.py @@ -10,10 +10,10 @@ class TokenEscaper: # Characters that RediSearch requires us to escape during queries. # Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization - DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]" + DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ?|]" # Same as above but excludes * to allow wildcard patterns - ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ ]" + ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ ?|]" def __init__(self, escape_chars_re: Optional[Pattern] = None): if escape_chars_re: diff --git a/tests/unit/test_token_escaper.py b/tests/unit/test_token_escaper.py index 0adb2d11..5d1d8778 100644 --- a/tests/unit/test_token_escaper.py +++ b/tests/unit/test_token_escaper.py @@ -19,8 +19,8 @@ def escaper(): ), ( r"& symbols, like * and ?", - r"\&\ symbols\,\ like\ \*\ and\ ?", - ), # TODO: question marks are not caught? 
+ r"\&\ symbols\,\ like\ \*\ and\ \?", + ), # underscores are ignored (r"-dashes_and_underscores-", r"\-dashes_and_underscores\-"), ], @@ -52,12 +52,12 @@ def test_escape_text_chars(escaper, test_input, expected): # Tags with less common, but legal characters ("_underscore_", r"_underscore_"), ("dot.tag", r"dot\.tag"), - # ("pipe|tag", r"pipe\|tag"), #TODO - pipes are not caught? + ("pipe|tag", r"pipe\|tag"), # More edge cases with special characters ("(parentheses)", r"\(parentheses\)"), ("[brackets]", r"\[brackets\]"), ("{braces}", r"\{braces\}"), - # ("question?mark", r"question\?mark"), #TODO - question marks are not caught? + ("question?mark", r"question\?mark"), # Unicode characters in tags ("你好", r"你好"), # Assuming non-Latin characters don't need escaping ("emoji:😊", r"emoji\:😊"), @@ -78,9 +78,11 @@ def test_escape_text_chars(escaper, test_input, expected): "hyphen", "underscore", "dot", + "pipe", "parentheses", "brackets", "braces", + "question", "non-latin", "emoji", ], From 70ceefb9888698d7f2a92c893a98675f07dae79e Mon Sep 17 00:00:00 2001 From: Hiren Date: Thu, 26 Feb 2026 08:20:26 -0500 Subject: [PATCH 2/4] docs: update Redis docs URL to current documentation --- redisvl/utils/token_escaper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redisvl/utils/token_escaper.py b/redisvl/utils/token_escaper.py index e5aca607..57becd62 100644 --- a/redisvl/utils/token_escaper.py +++ b/redisvl/utils/token_escaper.py @@ -9,7 +9,7 @@ class TokenEscaper: """ # Characters that RediSearch requires us to escape during queries. 
- # Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization + # Source: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/escaping/#tokenization-rules-for-text-fields DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ?|]" # Same as above but excludes * to allow wildcard patterns From 699e332cc14f0e02c46160874dba324398d14121 Mon Sep 17 00:00:00 2001 From: Hiren Date: Sat, 28 Feb 2026 08:20:05 -0500 Subject: [PATCH 3/4] fix: preserve ? wildcard alongside * in preserve_wildcards mode - Remove ? from ESCAPED_CHARS_NO_WILDCARD to match * behavior - ? is a single-character wildcard in RediSearch queries - Add tests for preserve_wildcards behavior with * and ? --- redisvl/utils/token_escaper.py | 4 ++-- tests/unit/test_token_escaper.py | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/redisvl/utils/token_escaper.py b/redisvl/utils/token_escaper.py index 57becd62..a7d2357a 100644 --- a/redisvl/utils/token_escaper.py +++ b/redisvl/utils/token_escaper.py @@ -12,8 +12,8 @@ class TokenEscaper: # Source: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/escaping/#tokenization-rules-for-text-fields DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ?|]" - # Same as above but excludes * to allow wildcard patterns - ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ ?|]" + # Same as above but excludes * and ? 
to allow wildcard patterns + ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ |]" def __init__(self, escape_chars_re: Optional[Pattern] = None): if escape_chars_re: diff --git a/tests/unit/test_token_escaper.py b/tests/unit/test_token_escaper.py index 5d1d8778..afca368e 100644 --- a/tests/unit/test_token_escaper.py +++ b/tests/unit/test_token_escaper.py @@ -122,3 +122,26 @@ def test_escape_long_string(escaper): # Use pytest's benchmark fixture to check performance escaped = escaper.escape(long_str) assert escaped == expected + + +@pytest.mark.parametrize( + ("test_input,expected"), + [ + ("wild*card", "wild*card"), + ("single?char", "single?char"), + ("combo*test?", "combo*test?"), + ("mixed*and|pipe", "mixed*and\|pipe"), + ("question?and|pipe", "question\?and\|pipe"), # ? escaped when not preserving + ], + ids=["star", "question", "both", "star-only", "question-escaped"], +) +def test_escape_preserve_wildcards(escaper, test_input, expected): + """Test that * and ? are preserved when preserve_wildcards=True.""" + # These tests verify wildcard preservation behavior + if "*" in test_input and "?" in test_input: + result = escaper.escape(test_input, preserve_wildcards=True) + assert "*" in result and "?" in result + elif "*" in test_input: + assert "*" in escaper.escape(test_input, preserve_wildcards=True) + elif "?" in test_input: + assert "?" in escaper.escape(test_input, preserve_wildcards=True) From 204a50b9befdeb5fb8872961b3572d68fae41974 Mon Sep 17 00:00:00 2001 From: Hiren Date: Thu, 5 Mar 2026 11:18:01 -0500 Subject: [PATCH 4/4] fix: update test and docstring for preserve_wildcards - Test now uses expected parameter properly instead of ignoring it - Docstring updated to reflect that both * and ? 
are preserved Signed-off-by: Hiren --- redisvl/utils/token_escaper.py | 4 ++-- tests/unit/test_token_escaper.py | 20 +++++++------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/redisvl/utils/token_escaper.py b/redisvl/utils/token_escaper.py index a7d2357a..374534fd 100644 --- a/redisvl/utils/token_escaper.py +++ b/redisvl/utils/token_escaper.py @@ -27,8 +27,8 @@ def escape(self, value: str, preserve_wildcards: bool = False) -> str: Args: value: The string value to escape. - preserve_wildcards: If True, preserves * characters for wildcard - matching. Defaults to False. + preserve_wildcards: If True, preserves * and ? characters for + wildcard matching. Defaults to False. Returns: The escaped string. diff --git a/tests/unit/test_token_escaper.py b/tests/unit/test_token_escaper.py index afca368e..2836ed19 100644 --- a/tests/unit/test_token_escaper.py +++ b/tests/unit/test_token_escaper.py @@ -127,21 +127,15 @@ def test_escape_long_string(escaper): @pytest.mark.parametrize( ("test_input,expected"), [ - ("wild*card", "wild*card"), - ("single?char", "single?char"), - ("combo*test?", "combo*test?"), - ("mixed*and|pipe", "mixed*and\|pipe"), - ("question?and|pipe", "question\?and\|pipe"), # ? escaped when not preserving + ("wild*card", r"wild*card"), + ("single?char", r"single?char"), + ("combo*test?", r"combo*test?"), + ("mixed*and|pipe", r"mixed*and\|pipe"), + ("question?and|pipe", r"question?and\|pipe"), # ? is kept as a wildcard; only | is escaped ], ids=["star", "question", "both", "star-only", "question-escaped"], ) def test_escape_preserve_wildcards(escaper, test_input, expected): """Test that * and ? are preserved when preserve_wildcards=True.""" - # These tests verify wildcard preservation behavior - if "*" in test_input and "?" in test_input: - result = escaper.escape(test_input, preserve_wildcards=True) - assert "*" in result and "?" in result - elif "*" in test_input: - assert "*" in escaper.escape(test_input, preserve_wildcards=True) - elif "?" 
in test_input: - assert "?" in escaper.escape(test_input, preserve_wildcards=True) + result = escaper.escape(test_input, preserve_wildcards=True) + assert result == expected