Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions redisvl/utils/token_escaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ class TokenEscaper:
"""

# Characters that RediSearch requires us to escape during queries.
# Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization
DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]"
# Source: https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/escaping/#tokenization-rules-for-text-fields
DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ?|]"

# Same as above but excludes * to allow wildcard patterns
ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ ]"
# Same as above but excludes * and ? to allow wildcard patterns
ESCAPED_CHARS_NO_WILDCARD = r"[,.<>{}\[\]\\\"\':;!@#$%^&()\-+=~\/ |]"
Comment on lines +15 to +16
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR description says ESCAPED_CHARS_NO_WILDCARD added both ? and |, but the code intentionally excludes ? so it can be preserved when preserve_wildcards=True (only | is added). Please update the PR description (or the code) so they match, since this affects the documented behavior of wildcard queries.

Copilot uses AI. Check for mistakes.
Comment on lines +15 to +16
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment on ESCAPED_CHARS_NO_WILDCARD says it excludes * and ? (so preserve_wildcards=True preserves both), but the escape() docstring still says it only preserves *. Please update the preserve_wildcards parameter documentation to reflect the actual behavior (preserving both * and ?), so callers don’t assume ? will be escaped/preserved incorrectly.

Copilot uses AI. Check for mistakes.

def __init__(self, escape_chars_re: Optional[Pattern] = None):
if escape_chars_re:
Expand All @@ -27,8 +27,8 @@ def escape(self, value: str, preserve_wildcards: bool = False) -> str:

Args:
value: The string value to escape.
preserve_wildcards: If True, preserves * characters for wildcard
matching. Defaults to False.
preserve_wildcards: If True, preserves * and ? characters for
wildcard matching. Defaults to False.

Returns:
The escaped string.
Expand Down
27 changes: 23 additions & 4 deletions tests/unit/test_token_escaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def escaper():
),
(
r"& symbols, like * and ?",
r"\&\ symbols\,\ like\ \*\ and\ ?",
), # TODO: question marks are not caught?
r"\&\ symbols\,\ like\ \*\ and\ \?",
),
# underscores are ignored
(r"-dashes_and_underscores-", r"\-dashes_and_underscores\-"),
],
Expand Down Expand Up @@ -52,12 +52,12 @@ def test_escape_text_chars(escaper, test_input, expected):
# Tags with less common, but legal characters
("_underscore_", r"_underscore_"),
("dot.tag", r"dot\.tag"),
# ("pipe|tag", r"pipe\|tag"), #TODO - pipes are not caught?
("pipe|tag", r"pipe\|tag"),
# More edge cases with special characters
("(parentheses)", r"\(parentheses\)"),
("[brackets]", r"\[brackets\]"),
("{braces}", r"\{braces\}"),
# ("question?mark", r"question\?mark"), #TODO - question marks are not caught?
("question?mark", r"question\?mark"),
# Unicode characters in tags
("你好", r"你好"), # Assuming non-Latin characters don't need escaping
("emoji:😊", r"emoji\:😊"),
Expand All @@ -78,9 +78,11 @@ def test_escape_text_chars(escaper, test_input, expected):
"hyphen",
"underscore",
"dot",
"pipe",
"parentheses",
"brackets",
"braces",
"question",
"non-latin",
"emoji",
],
Expand Down Expand Up @@ -120,3 +122,20 @@ def test_escape_long_string(escaper):
# Use pytest's benchmark fixture to check performance
escaped = escaper.escape(long_str)
assert escaped == expected


@pytest.mark.parametrize(
    "test_input,expected",
    [
        # Wildcard characters pass through untouched.
        ("wild*card", r"wild*card"),
        ("single?char", r"single?char"),
        ("combo*test?", r"combo*test?"),
        # Non-wildcard special characters (here: the pipe) are still escaped,
        # while the wildcard next to them is left alone.
        ("mixed*and|pipe", r"mixed*and\|pipe"),
        ("question?and|pipe", r"question?and\|pipe"),
    ],
    ids=["star", "question", "both", "star-with-pipe", "question-with-pipe"],
)
def test_escape_preserve_wildcards(escaper, test_input, expected):
    """Check that ``*`` and ``?`` are preserved when ``preserve_wildcards=True``.

    Every case calls ``escape`` with ``preserve_wildcards=True``, so the
    expected strings must keep ``*`` and ``?`` unescaped. Other special
    characters, such as ``|``, are still expected to be backslash-escaped.

    Args:
        escaper: The escaper fixture under test.
        test_input: Raw string handed to ``escape``.
        expected: The string ``escape`` should return for ``test_input``.
    """
    result = escaper.escape(test_input, preserve_wildcards=True)
    assert result == expected