From 79daacd5cd5b1c4bebf8ee806e3a10fb1425efca Mon Sep 17 00:00:00 2001 From: aromarious <6535448+aromarious@users.noreply.github.com> Date: Wed, 4 Mar 2026 11:00:54 +0900 Subject: [PATCH] feat: check cursor position within CJK substring match on hover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, hovering anywhere in a CJK token (e.g. 注文が届く) would show definitions for all matching substrings regardless of cursor position. Now the hover handler computes the cursor offset within the token and only returns definitions for terms whose matched substring spans the cursor position. - Add Lexer.getWithStart to return token string with start column - Add TextDocument.findTokenWithStart for hover (findToken kept for completion) - Filter findMatchingTermsBySubstring by cursor offset in token - Add positive and negative E2E hover tests for CJK position accuracy - Add CJKDemo workspace fixture for manual testing Co-Authored-By: Claude Opus 4.6 --- .../E2e/HoverTests.fs | 15 ++++++---- .../Contextive.LanguageServer/Hover.fs | 28 +++++++++++-------- .../Contextive.LanguageServer/Server.fs | 2 +- .../Contextive.LanguageServer/TextDocument.fs | 11 ++++++++ .../Contextive.LanguageServer/Tokeniser.fs | 6 ++++ .../simple_workspace/CJKDemo/cjk.glossary.yml | 16 +++++++++++ .../simple_workspace/CJKDemo/test.txt | 7 +++++ 7 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/cjk.glossary.yml create mode 100644 src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/test.txt diff --git a/src/language-server/Contextive.LanguageServer.Tests/E2e/HoverTests.fs b/src/language-server/Contextive.LanguageServer.Tests/E2e/HoverTests.fs index 7632a67d..feb297ce 100644 --- a/src/language-server/Contextive.LanguageServer.Tests/E2e/HoverTests.fs +++ b/src/language-server/Contextive.LanguageServer.Tests/E2e/HoverTests.fs @@ -99,10 +99,12 @@ 
let tests = "購入する", Position(0, 0), "購入", "cjk" "配送についての質問", Position(0, 0), "配送", "cjk" "注文と配送", Position(0, 0), "注文", "cjk" - "注文と配送", Position(0, 0), "配送", "cjk" + "注文と配送", Position(0, 3), "配送", "cjk" "オーダー", Position(0, 0), "注文", "cjk" "购物车", Position(0, 0), "购物车", "cjk" - "사용자", Position(0, 0), "사용자", "cjk" ] + "사용자", Position(0, 0), "사용자", "cjk" + "注文が届く", Position(0, 1), "注文", "cjk" + "注文と配送", Position(0, 4), "配送", "cjk" ] |> List.map testHoverTermFoundWithDefaultGlossary |> testList "CJK term found when hovering via substring matching" @@ -192,7 +194,10 @@ let tests = "peere", Position(0, 0), "three" "Something", Position(0, 0), "empty_terms_list" "料理", Position(0, 0), "cjk" - "ユーザー", Position(0, 0), "cjk" ] + "ユーザー", Position(0, 0), "cjk" + "注文が届く", Position(0, 2), "cjk" + "注文が届く", Position(0, 3), "cjk" + "注文と配送", Position(0, 2), "cjk" ] |> List.map testHoverTermNotFound |> testList "Nothing found when hovering" @@ -237,7 +242,7 @@ let tests = testAsync "Test hover with context info and no match" { let terms = [] - let foundToken = Some "term" + let foundToken = Some("term", 0) let hoverHandler = Hover.handler @@ -248,7 +253,7 @@ let tests = (fun _ _ -> foundToken) let hoverParams = - HoverParams(TextDocument = TextDocumentItem(Uri = System.Uri("file:///blah"))) + HoverParams(TextDocument = TextDocumentItem(Uri = System.Uri("file:///blah")), Position = Position(0, 0)) let! result = hoverHandler hoverParams null null |> Async.AwaitTask diff --git a/src/language-server/Contextive.LanguageServer/Hover.fs b/src/language-server/Contextive.LanguageServer/Hover.fs index bf9fddbd..ee4b2265 100644 --- a/src/language-server/Contextive.LanguageServer/Hover.fs +++ b/src/language-server/Contextive.LanguageServer/Hover.fs @@ -67,22 +67,25 @@ module private Filtering = // Both token and keys are normalised with simpleNormalize (NFKD + lowercase + // Singularize). 
Singularize is a no-op for CJK text in Humanizer, so the // normalisation is effectively NFKD + lowercase on both sides. - let findMatchingTermsBySubstring (context: GlossaryFile.Context) (token: string) = + // Keeps only keys with an occurrence covering cursorOffsetInToken: any occurrence (not just the first) counts, and the search is ordinal like the Contains call it replaces. + let findMatchingTermsBySubstring (context: GlossaryFile.Context) (token: string) (cursorOffsetInToken: int) = let normalizedToken = Normalization.simpleNormalize token context.Index.Keys - |> Seq.filter (fun key -> normalizedToken.Contains(key)) + |> Seq.filter (fun key -> + let lo = max 0 (cursorOffsetInToken - key.Length + 1) + key.Length > 0 && cursorOffsetInToken >= 0 && cursorOffsetInToken < normalizedToken.Length && normalizedToken.IndexOf(key, lo, (min normalizedToken.Length (cursorOffsetInToken + key.Length)) - lo, System.StringComparison.Ordinal) >= 0) |> Seq.collect (fun key -> context.Index[key]) |> Seq.distinctBy (fun t -> t.Name) - let termFilterForCandidateTermsWithIndex tokenAndCandidateTerms = + let termFilterForCandidateTermsWithIndex cursorOffsetInToken tokenAndCandidateTerms = Seq.map (fun (c: GlossaryFile.Context) -> let token = tokenAndCandidateTerms |> Seq.head |> fst let terms = if CandidateTerms.containsCJK token then - findMatchingTermsBySubstring c token + findMatchingTermsBySubstring c token cursorOffsetInToken else findMatchingTermsInIndex c tokenAndCandidateTerms @@ -92,7 +95,7 @@ module private Filtering = module private TextDocument = - let getTokenAtPosition (p: HoverParams) (tokenFinder: TextDocument.TokenFinder) = + let getTokenWithStartAtPosition (p: HoverParams) (tokenFinder: DocumentUri -> Position -> (string * int) option) = match p.TextDocument with | null -> None | document -> tokenFinder document.Uri p.Position @@ -111,10 +114,11 @@ let private hoverResult (contexts: GlossaryFile.FindResult) = let private hoverContentForToken (uri: string) (termFinder: GlossaryFile.Finder) + (cursorOffsetInToken: int) (tokensAndCandidateTerms: CandidateTerms.TokenAndCandidateTerms seq) = async { - let! findResult = termFinder uri (Filtering.termFilterForCandidateTermsWithIndex tokensAndCandidateTerms) + let! 
findResult = termFinder uri (Filtering.termFilterForCandidateTermsWithIndex cursorOffsetInToken tokensAndCandidateTerms) return if Seq.isEmpty findResult then @@ -125,16 +129,18 @@ let private hoverContentForToken let handler (termFinder: GlossaryFile.Finder) - (tokenFinder: TextDocument.TokenFinder) + (tokenFinder: DocumentUri -> Position -> (string * int) option) (p: HoverParams) (_: HoverCapability) _ = async { return! - match TextDocument.getTokenAtPosition p tokenFinder with + match TextDocument.getTokenWithStartAtPosition p tokenFinder with | None -> async { return Lsp.noHoverResult } - | tokenAtPosition -> + | Some(token, tokenStart) -> + let cursorOffsetInToken = p.Position.Character - tokenStart + let uriPath = try p.TextDocument.Uri.ToUri().LocalPath @@ -146,9 +152,9 @@ let handler dp - tokenAtPosition + Some token |> CandidateTerms.tokenToTokenAndCandidateTerms - |> hoverContentForToken uriPath termFinder + |> hoverContentForToken uriPath termFinder cursorOffsetInToken } |> Async.StartAsTask diff --git a/src/language-server/Contextive.LanguageServer/Server.fs b/src/language-server/Contextive.LanguageServer/Server.fs index 16b15442..e3085365 100644 --- a/src/language-server/Contextive.LanguageServer/Server.fs +++ b/src/language-server/Contextive.LanguageServer/Server.fs @@ -75,7 +75,7 @@ let private configureServer (input: Stream) (output: Stream) (opts: LanguageServ .OnHover( Hover.handler <| GlossaryManager.lookup glossaryManager - <| TextDocument.findToken, + <| TextDocument.findTokenWithStart, Hover.registrationOptions ) diff --git a/src/language-server/Contextive.LanguageServer/TextDocument.fs b/src/language-server/Contextive.LanguageServer/TextDocument.fs index 7f2af048..4d7e17e7 100644 --- a/src/language-server/Contextive.LanguageServer/TextDocument.fs +++ b/src/language-server/Contextive.LanguageServer/TextDocument.fs @@ -27,6 +27,12 @@ let getTokenAtPosition (lines: IList) (position: Position) = |> Lexer.getEnd position.Character |> Lexer.get 
+let getTokenWithStartAtPosition (lines: IList) (position: Position) = + Lexer.ofLine lines position.Line + |> Lexer.getStart position.Character + |> Lexer.getEnd position.Character + |> Lexer.getWithStart + type TokenFinder = DocumentUri -> Position -> string option let findToken (documentUri: DocumentUri) (position: Position) = @@ -34,6 +40,11 @@ let findToken (documentUri: DocumentUri) (position: Position) = | None -> None | Some(documentLines) -> getTokenAtPosition documentLines position +let findTokenWithStart (documentUri: DocumentUri) (position: Position) = + match getDocument documentUri with + | None -> None + | Some(documentLines) -> getTokenWithStartAtPosition documentLines position + let private linesFromText (document: string) : IList = document.ReplaceLineEndings().Split(System.Environment.NewLine) diff --git a/src/language-server/Contextive.LanguageServer/Tokeniser.fs b/src/language-server/Contextive.LanguageServer/Tokeniser.fs index eec786dd..fc3dffac 100644 --- a/src/language-server/Contextive.LanguageServer/Tokeniser.fs +++ b/src/language-server/Contextive.LanguageServer/Tokeniser.fs @@ -72,3 +72,13 @@ type Lexer = function | Token(line, start, _) as t when t.HasLength -> line.Substring(start, t.Length.Value) |> trim |> Some | _ -> None + + // Returns the trimmed token text paired with the column of its first character, or None when there is no token with a length. + static member getWithStart = + function + | Token(line, start, _) as t when t.HasLength -> + let raw = line.Substring(start, t.Length.Value) + let token = trim raw + // NOTE(review): trim may drop leading characters (its definition is outside this hunk); shift start by the trimmed text's offset in raw so it still points at token's first character. + Some(token, start + max 0 (raw.IndexOf(token, System.StringComparison.Ordinal))) + | _ -> None diff --git a/src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/cjk.glossary.yml b/src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/cjk.glossary.yml new file mode 100644 index 00000000..8afcf733 --- /dev/null +++ b/src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/cjk.glossary.yml @@ -0,0 +1,16 @@ +contexts: + - name: CJK Demo + domainVisionStatement: To illustrate CJK substring matching in the contextive glossary. 
+ terms: + - name: 注文 + definition: An order placed by a customer. + aliases: + - オーダー + - name: 購入 + definition: A purchase transaction. + - name: 配送 + definition: Delivery of goods. + - name: 사용자 + definition: A user of the system. + - name: 购物车 + definition: Shopping cart. diff --git a/src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/test.txt b/src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/test.txt new file mode 100644 index 00000000..4f664529 --- /dev/null +++ b/src/vscode/contextive/test/single-root/fixtures/simple_workspace/CJKDemo/test.txt @@ -0,0 +1,7 @@ +注文が届く +購入する +配送についての質問 +注文と配送 +オーダー +购物车 +사용자