feat(parser): add scope-aware dot operator tokenization

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Corey Johnson 2025-10-16 17:47:50 -07:00
parent 219397339c
commit 7e819f9c67

@@ -1,5 +1,6 @@
 import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
 import { Identifier, Word } from './shrimp.terms'
+import type { Scope } from './scopeTracker'
 // The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
@@ -14,6 +15,20 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
   while (true) {
     ch = getFullCodePoint(input, pos)
+    // Check for dot and scope - property access detection
+    if (ch === 46 /* . */ && isValidIdentifier) {
+      const identifierText = input.read(0, pos)
+      const scope = stack.context as Scope | undefined
+      if (scope?.has(identifierText)) {
+        // In scope - stop here, let grammar parse property access
+        input.advance(pos)
+        input.acceptToken(Identifier)
+        return
+      }
+      // Not in scope - continue consuming as Word (fall through)
+    }
     if (!isWordChar(ch)) break
     // Certain characters might end a word or identifier if they are followed by whitespace.
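In effect, a dot now terminates the token only when the text consumed so far names something in the current scope; otherwise the dot is swallowed and the run keeps growing into a plain Word. A standalone sketch of just that decision (the names below are illustrative, not the repo's API):

```ts
// Illustrative sketch only (not the repo's API): the decision the new branch
// makes when it sees a '.' right after some already-consumed text.
type ScopeLike = { has(name: string): boolean }

const dotEndsIdentifier = (consumed: string, scope?: ScopeLike): boolean =>
  scope?.has(consumed) ?? false

const names = new Set(['user'])
dotEndsIdentifier('user', names) // true  -> emit Identifier; the grammar parses `.name` as property access
dotEndsIdentifier('grep', names) // false -> fall through; `grep.txt` keeps tokenizing as one Word
```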
@@ -34,7 +49,7 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
   input.advance(pos)
   input.acceptToken(isValidIdentifier ? Identifier : Word)
-})
+}, { contextual: true })
 const isWhiteSpace = (ch: number): boolean => {
   return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */
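
The switch from `})` to `}, { contextual: true })` marks the tokenizer as context-dependent, so cached tokens are not reused where the tracked context differs. That context only exists if the grammar is configured with a context tracker; without one, `stack.context` has no Scope to offer and every dot falls through to the Word path. A minimal sketch of how `scopeTracker.ts` might be shaped, offered only as an assumption since that file is not part of this diff:

```ts
// A minimal sketch, assuming a Scope with the has() lookup the tokenizer expects.
// The real scopeTracker.ts is not shown in this diff and may differ.
import { ContextTracker } from '@lezer/lr'

export class Scope {
  private names = new Set<string>()
  constructor(readonly parent?: Scope) {}
  add(name: string): void { this.names.add(name) }
  has(name: string): boolean {
    return this.names.has(name) || (this.parent?.has(name) ?? false)
  }
}

export const trackScope = new ContextTracker<Scope>({
  start: new Scope(),
  // A real tracker would add shift/reduce hooks here to push a child Scope when
  // the grammar enters a binding construct and return context.parent on exit.
})
```

The grammar would then reference the tracker with something like `@context trackScope from "./scopeTracker"`, which is what makes `stack.context` carry a Scope inside the external tokenizer.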