From 7e819f9c676482516bdd01de0e78856b623adb62 Mon Sep 17 00:00:00 2001
From: Corey Johnson
Date: Thu, 16 Oct 2025 17:47:50 -0700
Subject: [PATCH] feat(parser): add scope-aware dot operator tokenization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/parser/tokenizer.ts | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts
index 07fbc97..3011d4b 100644
--- a/src/parser/tokenizer.ts
+++ b/src/parser/tokenizer.ts
@@ -1,5 +1,6 @@
 import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
 import { Identifier, Word } from './shrimp.terms'
+import type { Scope } from './scopeTracker'
 
 // The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF.
 
@@ -14,6 +15,20 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
   while (true) {
     ch = getFullCodePoint(input, pos)
 
+    // Check for dot and scope - property access detection
+    if (ch === 46 /* . */ && isValidIdentifier) {
+      const identifierText = input.read(0, pos)
+      const scope = stack.context as Scope | undefined
+
+      if (scope?.has(identifierText)) {
+        // In scope - stop here, let grammar parse property access
+        input.advance(pos)
+        input.acceptToken(Identifier)
+        return
+      }
+      // Not in scope - continue consuming as Word (fall through)
+    }
+
     if (!isWordChar(ch)) break
 
     // Certain characters might end a word or identifier if they are followed by whitespace.
@@ -34,7 +49,7 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack
 
   input.advance(pos)
   input.acceptToken(isValidIdentifier ? Identifier : Word)
-})
+}, { contextual: true })
 
 const isWhiteSpace = (ch: number): boolean => {
   return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */
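
Note for reviewers: the patch only imports the `Scope` type and reads it off `stack.context`; the `./scopeTracker` module itself is outside this diff. The sketch below is a minimal assumption about what that module could look like, built from the two facts the tokenizer relies on (the context is a `Scope` and it answers `has(name)`) plus Lezer's `ContextTracker` API. The class shape, the `add` helper, and the tracker's hooks are hypothetical, not the project's actual implementation.

```ts
import { ContextTracker } from '@lezer/lr'

// Hypothetical scope value carried on the parse stack. Only `has` is required
// by the tokenizer in this patch; everything else is illustrative.
export class Scope {
  constructor(
    readonly parent: Scope | null = null,
    private readonly names: ReadonlySet<string> = new Set()
  ) {}

  // True if the name is bound in this scope or any enclosing one.
  has(name: string): boolean {
    return this.names.has(name) || (this.parent?.has(name) ?? false)
  }

  // Immutable update: contexts are shared between parse stacks, so never mutate.
  add(name: string): Scope {
    return new Scope(this.parent, new Set([...this.names, name]))
  }
}

// Hypothetical tracker wiring. A real grammar would grow the scope in
// shift/reduce hooks keyed on term ids from shrimp.terms, which this diff
// does not show, so those hooks are left out here.
export const scopeTracker = new ContextTracker<Scope>({
  start: new Scope(),
})
```

The `{ contextual: true }` option added to the `ExternalTokenizer` is what makes this usable: it tells Lezer that the tokenizer's output depends on the parse context, so cached tokens are not reused across different scopes and the same text can legitimately tokenize as `Identifier` in one place and `Word` in another.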