cache it

light cleanup
Better names
2025-11-06 13:32:41 -08:00 · 2025-11-06 13:32:41 -08:00 · 2025-11-06 10:22:37 -08:00 · 2025-11-06 10:22:37 -08:00 · 2025-11-06 10:22:37 -08:00 · 2025-11-06 09:23:18 -08:00
15 changed files with 101 additions and 43 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -35,3 +35,5 @@ report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 /tmp
 /docs
 *.vsix
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -200,7 +200,7 @@ function parseExpression(input: string) {
 - **Not in scope** → Parses as `Word("obj.prop")` → compiles to `PUSH 'obj.prop'` (treated as file path/string)
 Implementation files:
- **src/parser/scopeTracker.ts**: ContextTracker that maintains immutable scope chain
+- **src/parser/parserScopeContext.ts**: ContextTracker that maintains immutable scope chain
 - **src/parser/tokenizer.ts**: External tokenizer checks `stack.context` to decide if dot creates DotGet or Word
 - Scope tracking: Captures variables from assignments (`x = 5`) and function parameters (`fn x:`)
 - See `src/parser/tests/dot-get.test.ts` for comprehensive examples
--- a/src/compiler/tests/pipe.test.ts
+++ b/src/compiler/tests/pipe.test.ts
@@ -78,4 +78,18 @@ describe('pipe expressions', () => {
      div = do a b: a / b end
      sub 3 1 | div (sub 110 9 | sub 1) _ | div 5`).toEvaluateTo(10)
  })
  // Prelude list helpers must be usable as stages of a pipe expression.
  test('pipe with prelude functions (list.reverse and list.map)', () => {
    const source = `
      double = do x: x * 2 end
      range 1 3 | list.reverse | list.map double
    `
    const expected = [6, 4, 2]
    expect(source).toEvaluateTo(expected)
  })
  // Piping a user-defined function's result into the prelude `echo` evaluates to null.
  test('pipe with prelude function (echo)', () => {
    const source = `
      get-msg = do: 'hello' end
      get-msg | echo
    `
    expect(source).toEvaluateTo(null)
  })
 })
--- a/src/parser/parserScopeContext.ts
+++ b/src/parser/parserScopeContext.ts
--- a/src/parser/shrimp.grammar
+++ b/src/parser/shrimp.grammar
@@ -1,6 +1,6 @@
@external propSource highlighting from "./highlight"
-@context trackScope from "./scopeTracker"
+@context trackScope from "./parserScopeContext"
@skip { space | Comment }
--- a/src/parser/shrimp.ts
+++ b/src/parser/shrimp.ts
@@ -2,7 +2,7 @@
 import {LRParser, LocalTokenGroup} from "@lezer/lr"
 import {operatorTokenizer} from "./operatorTokenizer"
 import {tokenizer, specializeKeyword} from "./tokenizer"
-import {trackScope} from "./scopeTracker"
+import {trackScope} from "./parserScopeContext"
 import {highlighting} from "./highlight"
 const spec_Identifier = {__proto__:null,if:66, null:94, catch:100, finally:106, end:108, else:116, while:130, try:136, throw:140}
 export const parser = LRParser.deserialize({
--- a/src/parser/tokenizer.ts
+++ b/src/parser/tokenizer.ts
@@ -119,7 +119,13 @@ const consumeWordToken = (
    }
    // Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
-    if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && ch !== 63 /* ? */ && !isEmojiOrUnicode(ch)) {
+    if (
      !isLowercaseLetter(ch) &&
      !isDigit(ch) &&
      ch !== 45 /* - */ &&
      ch !== 63 /* ? */ &&
      !isEmojiOrUnicode(ch)
    ) {
      if (!canBeWord) break
      isValidIdentifier = false
    }
@@ -159,7 +165,9 @@ const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number |
  // If identifier is in scope, this is property access (e.g., obj.prop)
  // If not in scope, it should be consumed as a Word (e.g., file.txt)
-  return context?.scope.has(identifierText) || globals.includes(identifierText) ? IdentifierBeforeDot : null
+  return context?.scope.has(identifierText) || globals.includes(identifierText)
    ? IdentifierBeforeDot
    : null
 }
 // Decide between AssignableIdentifier and Identifier using grammar state + peek-ahead
@@ -187,7 +195,10 @@ const chooseIdentifierToken = (input: InputStream, stack: Stack): number => {
  const nextCh2 = getFullCodePoint(input, peekPos + 1)
  // Check for compound assignment operators: +=, -=, *=, /=, %=
-  if ([43/* + */, 45/* - */, 42/* * */, 47/* / */, 37/* % */].includes(nextCh) && nextCh2 === 61/* = */) {
+  if (
    [43 /* + */, 45 /* - */, 42 /* * */, 47 /* / */, 37 /* % */].includes(nextCh) &&
    nextCh2 === 61 /* = */
  ) {
    // Found compound operator, check if it's followed by whitespace
    const charAfterOp = getFullCodePoint(input, peekPos + 2)
    if (isWhiteSpace(charAfterOp) || charAfterOp === -1 /* EOF */) {
--- a/src/testSetup.ts
+++ b/src/testSetup.ts
@@ -1,6 +1,7 @@
 import { expect } from 'bun:test'
 import { parser } from '#parser/shrimp'
 import { setGlobals } from '#parser/tokenizer'
 import { globals as prelude } from '#prelude'
 import { $ } from 'bun'
 import { assert, errorMessage } from '#utils/utils'
 import { Compiler } from '#compiler/compiler'
@@ -43,7 +44,8 @@ expect.extend({
  toMatchTree(received: unknown, expected: string, globals?: Record<string, any>) {
    assert(typeof received === 'string', 'toMatchTree can only be used with string values')
-    if (globals) setGlobals(Object.keys(globals))
+    const allGlobals = { ...prelude, ...(globals || {}) }
    setGlobals(Object.keys(allGlobals))
    const tree = parser.parse(received)
    const actual = treeToString(tree, received)
    const normalizedExpected = trimWhitespace(expected)
@@ -99,9 +101,10 @@ expect.extend({
    assert(typeof received === 'string', 'toEvaluateTo can only be used with string values')
    try {
-      if (globals) setGlobals(Object.keys(globals))
+      const allGlobals = { ...prelude, ...(globals || {}) }
      setGlobals(Object.keys(allGlobals))
      const compiler = new Compiler(received)
-      const result = await run(compiler.bytecode, globals)
+      const result = await run(compiler.bytecode, allGlobals)
      let value = VMResultToValue(result)
      // Just treat regex as strings for comparison purposes
--- a/vscode-extension/.gitignore
+++ b/vscode-extension/.gitignore
@@ -1,4 +0,0 @@
 node_modules
 client/dist
 server/dist
 *.vsix
--- a/vscode-extension/server/src/diagnostics.ts
+++ b/vscode-extension/server/src/diagnostics.ts
@@ -1,12 +1,12 @@
 import { TextDocument, Position } from 'vscode-languageserver-textdocument'
 import { Diagnostic, DiagnosticSeverity } from 'vscode-languageserver/node'
-import { parser } from '../../../src/parser/shrimp'
+import { Tree } from '@lezer/common'
 import { Compiler } from '../../../src/compiler/compiler'
 import { CompilerError } from '../../../src/compiler/compilerError'
-export const buildDiagnostics = (textDocument: TextDocument): Diagnostic[] => {
+export const buildDiagnostics = (textDocument: TextDocument, tree: Tree): Diagnostic[] => {
  const text = textDocument.getText()
-  const diagnostics = getParseErrors(textDocument)
+  const diagnostics = getParseErrors(textDocument, tree)
  if (diagnostics.length > 0) {
    return diagnostics
@@ -59,9 +59,7 @@ const unknownDiagnostic = (message: string): Diagnostic => {
  return diagnostic
 }
-const getParseErrors = (textDocument: TextDocument): Diagnostic[] => {
+const getParseErrors = (textDocument: TextDocument, tree: Tree): Diagnostic[] => {
  const tree = parser.parse(textDocument.getText())
  const ranges: { start: Position; end: Position }[] = []
  tree.iterate({
    enter(n) {
--- a/vscode-extension/server/src/editorScopeAnalyzer.test.ts
+++ b/vscode-extension/server/src/editorScopeAnalyzer.test.ts
@@ -1,10 +1,10 @@
 import { test, expect, describe } from 'bun:test'
-import { ScopeTracker } from './scopeTracker'
+import { EditorScopeAnalyzer } from './editorScopeAnalyzer'
 import { TextDocument } from 'vscode-languageserver-textdocument'
 import { parser } from '../../../src/parser/shrimp'
 import * as Terms from '../../../src/parser/shrimp.terms'
-describe('ScopeTracker', () => {
+describe('EditorScopeAnalyzer', () => {
  test('top-level assignment is in scope', () => {
    const code = 'x = 5\necho x'
    const { tree, tracker } = parseAndGetScope(code)
@@ -135,11 +135,17 @@ end`
    const xInEcho = identifiers[identifiers.length - 1]
    expect(tracker.isInScope('x', xInEcho)).toBe(true)
  })
  // `echo` is never assigned in the snippet, so it can only be in scope via the prelude.
  test('the prelude functions are always in scope', () => {
    const parsed = parseAndGetScope(`echo "Hello, World!"`)
    const echoInScope = parsed.tracker.isInScope('echo', parsed.tree.topNode)
    expect(echoInScope).toBe(true)
  })
 })
 const parseAndGetScope = (code: string) => {
  const document = TextDocument.create('test://test.sh', 'shrimp', 1, code)
  const tree = parser.parse(code)
-  const tracker = new ScopeTracker(document)
+  const tracker = new EditorScopeAnalyzer(document)
  return { document, tree, tracker }
 }
--- a/vscode-extension/server/src/editorScopeAnalyzer.ts
+++ b/vscode-extension/server/src/editorScopeAnalyzer.ts
@@ -1,17 +1,20 @@
 import { SyntaxNode } from '@lezer/common'
 import { TextDocument } from 'vscode-languageserver-textdocument'
 import * as Terms from '../../../src/parser/shrimp.terms'
 import { globals } from '../../../src/prelude'
 /**
 * Tracks variables in scope at a given position in the parse tree.
 * Used to distinguish identifiers (in scope) from words (not in scope).
 */
-export class ScopeTracker {
+export class EditorScopeAnalyzer {
  private document: TextDocument
  private scopeCache = new Map<number, Set<string>>()
  constructor(document: TextDocument) {
    this.document = document
    const preludeKeys = Object.keys(globals)
    this.scopeCache.set(0, new Set(preludeKeys))
  }
  /**
--- a/vscode-extension/server/src/semanticTokens.ts
+++ b/vscode-extension/server/src/semanticTokens.ts
@@ -1,13 +1,13 @@
 import { parser } from '../../../src/parser/shrimp'
 import * as Terms from '../../../src/parser/shrimp.terms'
-import { SyntaxNode } from '@lezer/common'
+import { SyntaxNode, Tree } from '@lezer/common'
 import { TextDocument } from 'vscode-languageserver-textdocument'
 import {
  SemanticTokensBuilder,
  SemanticTokenTypes,
  SemanticTokenModifiers,
 } from 'vscode-languageserver/node'
-import { ScopeTracker } from './scopeTracker'
+import { EditorScopeAnalyzer } from './editorScopeAnalyzer'
 export const TOKEN_TYPES = [
  SemanticTokenTypes.function,
@@ -28,11 +28,9 @@ export const TOKEN_MODIFIERS = [
  SemanticTokenModifiers.readonly,
 ]
-export function buildSemanticTokens(document: TextDocument): number[] {
+export function buildSemanticTokens(document: TextDocument, tree: Tree): number[] {
  const text = document.getText()
  const tree = parser.parse(text)
  const builder = new SemanticTokensBuilder()
-  const scopeTracker = new ScopeTracker(document)
+  const scopeTracker = new EditorScopeAnalyzer(document)
  walkTree(tree.topNode, document, builder, scopeTracker)
@@ -77,7 +75,7 @@ function walkTree(
  node: SyntaxNode,
  document: TextDocument,
  builder: SemanticTokensBuilder,
-  scopeTracker: ScopeTracker
+  scopeTracker: EditorScopeAnalyzer
 ) {
  // Special handling for NamedArgPrefix to split "name=" into two tokens
  if (node.type.id === Terms.NamedArgPrefix) {
@@ -104,7 +102,7 @@ type TokenInfo = { type: number; modifiers: number } | undefined
 function getTokenType(
  node: SyntaxNode,
  document: TextDocument,
-  scopeTracker: ScopeTracker
+  scopeTracker: EditorScopeAnalyzer
 ): TokenInfo {
  const nodeTypeId = node.type.id
  const parentTypeId = node.parent?.type.id
--- a/vscode-extension/server/src/server.ts
+++ b/vscode-extension/server/src/server.ts
@@ -3,6 +3,7 @@ import { buildDiagnostics } from './diagnostics'
 import { buildSemanticTokens, TOKEN_MODIFIERS, TOKEN_TYPES } from './semanticTokens'
 import { parser } from '../../../src/parser/shrimp'
 import { Compiler } from '../../../src/compiler/compiler'
 import { Tree } from '@lezer/common'
 import {
  InitializeResult,
  TextDocuments,
@@ -10,18 +11,23 @@ import {
  createConnection,
  ProposedFeatures,
  CompletionItemKind,
  TextDocumentChangeEvent,
 } from 'vscode-languageserver/node'
 const connection = createConnection(ProposedFeatures.all)
 const documents = new TextDocuments(TextDocument)
 documents.listen(connection)
 const documentTrees = new Map<string, Tree>()
 // Server capabilities
 connection.onInitialize(handleInitialize)
 // Language features
 connection.languages.semanticTokens.on(handleSemanticTokens)
 documents.onDidOpen(handleDocumentOpen)
 documents.onDidChangeContent(handleDocumentChange)
 documents.onDidClose(handleDocumentClose)
 connection.onCompletion(handleCompletion)
 // Debug commands
@@ -31,10 +37,7 @@ connection.onRequest('shrimp/bytecode', handleBytecode)
 // Start listening
 connection.listen()
 // ============================================================================
 // Handler implementations
 // ============================================================================
 function handleInitialize(): InitializeResult {
  connection.console.log('🦐 Server initialized with capabilities')
  const result: InitializeResult = {
@@ -56,21 +59,40 @@ function handleInitialize(): InitializeResult {
  return result
 }
 /**
  * Parses a newly opened document and stores the resulting tree in
  * `documentTrees`, keyed by the document URI.
  *
  * NOTE(review): `TextDocuments.onDidChangeContent` is documented as firing
  * when a document is opened as well as when it changes, so this parse may
  * duplicate the one done in handleDocumentChange — confirm before removing.
  */
 function handleDocumentOpen(event: TextDocumentChangeEvent<TextDocument>) {
  const { document: opened } = event
  const parsed = parser.parse(opened.getText())
  documentTrees.set(opened.uri, parsed)
 }
 function handleSemanticTokens(params: any) {
  const document = documents.get(params.textDocument.uri)
  if (!document) return { data: [] }
-  const data = buildSemanticTokens(document)
+  const tree = documentTrees.get(params.textDocument.uri)
  if (!tree) return { data: [] }
  const data = buildSemanticTokens(document, tree)
  return { data }
 }
-function handleDocumentChange(change: any) {
+function handleDocumentChange(change: TextDocumentChangeEvent<TextDocument>) {
-  const textDocument = change.document
+  const document = change.document
-  const diagnostics = buildDiagnostics(textDocument)
+
-  connection.sendDiagnostics({ uri: textDocument.uri, diagnostics })
+  // Parse and cache
  const tree = parser.parse(document.getText())
  documentTrees.set(document.uri, tree)
  // Build diagnostics using cached tree
  const diagnostics = buildDiagnostics(document, tree)
  connection.sendDiagnostics({ uri: document.uri, diagnostics })
 }
-function handleCompletion(params: any) {
+function handleDocumentClose(event: TextDocumentChangeEvent<TextDocument>) {
  documentTrees.delete(event.document.uri)
 }
 function handleCompletion() {
  const keywords = ['if', 'else', 'do', 'end', 'and', 'or', 'true', 'false', 'null']
  return keywords.map((keyword) => ({
@@ -84,8 +106,13 @@ function handleParseTree(params: { uri: string }) {
  const document = documents.get(params.uri)
  if (!document) return 'Document not found'
  const tree = documentTrees.get(params.uri)
  if (!tree) {
    connection.console.error(`🦐 No cached tree for ${params.uri}`)
    return 'No cached parse tree available'
  }
  const text = document.getText()
  const tree = parser.parse(text)
  const cursor = tree.cursor()
  let formatted = ''
Author	SHA1	Message	Date
Corey Johnson	6505031da8	cache it	2025-11-06 13:32:41 -08:00
Corey Johnson	b9a0f6d485	light cleanup	2025-11-06 13:32:41 -08:00
Corey Johnson	061452a334	Better names	2025-11-06 10:22:37 -08:00
Chris Wanstrath	4494cbce91	Revert "use string.quoted for strings (for now) (until defunkt gets a new vscode theme)" This reverts commit `47c3fda4c8`.	2025-11-06 10:22:37 -08:00
Chris Wanstrath	47d1ea1a0b	use string.quoted for strings (for now) (until defunkt gets a new vscode theme)	2025-11-06 10:22:37 -08:00
Corey Johnson	ab12212df2	Make the extension know about the prelude	2025-11-06 09:23:18 -08:00