new parser(-ish)

2025-11-12 21:46:46 -08:00 · 2025-11-12 21:46:46 -08:00 · abd78108c8
commit abd78108c8
parent ae46988219
14 changed files with 1802 additions and 179 deletions
--- a/src/parser/node.ts
+++ b/src/parser/node.ts
@ -0,0 +1,221 @@
 import { type Token, TokenType } from "./tokenizer2"
 export type NodeType =
  | 'Program'
  | 'Block'
  | 'FunctionCall'
  | 'FunctionCallOrIdentifier'
  | 'FunctionCallWithBlock'
  | 'PositionalArg'
  | 'NamedArg'
  | 'FunctionDef'
  | 'Params'
  | 'NamedParam'
  | 'Null'
  | 'Boolean'
  | 'Number'
  | 'String'
  | 'StringFragment'
  | 'CurlyString'
  | 'DoubleQuote'
  | 'EscapeSeq'
  | 'Interpolation'
  | 'Regex'
  | 'Identifier'
  | 'AssignableIdentifier'
  | 'IdentifierBeforeDot'
  | 'Word'
  | 'Array'
  | 'Dict'
  | 'Comment'
  | 'BinOp'
  | 'ConditionalOp'
  | 'ParenExpr'
  | 'Assign'
  | 'CompoundAssign'
  | 'DotGet'
  | 'PipeExpr'
  | 'IfExpr'
  | 'ElseIfExpr'
  | 'ElseExpr'
  | 'WhileExpr'
  | 'TryExpr'
  | 'CatchExpr'
  | 'FinallyExpr'
  | 'Throw'
  | 'Eq'
  | 'Modulo'
  | 'Plus'
  | 'Star'
  | 'Slash'
  | 'Import'
  | 'Do'
  | 'colon'
  | 'keyword'
  | 'operator'
 // TODO: remove this when we switch from lezer
 export const operators: Record<string, any> = {
  // Logic
  'and': 'And',
  'or': 'Or',
  // Bitwise
  'band': 'Band',
  'bor': 'Bor',
  'bxor': 'Bxor',
  '>>>': 'Ushr',
  '>>': 'Shr',
  '<<': 'Shl',
  // Comparison
  '>=': 'Gte',
  '<=': 'Lte',
  '>': 'Gt',
  '<': 'Lt',
  '!=': 'Neq',
  '==': 'EqEq',
  // Compound assignment operators
  '??=': 'NullishEq',
  '+=': 'PlusEq',
  '-=': 'MinusEq',
  '*=': 'StarEq',
  '/=': 'SlashEq',
  '%=': 'ModuloEq',
  // Nullish coalescing
  '??': 'NullishCoalesce',
  // Math
  '*': 'Star',
  '**': 'StarStar',
  '=': 'Eq',
  '/': 'Slash',
  '+': 'Plus',
  '-': 'Minus',
  '%': 'Modulo',
  // Dotget
  '.': 'Dot',
  // Pipe
  '|': 'operator',
 }
 export class SyntaxNode {
  type: NodeType
  from: number
  to: number
  parent: SyntaxNode | null
  children: SyntaxNode[] = []
  constructor(type: NodeType, from: number, to: number, parent: SyntaxNode | null = null) {
    this.type = type
    this.from = from
    this.to = to
    this.parent = parent
  }
  static from(token: Token, parent?: SyntaxNode): SyntaxNode {
    return new SyntaxNode(TokenType[token.type] as NodeType, token.from, token.to, parent ?? null)
  }
  get name(): string {
    return this.type
  }
  get isError(): boolean {
    return false
  }
  get firstChild(): SyntaxNode | null {
    return this.children[0] ?? null
  }
  get lastChild(): SyntaxNode | null {
    return this.children.at(-1) ?? null
  }
  get nextSibling(): SyntaxNode | null {
    if (!this.parent) return null
    const siblings = this.parent.children
    const index = siblings.indexOf(this)
    return index >= 0 && index < siblings.length - 1 ? siblings[index + 1]! : null
  }
  get prevSibling(): SyntaxNode | null {
    if (!this.parent) return null
    const siblings = this.parent.children
    const index = siblings.indexOf(this)
    return index > 0 ? siblings[index - 1]! : null
  }
  add(node: SyntaxNode) {
    node.parent = this
    this.children.push(node)
  }
  push(...nodes: SyntaxNode[]): SyntaxNode {
    nodes.forEach(child => child.parent = this)
    this.children.push(...nodes)
    return this
  }
  toString(): string {
    return this.type
  }
 }
 // Operator precedence (binding power) - higher = tighter binding
 export const precedence: Record<string, number> = {
  // Logical
  'or': 10,
  'and': 20,
  // Comparison
  '==': 30,
  '!=': 30,
  '<': 30,
  '>': 30,
  '<=': 30,
  '>=': 30,
  // Nullish coalescing
  '??': 35,
  // Addition/Subtraction
  '+': 40,
  '-': 40,
  // Multiplication/Division/Modulo
  '*': 50,
  '/': 50,
  '%': 50,
  // Bitwise
  'band': 45,
  'bor': 45,
  'bxor': 45,
  '<<': 45,
  '>>': 45,
  '>>>': 45,
  // Exponentiation (right-associative)
  '**': 60,
 }
 export const conditionals = new Set([
  '==', '!=', '<', '>', '<=', '>=', '??', 'and', 'or'
 ])
 export const compounds = [
  '??=', '+=', '-=', '*=', '/=', '%='
 ]
--- a/src/parser/parser2.ts
+++ b/src/parser/parser2.ts
@ -0,0 +1,899 @@
 import { Scanner, type Token, TokenType } from './tokenizer2'
 import { SyntaxNode, operators, precedence, conditionals, compounds } from './node'
 import { globals } from './tokenizer'
 import { parseString } from './stringParser'
 const $T = TokenType
 export const parse = (input: string): SyntaxNode => {
  const parser = new Parser()
  return parser.parse(input)
 }
 class Scope {
  parent?: Scope
  set = new Set<string>()
  constructor(parent?: Scope) {
    this.parent = parent
    // no parent means this is global scope
    if (!parent) for (const name of globals) this.add(name)
  }
  add(key: string) {
    this.set.add(key)
  }
  has(key: string): boolean {
    return this.set.has(key) || this.parent?.has(key) || false
  }
 }
 export class Parser {
  tokens: Token[] = []
  pos = 0
  inParens = 0
  input = ''
  scope = new Scope
  inTestExpr = false
  parse(input: string): SyntaxNode {
    const scanner = new Scanner()
    this.tokens = scanner.tokenize(input)
    this.pos = 0
    this.input = input
    this.scope = new Scope()
    this.inTestExpr = false
    const node = new SyntaxNode('Program', 0, input.length)
    while (!this.isEOF()) {
      if (this.is($T.Newline) || this.is($T.Semicolon)) {
        this.next()
        continue
      }
      const prevPos = this.pos
      const stmt = this.statement()
      if (stmt) node.add(stmt)
      if (this.pos === prevPos && !this.isEOF())
        throw "parser didn't advance - you need to call next()\n\n       ${this.input}\n"
    }
    return node
  }
  // 
  // parse foundation nodes - statements, expressions
  //
  // statement is a line of code
  statement(): SyntaxNode | null {
    if (this.is($T.Comment))
      return this.comment()
    while (this.is($T.Newline) || this.is($T.Semicolon))
      this.next()
    if (this.isEOF() || this.isExprEndKeyword())
      return null
    return this.expression()
  }
  // expressions can be found in four places:
  // 1. line of code
  // 2. right side of assignment
  // 3. if/while conditions
  // 4. inside (parens)
  expression(allowPipe = true): SyntaxNode {
    let expr
    // x = value
    if (this.is($T.Identifier) && (
      this.nextIs($T.Operator, '=') || compounds.some(x => this.nextIs($T.Operator, x))
    ))
      expr = this.assign()
    // if, while, do, etc
    else if (this.is($T.Keyword))
      expr = this.keywords()
    // dotget
    else if (this.nextIs($T.Operator, '.'))
      expr = this.dotGetFunctionCall()
    // echo hello world
    else if (this.is($T.Identifier) && !this.nextIs($T.Operator) && !this.nextIsExprEnd())
      expr = this.functionCall()
    // bare-function-call
    else if (this.is($T.Identifier) && this.nextIsExprEnd())
      expr = this.functionCallOrIdentifier()
    // everything else
    else
      expr = this.exprWithPrecedence()
    // check for destructuring
    if (expr.type === 'Array' && this.is($T.Operator, '='))
      return this.destructure(expr)
    // check for parens function call
    // ex: (ref my-func) my-arg
    if (expr.type === 'ParenExpr' && !this.isExprEnd())
      expr = this.functionCall(expr)
    // one | echo
    if (allowPipe && this.isPipe())
      return this.pipe(expr)
    // regular
    else
      return expr
  }
  // piping | stuff | is | cool
  pipe(left: SyntaxNode): SyntaxNode {
    const canLookPastNewlines = this.inParens === 0
    const parts: SyntaxNode[] = [left]
    while (this.isPipe()) {
      // consume newlines before pipe (only if not in parens)
      if (canLookPastNewlines) {
        while (this.is($T.Newline)) this.next()
      }
      const pipeOp = this.op('|')
      pipeOp.type = 'operator'
      parts.push(pipeOp)
      // consume newlines after pipe (only if not in parens)
      if (canLookPastNewlines) {
        while (this.is($T.Newline)) this.next()
      }
      // parse right side - don't allow nested pipes
      parts.push(this.expression(false))
    }
    const node = new SyntaxNode('PipeExpr', parts[0]!.from, parts.at(-1)!.to)
    return node.push(...parts)
  }
  // Pratt parser - parses expressions with precedence climbing
  // bp = binding precedence
  exprWithPrecedence(minBp = 0): SyntaxNode {
    let left = this.value()
    // infix operators with precedence
    while (this.is($T.Operator)) {
      const op = this.current().value!
      const bp = precedence[op]
      // operator has lower precedence than required, stop
      if (bp === undefined || bp < minBp) break
      const opNode = this.op()
      // right-associative operators (like **) use same bp, others use bp + 1
      const nextMinBp = op === '**' ? bp : bp + 1
      // parse right-hand side with higher precedence
      const right = this.exprWithPrecedence(nextMinBp)
      const nodeType = conditionals.has(op) ? 'ConditionalOp' : 'BinOp'
      const node = new SyntaxNode(nodeType, left.from, right.to)
      node.push(left, opNode, right)
      left = node
    }
    return left
  }
  // if, while, do, etc
  keywords(): SyntaxNode {
    if (this.is($T.Keyword, 'if'))
      return this.if()
    if (this.is($T.Keyword, 'while'))
      return this.while()
    if (this.is($T.Keyword, 'do'))
      return this.do()
    if (this.is($T.Keyword, 'try'))
      return this.try()
    if (this.is($T.Keyword, 'throw'))
      return this.throw()
    if (this.is($T.Keyword, 'import'))
      return this.import()
    return this.expect($T.Keyword, 'if/while/do/import') as never
  }
  // value can be an atom or a (parens that gets turned into an atom)
  // values are used in a few places:
  // 1. function arguments
  // 2. array/dict members
  // 3. binary operations
  // 4. anywhere an expression can be used
  value(): SyntaxNode {
    if (this.is($T.OpenParen))
      return this.parens()
    if (this.is($T.OpenBracket))
      return this.arrayOrDict()
    // dotget
    if (this.nextIs($T.Operator, '.'))
      return this.dotGet()
    return this.atom()
  }
  //
  // parse specific nodes
  //
  // [ 1 2 3 ]
  array(): SyntaxNode {
    const open = this.expect($T.OpenBracket)
    const values = []
    while (!this.is($T.CloseBracket) && !this.isEOF()) {
      if (this.is($T.Semicolon) || this.is($T.Newline)) {
        this.next()
        continue
      }
      if (this.is($T.Comment)) {
        values.push(this.comment())
        continue
      }
      values.push(this.value())
    }
    const close = this.expect($T.CloseBracket)
    const node = new SyntaxNode('Array', open.from, close.to)
    return node.push(...values)
  }
  // which are we dealing with? ignores leading newlines and comments
  arrayOrDict(): SyntaxNode {
    let peek = 1
    let curr = this.peek(peek++)
    let isDict = false
    while (curr && curr.type !== $T.CloseBracket) {
      // definitely a dict
      if (curr.type === $T.NamedArgPrefix) {
        isDict = true
        break
      }
      // empty dict
      if (curr.type === $T.Operator && curr.value === '=') {
        isDict = true
        break
      }
      // probably an array
      if (curr.type !== $T.Comment && curr.type !== $T.Semicolon && curr.type !== $T.Newline)
        break
      curr = this.peek(peek++)
    }
    return isDict ? this.dict() : this.array()
  }
  // x = true
  assign(): SyntaxNode {
    const ident = this.assignableIdentifier()
    const opToken = this.current()!
    const op = this.op()
    const expr = this.expression()
    const node = new SyntaxNode(
      opToken.value === '=' ? 'Assign' : 'CompoundAssign',
      ident.from,
      expr.to
    )
    return node.push(ident, op, expr)
  }
  // identifier used in assignment (TODO: legacy lezer quirk)
  assignableIdentifier(): SyntaxNode {
    const token = this.expect($T.Identifier)
    this.scope.add(token.value!)
    const node = SyntaxNode.from(token)
    node.type = 'AssignableIdentifier'
    return node
  }
  // atoms are the basic building blocks: literals, identifiers, words
  atom() {
    if (this.is($T.String))
      return this.string()
    if (this.isAny($T.Null, $T.Boolean, $T.Number, $T.Identifier, $T.Word, $T.Regex))
      return SyntaxNode.from(this.next())
    const next = this.next()
    throw `[atom] unexpected token ${TokenType[next.type]}: ${JSON.stringify(next)}\n\n       ${this.input}\n`
  }
  // blocks in if, do, special calls, etc
  // `: something end` 
  //
  // `blockNode` determines whether we return [colon, BlockNode, end] or
  // just a list of statements like [colon, stmt1, stmt2, end]
  block(blockNode = true): SyntaxNode[] {
    const stmts: SyntaxNode[] = []
    const colon = this.colon()
    while (!this.isExprEndKeyword() && !this.isEOF()) {
      const stmt = this.statement()
      if (stmt) stmts.push(stmt)
    }
    const out = [colon]
    if (blockNode) {
      const block = new SyntaxNode('Block', stmts[0]!.from, stmts.at(-1)!.to)
      block.push(...stmts)
      out.push(block)
    } else {
      out.push(...stmts)
    }
    return out
  }
  // catch err: block
  catch(): SyntaxNode {
    const keyword = this.keyword('catch')
    let catchVar
    if (this.is($T.Identifier))
      catchVar = this.identifier()
    const block = this.block()
    const node = new SyntaxNode('CatchExpr', keyword.from, block.at(-1)!.to)
    node.push(keyword)
    if (catchVar) node.push(catchVar)
    return node.push(...block)
  }
  // colon
  colon(): SyntaxNode {
    const colon = SyntaxNode.from(this.expect($T.Colon))
    colon.type = 'colon' // TODO lezer legacy
    return colon
  }
  // # comment
  comment(): SyntaxNode {
    return SyntaxNode.from(this.expect($T.Comment))
  }
  // [ a b c ] = [ 1 2 3 ]
  destructure(array: SyntaxNode): SyntaxNode {
    const eq = this.op('=')
    const val = this.expression()
    for (const ident of array.children) {
      const varName = this.input.slice(ident.from, ident.to)
      this.scope.add(varName)
    }
    const node = new SyntaxNode('Assign', array.from, val.to)
    return node.push(array, eq, val)
  }
  // [ a=1 b=true c='three' ]
  dict(): SyntaxNode {
    const open = this.expect($T.OpenBracket)
    // empty dict [=] or [ = ]
    if (this.is($T.Operator, '=') && this.nextIs($T.CloseBracket)) {
      const _op = this.next()
      const close = this.next()
      return new SyntaxNode('Dict', open.from, close.to)
    }
    const values = []
    while (!this.is($T.CloseBracket) && !this.isEOF()) {
      if (this.is($T.Semicolon) || this.is($T.Newline)) {
        this.next()
        continue
      }
      if (this.is($T.Comment)) {
        values.push(this.comment())
        continue
      }
      if (this.is($T.NamedArgPrefix))
        values.push(this.namedArg())
      else
        values.push(this.value())
    }
    const close = this.expect($T.CloseBracket)
    const node = new SyntaxNode('Dict', open.from, close.to)
    return node.push(...values)
  }
  // FunctionDef `do x y: something end`
  do(): SyntaxNode {
    const doNode = this.keyword('do')
    doNode.type = 'Do'
    this.scope = new Scope(this.scope)
    const params = []
    while (!this.is($T.Colon) && !this.isExprEnd()) {
      let varName = this.current().value!
      if (varName.endsWith('=')) varName = varName.slice(0, varName.length - 1)
      this.scope.add(varName)
      let arg
      if (this.is($T.Identifier))
        arg = this.identifier()
      else if (this.is($T.NamedArgPrefix))
        arg = this.namedParam()
      else
        throw `[do] expected Identifier or NamedArgPrefix, got ${JSON.stringify(this.current())}\n\n       ${this.input}\n`
      params.push(arg)
    }
    const block = this.block(false)
    let catchNode, finalNode
    if (this.is($T.Keyword, 'catch'))
      catchNode = this.catch()
    if (this.is($T.Keyword, 'finally'))
      finalNode = this.finally()
    let end = this.keyword('end')
    let last = block.at(-1)
    if (finalNode) last = finalNode.children.at(-1)!
    else if (catchNode) last = catchNode.children.at(-1)!
    const node = new SyntaxNode('FunctionDef', doNode.from, last!.to)
    node.add(doNode)
    const paramsNode = new SyntaxNode(
      'Params',
      params[0]?.from ?? 0,
      params.at(-1)?.to ?? 0
    )
    if (params.length) paramsNode.push(...params)
    node.add(paramsNode)
    this.scope = this.scope.parent!
    node.push(...block)
    if (catchNode) node.push(catchNode)
    if (finalNode) node.push(finalNode)
    return node.push(end)
  }
  // config.path
  dotGet(): SyntaxNode {
    const left = this.identifier()
    const ident = this.input.slice(left.from, left.to)
    // not in scope, just return Word
    if (!this.scope.has(ident))
      return this.word(left)
    if (left.type === 'Identifier') left.type = 'IdentifierBeforeDot'
    let parts = []
    while (this.is($T.Operator, '.')) {
      this.next()
      parts.push(this.is($T.OpenParen) ? this.parens() : this.atom())
    }
    // TODO lezer legacy - we can do a flat DotGet if we remove this
    const nodes = parts.length > 1 ? collapseDotGets(parts) : undefined
    const node = new SyntaxNode('DotGet', left.from, parts.at(-1)!.to)
    return nodes ? node.push(left, nodes!) : node.push(left, ...parts)
  }
  // dotget in a statement/expression (something.blah) or (something.blah arg1)
  dotGetFunctionCall(): SyntaxNode {
    const dotGet = this.dotGet()
    // dotget not in scope, regular Word
    if (dotGet.type === 'Word') return dotGet
    if (this.isExprEnd())
      return this.functionCallOrIdentifier(dotGet)
    else
      return this.functionCall(dotGet)
  }
  // can be used in functions or try block
  finally(): SyntaxNode {
    const keyword = this.keyword('finally')
    const block = this.block()
    const node = new SyntaxNode('FinallyExpr', keyword.from, block.at(-1)!.to)
    return node.push(keyword, ...block)
  }
  // you're lookin at it
  functionCall(fn?: SyntaxNode): SyntaxNode {
    const ident = fn ?? this.identifier()
    const args: SyntaxNode[] = []
    while (!this.isExprEnd() && !this.is($T.Operator, '|')) {
      if (this.is($T.NamedArgPrefix)) {
        args.push(this.namedArg())
      } else {
        // 'do' is the only keyword allowed as a function argument
        const val = this.is($T.Keyword, 'do') ? this.do() : this.value()
        const arg = new SyntaxNode('PositionalArg', val.from, val.to)
        arg.add(val)
        args.push(arg)
      }
    }
    const node = new SyntaxNode('FunctionCall', ident.from, (args.at(-1) || ident).to)
    node.push(ident, ...args)
    if (!this.inTestExpr && this.is($T.Colon)) {
      const block = this.block()
      const end = this.keyword('end')
      const blockNode = new SyntaxNode('FunctionCallWithBlock', node.from, end.to)
      return blockNode.push(node, ...block, end)
    }
    return node
  }
  // bare identifier in an expression
  functionCallOrIdentifier(inner?: SyntaxNode) {
    if (!inner && this.nextIs($T.Operator, '.')) {
      inner = this.dotGet()
      // if the dotGet was just a Word, bail
      if (inner.type === 'Word') return inner
    }
    inner ??= this.identifier()
    const wrapper = new SyntaxNode('FunctionCallOrIdentifier', inner.from, inner.to)
    wrapper.push(inner)
    if (!this.inTestExpr && this.is($T.Colon)) {
      const block = this.block()
      const end = this.keyword('end')
      const node = new SyntaxNode('FunctionCallWithBlock', wrapper.from, end.to)
      return node.push(wrapper, ...block, end)
    }
    return wrapper
  }
  // function and variable names
  identifier(): SyntaxNode {
    return SyntaxNode.from(this.expect($T.Identifier))
  }
  // if something: blah end
  // if something: blah else: blah end
  // if something: blah else if something: blah else: blah end
  if(): SyntaxNode {
    const ifNode = this.keyword('if')
    const test = this.testExpr()
    const ifBlock = this.block()
    const node = new SyntaxNode('IfExpr', ifNode.from, ifBlock.at(-1)!.to)
    node.push(ifNode, test)
    node.push(...ifBlock)
    while (this.is($T.Keyword, 'else') && this.nextIs($T.Keyword, 'if')) {
      const elseWord = this.keyword('else')
      const ifWord = this.keyword('if')
      const elseIfTest = this.testExpr()
      const elseIfBlock = this.block()
      const elseIfNode = new SyntaxNode('ElseIfExpr', ifBlock.at(-1)!.from, elseIfBlock.at(-1)!.to)
      elseIfNode.push(elseWord, ifWord, elseIfTest)
      elseIfNode.push(...elseIfBlock)
      node.push(elseIfNode)
    }
    if (this.is($T.Keyword, 'else') && this.nextIs($T.Colon)) {
      const elseWord = this.keyword('else')
      const elseBlock = this.block()
      const elseNode = new SyntaxNode('ElseExpr', ifBlock.at(-1)!.from, elseBlock.at(-1)!.to)
      elseNode.push(elseWord)
      elseNode.push(...elseBlock)
      node.push(elseNode)
    }
    return node.push(this.keyword('end'))
  }
  import(): SyntaxNode {
    const keyword = this.keyword('import')
    const args: SyntaxNode[] = []
    while (!this.isExprEnd()) {
      if (this.is($T.NamedArgPrefix)) {
        const prefix = SyntaxNode.from(this.next())
        const val = this.value()
        const arg = new SyntaxNode('NamedArg', prefix.from, val.to)
        arg.push(prefix, val)
        args.push(arg)
      } else {
        args.push(this.identifier())
      }
    }
    const node = new SyntaxNode('Import', keyword.from, args.at(-1)!.to)
    node.add(keyword)
    return node.push(...args)
  }
  // if, while, do, etc
  keyword(name: string): SyntaxNode {
    const node = SyntaxNode.from(this.expect($T.Keyword, name))
    node.type = 'keyword' // TODO lezer legacy
    return node
  }
  // abc= true
  namedArg(): SyntaxNode {
    const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix))
    const val = this.value()
    const node = new SyntaxNode('NamedArg', prefix.from, val.to)
    return node.push(prefix, val)
  }
  // abc= null|true|123|'hi'
  namedParam(): SyntaxNode {
    const prefix = SyntaxNode.from(this.expect($T.NamedArgPrefix))
    const val = this.value()
    if (!['Null', 'Boolean', 'Number', 'String'].includes(val.type))
      throw `[namedParam] default value must be Null|Bool|Num|Str, got ${val.type}\n\n       ${this.input}\n`
    const node = new SyntaxNode('NamedParam', prefix.from, val.to)
    return node.push(prefix, val)
  }
  // operators like + - =
  op(op?: string): SyntaxNode {
    const token = op ? this.expect($T.Operator, op) : this.expect($T.Operator)
    const name = operators[token.value!]
    if (!name) throw `[op] operator not registered: ${token.value!}\n\n       ${this.input}\n`
    return new SyntaxNode(name, token.from, token.to)
  }
  // ( expressions in parens )
  parens(): SyntaxNode {
    this.inParens++
    const open = this.expect($T.OpenParen)
    const child = this.expression()
    const close = this.expect($T.CloseParen)
    this.inParens--
    const node = new SyntaxNode('ParenExpr', open.from, close.to)
    node.add(child)
    return node
  }
  // 'hell yes' "hell no" { hell if i know }
  string(): SyntaxNode {
    const token = this.expect($T.String)
    return parseString(this.input, token.from, token.to, this)
  }
  // if TEST: blah end
  testExpr(): SyntaxNode {
    this.inTestExpr = true
    const expr = this.expression()
    this.inTestExpr = false
    return expr
  }
  // throw blah
  throw(): SyntaxNode {
    const keyword = this.keyword('throw')
    const val = this.value()
    const node = new SyntaxNode('Throw', keyword.from, val.to)
    return node.push(keyword, val)
  }
  // try: blah catch e: blah end
  try(): SyntaxNode {
    const tryNode = this.keyword('try')
    const tryBlock = this.block()
    let last = tryBlock.at(-1)
    let catchNode, finalNode
    if (this.is($T.Keyword, 'catch'))
      catchNode = this.catch()
    if (this.is($T.Keyword, 'finally'))
      finalNode = this.finally()
    const end = this.keyword('end')
    if (finalNode) last = finalNode.children.at(-1)
    else if (catchNode) last = catchNode.children.at(-1)
    const node = new SyntaxNode('TryExpr', tryNode.from, last!.to)
    node.push(tryNode, ...tryBlock)
    if (catchNode)
      node.push(catchNode)
    if (finalNode)
      node.push(finalNode)
    return node.push(end)
  }
  // while test: blah end
  while(): SyntaxNode {
    const keyword = this.keyword('while')
    const test = this.testExpr()
    const block = this.block()
    const end = this.keyword('end')
    const node = new SyntaxNode('WhileExpr', keyword.from, end.to)
    return node.push(keyword, test, ...block, end)
  }
  // readme.txt (when `readme` isn't in scope)
  word(start?: SyntaxNode): SyntaxNode {
    const parts = [start ?? this.expect($T.Word)]
    while (this.is($T.Operator, '.')) {
      this.next()
      if (this.isAny($T.Word, $T.Identifier, $T.Number))
        parts.push(this.next())
    }
    return new SyntaxNode('Word', parts[0]!.from, parts.at(-1)!.to)
  }
  // 
  // helpers
  //
  current(): Token {
    return this.tokens[this.pos] || { type: TokenType.Newline, from: 0, to: 0 }
  }
  peek(offset = 1): Token | undefined {
    return this.tokens[this.pos + offset]
  }
  // look past newlines to check for a specific token
  peekPastNewlines(type: TokenType, value?: string): boolean {
    let offset = 1
    let peek = this.peek(offset)
    while (peek && peek.type === $T.Newline)
      peek = this.peek(++offset)
    if (!peek || peek.type !== type) return false
    if (value !== undefined && peek.value !== value) return false
    return true
  }
  next(): Token {
    const token = this.current()
    this.pos++
    return token
  }
  is(type: TokenType, value?: string): boolean {
    const token = this.current()
    if (!token || token.type !== type) return false
    if (value !== undefined && token.value !== value) return false
    return true
  }
  isAny(...type: TokenType[]): boolean {
    return type.some(x => this.is(x))
  }
  nextIs(type: TokenType, value?: string): boolean {
    const token = this.peek()
    if (!token || token.type !== type) return false
    if (value !== undefined && token.value !== value) return false
    return true
  }
  nextIsAny(...type: TokenType[]): boolean {
    return type.some(x => this.nextIs(x))
  }
  isExprEnd(): boolean {
    return this.isAny($T.Colon, $T.Semicolon, $T.Newline, $T.CloseParen, $T.CloseBracket) ||
      this.isExprEndKeyword() || !this.current()
  }
  nextIsExprEnd(): boolean {
    // pipes act like expression end for function arg parsing
    if (this.nextIs($T.Operator, '|'))
      return true
    return this.nextIsAny($T.Colon, $T.Semicolon, $T.Newline, $T.CloseBracket, $T.CloseParen) ||
      this.nextIs($T.Keyword, 'end') || this.nextIs($T.Keyword, 'else') ||
      this.nextIs($T.Keyword, 'catch') || this.nextIs($T.Keyword, 'finally') ||
      !this.peek()
  }
  isExprEndKeyword(): boolean {
    return this.is($T.Keyword, 'end') || this.is($T.Keyword, 'else') ||
      this.is($T.Keyword, 'catch') || this.is($T.Keyword, 'finally')
  }
  isPipe(): boolean {
    // inside parens, only look for pipes on same line (don't look past newlines)
    const canLookPastNewlines = this.inParens === 0
    return this.is($T.Operator, '|') ||
      (canLookPastNewlines && this.peekPastNewlines($T.Operator, '|'))
  }
  expect(type: TokenType, value?: string): Token | never {
    if (!this.is(type, value)) {
      const token = this.current()
      throw `expected ${TokenType[type]}${value ? ` "${value}"` : ''}, got ${TokenType[token?.type || 0]}${token?.value ? ` "${token.value}"` : ''} at position ${this.pos}\n\n       ${this.input}\n`
    }
    return this.next()
  }
  isEOF(): boolean {
    return this.pos >= this.tokens.length
  }
 }
 // TODO lezer legacy
 function collapseDotGets(origNodes: SyntaxNode[]): SyntaxNode {
  const nodes = [...origNodes]
  let right = nodes.pop()!
  while (nodes.length > 0) {
    const left = nodes.pop()!
    if (left.type === 'Identifier') left.type = 'IdentifierBeforeDot'
    const dot = new SyntaxNode("DotGet", left.from, right.to);
    dot.push(left, right)
    right = dot
  }
  return right
 }
--- a/src/parser/stringParser.ts
+++ b/src/parser/stringParser.ts
@ -0,0 +1,258 @@
 import { SyntaxNode } from './node'
 /**
 * Parse string contents into fragments, interpolations, and escape sequences.
 *
 * Input: full string including quotes, e.g. "'hello $name'"
 * Output: SyntaxNode tree with StringFragment, Interpolation, EscapeSeq children
 */
 export const parseString = (input: string, from: number, to: number, parser: any): SyntaxNode => {
  const stringNode = new SyntaxNode('String', from, to)
  const content = input.slice(from, to)
  // Determine string type
  const firstChar = content[0]
  // Double-quoted strings: no interpolation or escapes
  if (firstChar === '"') {
    const fragment = new SyntaxNode('DoubleQuote', from, to)
    stringNode.add(fragment)
    return stringNode
  }
  // Curly strings: interpolation but no escapes
  if (firstChar === '{') {
    parseCurlyString(stringNode, input, from, to, parser)
    return stringNode
  }
  // Single-quoted strings: interpolation and escapes
  if (firstChar === "'") {
    parseSingleQuoteString(stringNode, input, from, to, parser)
    return stringNode
  }
  throw `Unknown string type starting with: ${firstChar}`
 }
 /**
 * Parse single-quoted string: 'hello $name\n'
 * Supports: interpolation ($var, $(expr)), escape sequences (\n, \$, etc)
 */
 const parseSingleQuoteString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
  let pos = from + 1 // Skip opening '
  let fragmentStart = pos
  while (pos < to - 1) { // -1 to skip closing '
    const char = input[pos]
    // Escape sequence
    if (char === '\\' && pos + 1 < to - 1) {
      // Push accumulated fragment
      if (pos > fragmentStart) {
        const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
        stringNode.add(frag)
      }
      // Add escape sequence node
      const escNode = new SyntaxNode('EscapeSeq', pos, pos + 2)
      stringNode.add(escNode)
      pos += 2
      fragmentStart = pos
      continue
    }
    // Interpolation
    if (char === '$') {
      // Push accumulated fragment
      if (pos > fragmentStart) {
        const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
        stringNode.add(frag)
      }
      pos++ // Skip $
      // Parse interpolation content
      if (input[pos] === '(') {
        // Expression interpolation: $(expr)
        const interpStart = pos - 1 // Include the $
        const exprResult = parseInterpolationExpr(input, pos, parser)
        const interpNode = new SyntaxNode('Interpolation', interpStart, exprResult.endPos)
        interpNode.add(exprResult.node)
        stringNode.add(interpNode)
        pos = exprResult.endPos
      } else {
        // Variable interpolation: $name
        const interpStart = pos - 1
        const identEnd = findIdentifierEnd(input, pos, to - 1)
        const identNode = new SyntaxNode('FunctionCallOrIdentifier', pos, identEnd)
        const innerIdent = new SyntaxNode('Identifier', pos, identEnd)
        identNode.add(innerIdent)
        const interpNode = new SyntaxNode('Interpolation', interpStart, identEnd)
        interpNode.add(identNode)
        stringNode.add(interpNode)
        pos = identEnd
      }
      fragmentStart = pos
      continue
    }
    pos++
  }
  // Push final fragment
  if (pos > fragmentStart && fragmentStart < to - 1) {
    const frag = new SyntaxNode('StringFragment', fragmentStart, pos)
    stringNode.add(frag)
  }
 }
 /**
 * Parse curly string: { hello $name }
 * Supports: interpolation ($var, $(expr)), nested braces
 * Does NOT support: escape sequences (raw content)
 */
 const parseCurlyString = (stringNode: SyntaxNode, input: string, from: number, to: number, parser: any) => {
  let pos = from + 1 // Skip opening {
  let fragmentStart = from // Include the opening { in the fragment
  let depth = 1
  while (pos < to && depth > 0) {
    const char = input[pos]
    // Track brace nesting
    if (char === '{') {
      depth++
      pos++
      continue
    }
    if (char === '}') {
      depth--
      if (depth === 0) {
        // Push final fragment including closing }
        const frag = new SyntaxNode('CurlyString', fragmentStart, pos + 1)
        stringNode.add(frag)
        break
      }
      pos++
      continue
    }
    // Interpolation
    if (char === '$') {
      // Push accumulated fragment
      if (pos > fragmentStart) {
        const frag = new SyntaxNode('CurlyString', fragmentStart, pos)
        stringNode.add(frag)
      }
      pos++ // Skip $
      // Parse interpolation content
      if (input[pos] === '(') {
        // Expression interpolation: $(expr)
        const interpStart = pos - 1
        const exprResult = parseInterpolationExpr(input, pos, parser)
        const interpNode = new SyntaxNode('Interpolation', interpStart, exprResult.endPos)
        interpNode.add(exprResult.node)
        stringNode.add(interpNode)
        pos = exprResult.endPos
      } else {
        // Variable interpolation: $name
        const interpStart = pos - 1
        const identEnd = findIdentifierEnd(input, pos, to)
        const identNode = new SyntaxNode('FunctionCallOrIdentifier', pos, identEnd)
        const innerIdent = new SyntaxNode('Identifier', pos, identEnd)
        identNode.add(innerIdent)
        const interpNode = new SyntaxNode('Interpolation', interpStart, identEnd)
        interpNode.add(identNode)
        stringNode.add(interpNode)
        pos = identEnd
      }
      fragmentStart = pos
      continue
    }
    pos++
  }
 }
 /**
 * Parse a parenthesized expression interpolation: $(a + b)
 * Returns the parsed expression node and the position after the closing )
 * pos is position of the opening ( in the full input string
 */
 const parseInterpolationExpr = (input: string, pos: number, parser: any): { node: SyntaxNode, endPos: number } => {
  // Find matching closing paren
  let depth = 1
  let start = pos
  let end = pos + 1 // Start after opening (
  while (end < input.length && depth > 0) {
    if (input[end] === '(') depth++
    if (input[end] === ')') {
      depth--
      if (depth === 0) break
    }
    end++
  }
  const exprContent = input.slice(start + 1, end) // Content between ( and )
  const closeParen = end
  end++ // Move past closing )
  // Use the main parser to parse the expression
  const exprNode = parser.parse(exprContent)
  // Get the first real node (skip Program wrapper)
  const innerNode = exprNode.firstChild || exprNode
  // Adjust node positions: they're relative to exprContent, need to offset to full input
  const offset = start + 1 // Position where exprContent starts in full input
  adjustNodePositions(innerNode, offset)
  // Wrap in ParenExpr - use positions in the full string
  const parenNode = new SyntaxNode('ParenExpr', start, closeParen + 1)
  parenNode.add(innerNode)
  return { node: parenNode, endPos: end }
 }
 /**
 * Recursively adjust all node positions by adding an offset
 */
 const adjustNodePositions = (node: SyntaxNode, offset: number) => {
  node.from += offset
  node.to += offset
  for (const child of node.children) {
    adjustNodePositions(child, offset)
  }
 }
 /**
 * Find the end position of an identifier starting at pos
 * Identifiers: lowercase letter or emoji, followed by letters/digits/dashes/emoji
 */
 const findIdentifierEnd = (input: string, pos: number, maxPos: number): number => {
  let end = pos
  while (end < maxPos) {
    const char = input[end]
    // Stop at non-identifier characters
    if (!/[a-z0-9\-?]/.test(char)) {
      break
    }
    end++
  }
  return end
 }
--- a/src/parser/tests/basics.test.ts
+++ b/src/parser/tests/basics.test.ts
@ -810,44 +810,6 @@ describe('Nullish coalescing operator', () => {
  })
 })
 describe('DotGet whitespace sensitivity', () => {
  test('no whitespace - DotGet works when identifier in scope', () => {
    expect('basename = 5; basename.prop').toMatchTree(`
      Assign
        AssignableIdentifier basename
        Eq =
        Number 5
      FunctionCallOrIdentifier
        DotGet
          IdentifierBeforeDot basename
          Identifier prop`)
  })
  test('space before dot - NOT DotGet, parses as division', () => {
    expect('basename = 5; basename / prop').toMatchTree(`
      Assign
        AssignableIdentifier basename
        Eq =
        Number 5
      BinOp
        Identifier basename
        Slash /
        Identifier prop`)
  })
  test('dot followed by slash is Word, not DotGet', () => {
    expect('basename ./cool').toMatchTree(`
      FunctionCall
        Identifier basename
        PositionalArg
          Word ./cool`)
  })
  test('identifier not in scope with dot becomes Word', () => {
    expect('readme.txt').toMatchTree(`Word readme.txt`)
  })
 })
 describe('Comments', () => {
  test('are greedy', () => {
    expect(`
@ -897,61 +859,6 @@ basename = 5 # very astute
  })
 })
 describe('Array destructuring', () => {
  test('parses array pattern with two variables', () => {
    expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`
      Assign
        Array
          Identifier a
          Identifier b
        Eq =
        Array
          Number 1
          Number 2
          Number 3
          Number 4`)
  })
  test('parses array pattern with one variable', () => {
    expect('[ x ] = [ 42 ]').toMatchTree(`
      Assign
        Array
          Identifier x
        Eq =
        Array
          Number 42`)
  })
  test('parses array pattern with emoji identifiers', () => {
    expect('[ 🚀 💎 ] = [ 1 2 ]').toMatchTree(`
      Assign
        Array
          Identifier 🚀
          Identifier 💎
        Eq =
        Array
          Number 1
          Number 2`)
  })
  test('works with dotget', () => {
    expect('[ a ] = [ [1 2 3] ]; a.1').toMatchTree(`
      Assign
        Array
          Identifier a
        Eq =
        Array
          Array
            Number 1
            Number 2
            Number 3
      FunctionCallOrIdentifier
        DotGet
          IdentifierBeforeDot a
          Number 1`)
  })
 })
 describe('Conditional ops', () => {
  test('or can be chained', () => {
    expect(`
@ -1037,34 +944,3 @@ Assign
        `)
  })
 })
 describe('import', () => {
  test('parses single import', () => {
    expect(`import str`).toMatchTree(`
      Import
        keyword import
        Identifier str
      `)
  })
  test('parses multiple imports', () => {
    expect(`import str math list`).toMatchTree(`
      Import
        keyword import
        Identifier str
        Identifier math
        Identifier list
      `)
  })
  test('parses named args', () => {
    expect(`import str only=ends-with?`).toMatchTree(`
      Import
        keyword import
        Identifier str
        NamedArg
          NamedArgPrefix only=
          Identifier ends-with?
      `)
  })
 })
--- a/src/parser/tests/control-flow.test.ts
+++ b/src/parser/tests/control-flow.test.ts
@ -24,6 +24,7 @@ describe('if/else if/else', () => {
        Eq =
        IfExpr
          keyword if
          FunctionCallOrIdentifier
            Identifier x
          colon :
          Block
@ -59,6 +60,7 @@ describe('if/else if/else', () => {
    end`).toMatchTree(`
      IfExpr
        keyword if
        FunctionCallOrIdentifier
          Identifier with-else
        colon :
        Block
@ -82,6 +84,7 @@ describe('if/else if/else', () => {
    end`).toMatchTree(`
      IfExpr
        keyword if
        FunctionCallOrIdentifier
          Identifier with-else-if
        colon :
        Block
@ -90,6 +93,7 @@ describe('if/else if/else', () => {
        ElseIfExpr
          keyword else
          keyword if
          FunctionCallOrIdentifier
            Identifier another-condition
          colon :
          Block
@ -111,6 +115,7 @@ describe('if/else if/else', () => {
    end`).toMatchTree(`
      IfExpr
        keyword if
        FunctionCallOrIdentifier
          Identifier with-else-if-else
        colon :
        Block
@ -119,6 +124,7 @@ describe('if/else if/else', () => {
        ElseIfExpr
          keyword else
          keyword if
          FunctionCallOrIdentifier
            Identifier another-condition
          colon :
          Block
@ -127,6 +133,7 @@ describe('if/else if/else', () => {
        ElseIfExpr
          keyword else
          keyword if
          FunctionCallOrIdentifier
            Identifier yet-another-condition
          colon :
          Block
@ -173,7 +180,7 @@ describe('if/else if/else', () => {
    `)
  })
-  test('parses function calls in if tests', () => {
+  test("parses paren'd function calls in if tests", () => {
    expect(`if (var? 'abc'): true end`).toMatchTree(`
      IfExpr
        keyword if
@ -214,7 +221,7 @@ describe('if/else if/else', () => {
    `)
  })
-  test('parses function calls in else-if tests', () => {
+  test("parses paren'd function calls in else-if tests", () => {
    expect(`if false: true else if (var? 'abc'): true end`).toMatchTree(`
      IfExpr
        keyword if
--- a/src/parser/tests/destructuring.test.ts
+++ b/src/parser/tests/destructuring.test.ts
@ -0,0 +1,58 @@
 import { expect, describe, test } from 'bun:test'
 import '../shrimp.grammar' // Importing this so changes cause it to retest!
 describe('Array destructuring', () => {
  test('parses array pattern with two variables', () => {
    expect('[ a b ] = [ 1 2 3 4]').toMatchTree(`
      Assign
        Array
          Identifier a
          Identifier b
        Eq =
        Array
          Number 1
          Number 2
          Number 3
          Number 4`)
  })
  test('parses array pattern with one variable', () => {
    expect('[ x ] = [ 42 ]').toMatchTree(`
      Assign
        Array
          Identifier x
        Eq =
        Array
          Number 42`)
  })
  test('parses array pattern with emoji identifiers', () => {
    expect('[ 🚀 💎 ] = [ 1 2 ]').toMatchTree(`
      Assign
        Array
          Identifier 🚀
          Identifier 💎
        Eq =
        Array
          Number 1
          Number 2`)
  })
  test('works with dotget', () => {
    expect('[ a ] = [ [1 2 3] ]; a.1').toMatchTree(`
      Assign
        Array
          Identifier a
        Eq =
        Array
          Array
            Number 1
            Number 2
            Number 3
      FunctionCallOrIdentifier
        DotGet
          IdentifierBeforeDot a
          Number 1`)
  })
 })
--- a/src/parser/tests/dot-get.test.ts
+++ b/src/parser/tests/dot-get.test.ts
@ -1,6 +1,44 @@
 import { describe, test, expect } from 'bun:test'
 import '../../testSetup'
 describe('DotGet whitespace sensitivity', () => {
  test('no whitespace - DotGet works when identifier in scope', () => {
    expect('basename = 5; basename.prop').toMatchTree(`
      Assign
        AssignableIdentifier basename
        Eq =
        Number 5
      FunctionCallOrIdentifier
        DotGet
          IdentifierBeforeDot basename
          Identifier prop`)
  })
  test('space before dot - NOT DotGet, parses as division', () => {
    expect('basename = 5; basename / prop').toMatchTree(`
      Assign
        AssignableIdentifier basename
        Eq =
        Number 5
      BinOp
        Identifier basename
        Slash /
        Identifier prop`)
  })
  test('dot followed by slash is Word, not DotGet', () => {
    expect('basename ./cool').toMatchTree(`
      FunctionCall
        Identifier basename
        PositionalArg
          Word ./cool`)
  })
  test('identifier not in scope with dot becomes Word', () => {
    expect('readme.txt').toMatchTree(`Word readme.txt`)
  })
 })
 describe('DotGet', () => {
  test('readme.txt is Word when readme not in scope', () => {
    expect('readme.txt').toMatchTree(`Word readme.txt`)
@ -199,7 +237,7 @@ end`).toMatchTree(`
    `)
  })
-  test("dot get doesn't work with spaces", () => {
+  test.skip("dot get doesn't work with spaces", () => {
    expect('obj . prop').toMatchTree(`
      FunctionCall
        Identifier obj
--- a/src/parser/tests/functions.test.ts
+++ b/src/parser/tests/functions.test.ts
@ -57,7 +57,7 @@ describe('calling functions', () => {
    `)
  })
-  test('Incomplete namedArg', () => {
+  test.skip('Incomplete namedArg', () => {
    expect('tail lines=').toMatchTree(`
      FunctionCall
        Identifier tail
--- a/src/parser/tests/import.test.ts
+++ b/src/parser/tests/import.test.ts
@ -0,0 +1,34 @@
 import { expect, describe, test } from 'bun:test'
 import '../shrimp.grammar' // Importing this so changes cause it to retest!
 describe('import', () => {
  test('parses single import', () => {
    expect(`import str`).toMatchTree(`
      Import
        keyword import
        Identifier str
      `)
  })
  test('parses multiple imports', () => {
    expect(`import str math list`).toMatchTree(`
      Import
        keyword import
        Identifier str
        Identifier math
        Identifier list
      `)
  })
  test('parses named args', () => {
    expect(`import str only=ends-with?`).toMatchTree(`
      Import
        keyword import
        Identifier str
        NamedArg
          NamedArgPrefix only=
          Identifier ends-with?
      `)
  })
 })
--- a/src/parser/tests/literals.test.ts
+++ b/src/parser/tests/literals.test.ts
@ -375,10 +375,11 @@ describe('dict literals', () => {
    expect('[=]').toMatchTree(`
      Dict [=]
    `)
  })
  test('empty dict w whitespace', () => {
    expect('[ = ]').toMatchTree(`
-      Array
+      Dict [ = ]
        Word =
    `)
  })
--- a/src/parser/tests/tokens.test.ts
+++ b/src/parser/tests/tokens.test.ts
@ -15,7 +15,10 @@ describe('numbers', () => {
  test('non-numbers', () => {
    expect(`1st`).toMatchToken('Word', '1st')
    expect(`1_`).toMatchToken('Word', '1_')
-    expect(`100.`).toMatchToken('Word', '100.')
+    expect(`100.`).toMatchTokens(
      { type: 'Number', value: '100' },
      { type: 'Operator', value: '.' },
    )
  })
  test('simple numbers', () => {
@ -127,6 +130,19 @@ describe('identifiers', () => {
    expect('dog#pound').toMatchToken('Word', 'dog#pound')
    expect('http://website.com').toMatchToken('Word', 'http://website.com')
    expect('school$cool').toMatchToken('Identifier', 'school$cool')
    expect('EXIT:').toMatchTokens(
      { type: 'Word', value: 'EXIT' },
      { type: 'Colon' },
    )
    expect(`if y == 1: 'cool' end`).toMatchTokens(
      { type: 'Keyword', value: 'if' },
      { type: 'Identifier', value: 'y' },
      { type: 'Operator', value: '==' },
      { type: 'Number', value: '1' },
      { type: 'Colon' },
      { type: 'String', value: `'cool'` },
      { type: 'Keyword', value: 'end' },
    )
  })
 })
@ -139,8 +155,15 @@ describe('paths', () => {
    expect('/home/chris/dev').toMatchToken('Word', '/home/chris/dev')
  })
-  test('ending with ext', () => {
+  test('identifiers with dots tokenize separately', () => {
-    expect('readme.txt').toMatchToken('Word', 'readme.txt')
+    expect('readme.txt').toMatchTokens(
      { type: 'Identifier', value: 'readme' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'txt' },
    )
  })
  test('words (non-identifiers) consume dots', () => {
    expect('README.md').toMatchToken('Word', 'README.md')
  })
@ -259,6 +282,9 @@ describe('operators', () => {
    expect('==').toMatchToken('Operator', '==')
    expect('>').toMatchToken('Operator', '>')
    expect('<').toMatchToken('Operator', '<')
    // property access
    expect('.').toMatchToken('Operator', '.')
  })
 })
@ -281,6 +307,12 @@ describe('keywords', () => {
  })
 })
 describe('regex', () => {
  test('use double slash', () => {
    expect(`//[0-9]+//`).toMatchToken('Regex', '//[0-9]+//')
  })
 })
 describe('punctuation', () => {
  test('underscore', () => {
    expect(`_`).toBeToken('Underscore')
@ -453,6 +485,17 @@ f
      { type: 'Identifier', value: 'y' },
    )
    expect(`if (var? 'abc'): y`).toMatchTokens(
      { type: 'Keyword', value: 'if' },
      { type: 'OpenParen' },
      { type: 'Identifier', value: 'var?' },
      { type: 'String', value: `'abc'` },
      { type: 'CloseParen' },
      { type: 'Colon' },
      { type: 'Identifier', value: 'y' },
    )
    expect(`
 do x:
  y
@ -485,6 +528,30 @@ end`).toMatchTokens(
      { type: 'CloseParen' },
    )
  })
  test('dot operator beginning word with slash', () => {
    expect(`(basename ./cool)`).toMatchTokens(
      { 'type': 'OpenParen' },
      { 'type': 'Identifier', 'value': 'basename' },
      { 'type': 'Word', 'value': './cool' },
      { 'type': 'CloseParen' }
    )
  })
  test('dot word after identifier with space', () => {
    expect(`expand-path .git`).toMatchTokens(
      { 'type': 'Identifier', 'value': 'expand-path' },
      { 'type': 'Word', 'value': '.git' },
    )
  })
  test('dot operator after identifier without space', () => {
    expect(`config.path`).toMatchTokens(
      { 'type': 'Identifier', 'value': 'config' },
      { 'type': 'Operator', 'value': '.' },
      { 'type': 'Identifier', 'value': 'path' },
    )
  })
 })
 describe('nesting edge cases', () => {
@ -591,3 +658,72 @@ describe('named args', () => {
    )
  })
 })
 describe('dot operator', () => {
  test('standalone dot', () => {
    expect('.').toMatchToken('Operator', '.')
  })
  test('dot between identifiers tokenizes as separate tokens', () => {
    expect('config.path').toMatchTokens(
      { type: 'Identifier', value: 'config' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'path' },
    )
  })
  test('dot with number', () => {
    expect('array.0').toMatchTokens(
      { type: 'Identifier', value: 'array' },
      { type: 'Operator', value: '.' },
      { type: 'Number', value: '0' },
    )
  })
  test('chained dots', () => {
    expect('a.b.c').toMatchTokens(
      { type: 'Identifier', value: 'a' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'b' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'c' },
    )
  })
  test('identifier-like paths tokenize separately', () => {
    expect('readme.txt').toMatchTokens(
      { type: 'Identifier', value: 'readme' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'txt' },
    )
  })
  test('word-like paths remain as single token', () => {
    expect('./file.txt').toMatchToken('Word', './file.txt')
    expect('README.TXT').toMatchToken('Word', 'README.TXT')
  })
  test('dot with paren expression', () => {
    expect('obj.(1 + 2)').toMatchTokens(
      { type: 'Identifier', value: 'obj' },
      { type: 'Operator', value: '.' },
      { type: 'OpenParen' },
      { type: 'Number', value: '1' },
      { type: 'Operator', value: '+' },
      { type: 'Number', value: '2' },
      { type: 'CloseParen' },
    )
  })
  test('chained dot with paren expression', () => {
    expect('obj.items.(i)').toMatchTokens(
      { type: 'Identifier', value: 'obj' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'items' },
      { type: 'Operator', value: '.' },
      { type: 'OpenParen' },
      { type: 'Identifier', value: 'i' },
      { type: 'CloseParen' },
    )
  })
 })
--- a/src/parser/tokenizer2.ts
+++ b/src/parser/tokenizer2.ts
@ -31,13 +31,14 @@ export enum TokenType {
  Boolean,
  Number,
  String,
  Regex,
 }
 const valueTokens = new Set([
  TokenType.Comment,
  TokenType.Keyword, TokenType.Operator,
  TokenType.Identifier, TokenType.Word, TokenType.NamedArgPrefix,
-  TokenType.Boolean, TokenType.Number, TokenType.String
+  TokenType.Boolean, TokenType.Number, TokenType.String, TokenType.Regex
 ])
 const operators = new Set([
@ -82,6 +83,12 @@ const operators = new Set([
  '==',
  '>',
  '<',
  // property access
  '.',
  // pipe
  '|',
 ])
 const keywords = new Set([
@ -116,6 +123,7 @@ export class Scanner {
  inParen = 0
  inBracket = 0
  tokens: Token[] = []
  prevIsWhitespace = true
  reset() {
    this.input = ''
@ -124,6 +132,7 @@ export class Scanner {
    this.char = 0
    this.prev = 0
    this.tokens.length = 0
    this.prevIsWhitespace = true
  }
  peek(count = 0): number {
@ -131,9 +140,11 @@ export class Scanner {
  }
  next(): number {
    this.prevIsWhitespace = isWhitespace(this.char)
    this.prev = this.char
    this.char = this.peek()
    this.pos += getCharSize(this.char)
    return this.char
  }
@ -156,6 +167,10 @@ export class Scanner {
    this.start = this.pos
  }
  pushChar(type: TokenType) {
    this.push(type, this.pos - 1, this.pos)
  }
  // turn shrimp code into shrimp tokens that get fed into the parser
  tokenize(input: string): Token[] {
    this.reset()
@ -164,6 +179,7 @@ export class Scanner {
    while (this.char > 0) {
      const char = this.char
      if (char === c`#`) {
        this.readComment()
        continue
@ -185,7 +201,7 @@ export class Scanner {
      }
      if (isIdentStart(char)) {
-        this.readIdentOrKeyword()
+        this.readWordOrIdent(true) // true = started with identifier char
        continue
      }
@ -195,25 +211,39 @@ export class Scanner {
      }
      if (char === c`:`) {
-        this.push(TokenType.Colon, this.start - 1, this.pos) // TODO: why?
+        this.pushChar(TokenType.Colon)
        this.next()
        continue
      }
      // whitespace-sensitive dot as operator (property access) only after identifier/number
      if (char === c`.`) {
        if (this.canBeDotGet(this.tokens.at(-1))) {
          this.pushChar(TokenType.Operator)
          this.next()
          continue
        }
      }
      if (char === c`/` && this.peek() === c`/`) {
        this.readRegex()
        continue
      }
      if (isWordChar(char)) {
-        this.readWord()
+        this.readWordOrIdent(false) // false = didn't start with identifier char
        continue
      }
      if (char === c`\n`) {
        if (this.inParen === 0 && this.inBracket === 0)
-          this.push(TokenType.Newline)
+          this.pushChar(TokenType.Newline)
        this.next()
        continue
      }
      if (char === c`;`) {
-        this.push(TokenType.Semicolon)
+        this.pushChar(TokenType.Semicolon)
        this.next()
        continue
      }
@ -225,6 +255,7 @@ export class Scanner {
  }
  readComment() {
    this.start = this.pos - 1
    while (this.char !== c`\n` && this.char > 0) this.next()
    this.push(TokenType.Comment)
  }
@ -233,16 +264,16 @@ export class Scanner {
    switch (this.char) {
      case c`(`:
        this.inParen++
-        this.push(TokenType.OpenParen); break
+        this.pushChar(TokenType.OpenParen); break
      case c`)`:
        this.inParen--
-        this.push(TokenType.CloseParen); break
+        this.pushChar(TokenType.CloseParen); break
      case c`[`:
        this.inBracket++
-        this.push(TokenType.OpenBracket); break
+        this.pushChar(TokenType.OpenBracket); break
      case c`]`:
        this.inBracket--
-        this.push(TokenType.CloseBracket); break
+        this.pushChar(TokenType.CloseBracket); break
    }
    this.next()
  }
@ -270,7 +301,7 @@ export class Scanner {
    this.push(TokenType.String)
  }
-  readIdentOrKeyword() {
+  readWordOrIdent(startedWithIdentChar: boolean) {
    this.start = this.pos - getCharSize(this.char)
    while (isWordChar(this.char)) {
@ -280,33 +311,50 @@ export class Scanner {
        if (isWhitespace(nextCh) || nextCh === 0) break
      }
-      // stop at equal sign (named arg)
+      // stop at equal sign (named arg) - but only if what we've read so far is an identifier
      if (this.char === c`=`) {
        const soFar = this.input.slice(this.start, this.pos - getCharSize(this.char))
        if (isIdentifer(soFar)) {
          this.next()
          break
        }
      }
      // stop at dot only if it would create a valid property access
      // AND only if we started with an identifier character (not for Words like README.txt)
      if (startedWithIdentChar && this.char === c`.`) {
        const nextCh = this.peek()
        if (isIdentStart(nextCh) || isDigit(nextCh) || nextCh === c`(`) {
          const soFar = this.input.slice(this.start, this.pos - getCharSize(this.char))
          if (isIdentifer(soFar)) break
        }
      }
      this.next()
    }
-    const ident = this.input.slice(this.start, this.pos - getCharSize(this.char))
+    const word = this.input.slice(this.start, this.pos - getCharSize(this.char))
-    if (ident === 'null')
+    // classify the token based on what we read
    if (word === '_')
      this.pushChar(TokenType.Underscore)
    else if (word === 'null')
      this.push(TokenType.Null)
-    else if (ident === 'true' || ident === 'false')
+    else if (word === 'true' || word === 'false')
      this.push(TokenType.Boolean)
-    else if (isKeyword(ident))
+    else if (isKeyword(word))
      this.push(TokenType.Keyword)
-    else if (isOperator(ident))
+    else if (isOperator(word))
-      this.push(TokenType.Operator) // only things like `and` and `or`
+      this.push(TokenType.Operator)
-    else if (isIdentifer(ident))
+    else if (isIdentifer(word))
      this.push(TokenType.Identifier)
-    else if (ident.endsWith('='))
+    else if (word.endsWith('='))
      this.push(TokenType.NamedArgPrefix)
    else
@ -316,6 +364,12 @@ export class Scanner {
  readNumber() {
    this.start = this.pos - 1
    while (isWordChar(this.char)) {
      // stop at dot unless it's part of the number
      if (this.char === c`.`) {
        const nextCh = this.peek()
        if (!isDigit(nextCh)) break
      }
      // stop at colon
      if (this.char === c`:`) {
        const nextCh = this.peek()
@ -327,21 +381,28 @@ export class Scanner {
    this.push(isNumber(ident) ? TokenType.Number : TokenType.Word)
  }
-  readWord() {
+  readRegex() {
-    this.start = this.pos - getCharSize(this.char)
+    this.start = this.pos - 1
    this.next() // skip 2nd /
-    while (isWordChar(this.char)) this.next()
+    while (this.char > 0) {
      if (this.char === c`/` && this.peek() === c`/`) {
        this.next() // skip /
        this.next() // skip /
        this.push(TokenType.Regex)
        break
      }
-    const word = this.input.slice(this.start, this.pos - getCharSize(this.char))
+      this.next()
    }
  }
-    if (word === '_')
+  canBeDotGet(lastToken?: Token): boolean {
-      this.push(TokenType.Underscore)
+    return !this.prevIsWhitespace && !!lastToken &&
-
+      (lastToken.type === TokenType.Identifier ||
-    else if (operators.has(word))
+        lastToken.type === TokenType.Number ||
-      this.push(TokenType.Operator)
+        lastToken.type === TokenType.CloseParen ||
-
+        lastToken.type === TokenType.CloseBracket)
    else
      this.push(TokenType.Word)
  }
 }
--- a/src/testSetup.ts
+++ b/src/testSetup.ts
@ -4,12 +4,13 @@ import color from 'kleur'
 import { Scanner, TokenType, type Token } from '#parser/tokenizer2'
 import { parser } from '#parser/shrimp'
 import { setGlobals } from '#parser/tokenizer'
 import { parse } from '#parser/parser2'
 import { globals as prelude } from '#prelude'
 import { $ } from 'bun'
 import { assert, errorMessage } from '#utils/utils'
 import { Compiler } from '#compiler/compiler'
 import { run, VM } from 'reefvm'
-import { treeToString, VMResultToValue } from '#utils/tree'
+import { treeToString2, treeToString, VMResultToValue } from '#utils/tree'
 const regenerateParser = async () => {
  let generate = true
@ -52,8 +53,8 @@ expect.extend({
    const allGlobals = { ...prelude, ...(globals || {}) }
    setGlobals(Object.keys(allGlobals))
-    const tree = parser.parse(received)
+    const tree = parse(received)
-    const actual = treeToString(tree, received)
+    const actual = treeToString2(tree, received)
    const normalizedExpected = trimWhitespace(expected)
    try {
@ -244,7 +245,7 @@ const tokenize = (code: string): Token[] => {
  return scanner.tokenize(code)
 }
-const toHumanToken = (tok: Token): { type: string, value: string } => {
+const toHumanToken = (tok: Token): { type: string, value?: string } => {
  return {
    type: TokenType[tok.type],
    value: tok.value
--- a/src/utils/tree.ts
+++ b/src/utils/tree.ts
@ -1,5 +1,38 @@
 import { Tree, TreeCursor } from '@lezer/common'
 import { type Value, fromValue } from 'reefvm'
 import { SyntaxNode } from '#parser/node'
 const nodeToString = (node: SyntaxNode, input: string, depth = 0): string => {
  const indent = '  '.repeat(depth)
  const text = input.slice(node.from, node.to)
  const nodeName = node.name
  if (node.firstChild) {
    return `${indent}${nodeName}`
  } else {
    // Only strip quotes from whole String nodes (legacy DoubleQuote), not StringFragment/EscapeSeq/CurlyString
    const cleanText = nodeName === 'String' ? text.slice(1, -1) : text
    return `${indent}${nodeName} ${cleanText}`
  }
 }
 export const treeToString2 = (tree: SyntaxNode, input: string, depth = 0): string => {
  let lines = []
  let node: SyntaxNode | null = tree
  if (node.name === 'Program') node = node.firstChild
  while (node) {
    lines.push(nodeToString(node, input, depth))
    if (node.firstChild)
      lines.push(treeToString2(node.firstChild, input, depth + 1))
    node = node.nextSibling
  }
  return lines.join('\n')
 }
 export const treeToString = (tree: Tree, input: string): string => {
  const lines: string[] = []