dot-get #1
12
CLAUDE.md
12
CLAUDE.md
|
|
@ -195,6 +195,18 @@ function parseExpression(input: string) {
|
|||
|
||||
**Expression-oriented design**: Everything returns a value - commands, assignments, functions. This enables composition and functional patterns.
|
||||
|
||||
**Scope-aware property access (DotGet)**: The parser uses Lezer's `@context` feature to track variable scope at parse time. When it encounters `obj.prop`, it checks if `obj` is in scope:
|
||||
- **In scope** → Parses as `DotGet(Identifier, Identifier)` → compiles to `TRY_LOAD obj; PUSH 'prop'; DOT_GET`
|
||||
- **Not in scope** → Parses as `Word("obj.prop")` → compiles to `PUSH 'obj.prop'` (treated as file path/string)
|
||||
|
||||
Implementation files:
|
||||
- **src/parser/scopeTracker.ts**: ContextTracker that maintains immutable scope chain
|
||||
- **src/parser/tokenizer.ts**: External tokenizer checks `stack.context` to decide whether an identifier followed by a dot (`obj.prop`) becomes a DotGet or a single Word
|
||||
- Scope tracking: Captures variables from assignments (`x = 5`) and function parameters (`fn x:`)
|
||||
- See `src/parser/tests/dot-get.test.ts` for comprehensive examples
|
||||
|
||||
**Why this matters**: This enables shell-like file paths (`readme.txt`) while supporting dictionary/array access (`config.path`) without quotes, determined entirely at parse time based on lexical scope.
|
||||
|
||||
**EOF handling**: The grammar uses `(statement | newlineOrSemicolon)+ eof?` to handle empty lines and end-of-file without infinite loops.
|
||||
|
||||
## Compiler Architecture
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import {
|
|||
getAllChildren,
|
||||
getAssignmentParts,
|
||||
getBinaryParts,
|
||||
getDotGetParts,
|
||||
getFunctionCallParts,
|
||||
getFunctionDefParts,
|
||||
getIfExprParts,
|
||||
|
|
@ -17,8 +18,8 @@ import {
|
|||
getStringParts,
|
||||
} from '#compiler/utils'
|
||||
|
||||
// const DEBUG = false
|
||||
const DEBUG = true
|
||||
const DEBUG = false
|
||||
// const DEBUG = true
|
||||
|
||||
type Label = `.${string}`
|
||||
|
||||
|
|
@ -189,6 +190,19 @@ export class Compiler {
|
|||
return [[`TRY_LOAD`, value]]
|
||||
}
|
||||
|
||||
case terms.Word: {
|
||||
return [['PUSH', value]]
|
||||
}
|
||||
|
||||
case terms.DotGet: {
|
||||
const { objectName, propertyName } = getDotGetParts(node, input)
|
||||
const instructions: ProgramItem[] = []
|
||||
instructions.push(['TRY_LOAD', objectName])
|
||||
instructions.push(['PUSH', propertyName])
|
||||
instructions.push(['DOT_GET'])
|
||||
return instructions
|
||||
}
|
||||
|
||||
case terms.BinOp: {
|
||||
const { left, op, right } = getBinaryParts(node)
|
||||
const instructions: ProgramItem[] = []
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ describe('Regex', () => {
|
|||
})
|
||||
})
|
||||
|
||||
describe.only('native functions', () => {
|
||||
describe.skip('native functions', () => {
|
||||
test('print function', () => {
|
||||
const add = (x: number, y: number) => x + y
|
||||
expect(`add 5 9`).toEvaluateTo(14, { add })
|
||||
|
|
|
|||
|
|
@ -40,9 +40,9 @@ export const getAssignmentParts = (node: SyntaxNode) => {
|
|||
const children = getAllChildren(node)
|
||||
const [left, equals, right] = children
|
||||
|
||||
if (!left || left.type.id !== terms.Identifier) {
|
||||
if (!left || left.type.id !== terms.AssignableIdentifier) {
|
||||
throw new CompilerError(
|
||||
`Assign left child must be an Identifier, got ${left ? left.type.name : 'none'}`,
|
||||
`Assign left child must be an AssignableIdentifier, got ${left ? left.type.name : 'none'}`,
|
||||
node.from,
|
||||
node.to
|
||||
)
|
||||
|
|
@ -70,9 +70,9 @@ export const getFunctionDefParts = (node: SyntaxNode, input: string) => {
|
|||
}
|
||||
|
||||
const paramNames = getAllChildren(paramsNode).map((param) => {
|
||||
if (param.type.id !== terms.Identifier) {
|
||||
if (param.type.id !== terms.AssignableIdentifier) {
|
||||
throw new CompilerError(
|
||||
`FunctionDef params must be Identifiers, got ${param.type.name}`,
|
||||
`FunctionDef params must be AssignableIdentifiers, got ${param.type.name}`,
|
||||
param.from,
|
||||
param.to
|
||||
)
|
||||
|
|
@ -198,3 +198,37 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
|
|||
|
||||
return { parts, hasInterpolation: parts.length > 0 }
|
||||
}
|
||||
|
||||
/**
 * Extracts the object and property names from a DotGet node.
 *
 * The node must have exactly two children: an `IdentifierBeforeDot`
 * followed by an `Identifier`. Each name is read from `input` using the
 * child's `from`/`to` offsets.
 *
 * @param node - The DotGet syntax node to take apart.
 * @param input - The source text that the node's offsets refer to.
 * @returns The sliced `objectName` and `propertyName`.
 * @throws CompilerError if the child count or either child's type is wrong.
 */
export const getDotGetParts = (node: SyntaxNode, input: string) => {
  const parts = getAllChildren(node)

  // Validate the shape first; the error spans the whole DotGet node.
  if (parts.length !== 2) {
    throw new CompilerError(
      `DotGet expected 2 identifier children, got ${parts.length}`,
      node.from,
      node.to
    )
  }

  const [target, member] = parts

  // Left-hand side: the identifier that precedes the dot.
  if (target.type.id !== terms.IdentifierBeforeDot) {
    throw new CompilerError(
      `DotGet object must be an IdentifierBeforeDot, got ${target.type.name}`,
      target.from,
      target.to
    )
  }

  // Right-hand side: the property being accessed.
  if (member.type.id !== terms.Identifier) {
    throw new CompilerError(
      `DotGet property must be an Identifier, got ${member.type.name}`,
      member.from,
      member.to
    )
  }

  // Read a child's text straight out of the original source.
  const textOf = (child: SyntaxNode) => input.slice(child.from, child.to)

  return { objectName: textOf(target), propertyName: textOf(member) }
}
|
||||
|
|
|
|||
|
|
@ -1,35 +1,36 @@
|
|||
// This file was generated by lezer-generator. You probably shouldn't edit it.
|
||||
export const
|
||||
Identifier = 1,
|
||||
Word = 2,
|
||||
IdentifierBeforeDot = 3,
|
||||
Program = 4,
|
||||
PipeExpr = 5,
|
||||
FunctionCall = 6,
|
||||
PositionalArg = 7,
|
||||
ParenExpr = 8,
|
||||
FunctionCallOrIdentifier = 9,
|
||||
BinOp = 10,
|
||||
ConditionalOp = 15,
|
||||
String = 24,
|
||||
StringFragment = 25,
|
||||
Interpolation = 26,
|
||||
EscapeSeq = 27,
|
||||
Number = 28,
|
||||
Boolean = 29,
|
||||
Regex = 30,
|
||||
Null = 31,
|
||||
DotGet = 32,
|
||||
FunctionDef = 33,
|
||||
Fn = 34,
|
||||
Params = 35,
|
||||
colon = 36,
|
||||
end = 37,
|
||||
Underscore = 38,
|
||||
NamedArg = 39,
|
||||
NamedArgPrefix = 40,
|
||||
IfExpr = 42,
|
||||
ThenBlock = 45,
|
||||
ElsifExpr = 46,
|
||||
ElseExpr = 48,
|
||||
Assign = 50
|
||||
AssignableIdentifier = 2,
|
||||
Word = 3,
|
||||
IdentifierBeforeDot = 4,
|
||||
Program = 5,
|
||||
PipeExpr = 6,
|
||||
FunctionCall = 7,
|
||||
PositionalArg = 8,
|
||||
ParenExpr = 9,
|
||||
FunctionCallOrIdentifier = 10,
|
||||
BinOp = 11,
|
||||
ConditionalOp = 16,
|
||||
String = 25,
|
||||
StringFragment = 26,
|
||||
Interpolation = 27,
|
||||
EscapeSeq = 28,
|
||||
Number = 29,
|
||||
Boolean = 30,
|
||||
Regex = 31,
|
||||
Null = 32,
|
||||
DotGet = 33,
|
||||
FunctionDef = 34,
|
||||
Fn = 35,
|
||||
Params = 36,
|
||||
colon = 37,
|
||||
end = 38,
|
||||
Underscore = 39,
|
||||
NamedArg = 40,
|
||||
NamedArgPrefix = 41,
|
||||
IfExpr = 43,
|
||||
ThenBlock = 46,
|
||||
ElsifExpr = 47,
|
||||
ElseExpr = 49,
|
||||
Assign = 51
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ describe('null', () => {
|
|||
test('parses null in assignments', () => {
|
||||
expect('a = null').toMatchTree(`
|
||||
Assign
|
||||
Identifier a
|
||||
AssignableIdentifier a
|
||||
operator =
|
||||
Null null`)
|
||||
})
|
||||
|
|
@ -212,11 +212,11 @@ describe('newlines', () => {
|
|||
expect(`x = 5
|
||||
y = 2`).toMatchTree(`
|
||||
Assign
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
operator =
|
||||
Number 5
|
||||
Assign
|
||||
Identifier y
|
||||
AssignableIdentifier y
|
||||
operator =
|
||||
Number 2`)
|
||||
})
|
||||
|
|
@ -224,11 +224,11 @@ y = 2`).toMatchTree(`
|
|||
test('parses statements separated by semicolons', () => {
|
||||
expect(`x = 5; y = 2`).toMatchTree(`
|
||||
Assign
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
operator =
|
||||
Number 5
|
||||
Assign
|
||||
Identifier y
|
||||
AssignableIdentifier y
|
||||
operator =
|
||||
Number 2`)
|
||||
})
|
||||
|
|
@ -236,7 +236,7 @@ y = 2`).toMatchTree(`
|
|||
test('parses statement with word and a semicolon', () => {
|
||||
expect(`a = hello; 2`).toMatchTree(`
|
||||
Assign
|
||||
Identifier a
|
||||
AssignableIdentifier a
|
||||
operator =
|
||||
FunctionCallOrIdentifier
|
||||
Identifier hello
|
||||
|
|
@ -248,7 +248,7 @@ describe('Assign', () => {
|
|||
test('parses simple assignment', () => {
|
||||
expect('x = 5').toMatchTree(`
|
||||
Assign
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
operator =
|
||||
Number 5`)
|
||||
})
|
||||
|
|
@ -256,7 +256,7 @@ describe('Assign', () => {
|
|||
test('parses assignment with addition', () => {
|
||||
expect('x = 5 + 3').toMatchTree(`
|
||||
Assign
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
operator =
|
||||
BinOp
|
||||
Number 5
|
||||
|
|
@ -267,13 +267,13 @@ describe('Assign', () => {
|
|||
test('parses assignment with functions', () => {
|
||||
expect('add = fn a b: a + b end').toMatchTree(`
|
||||
Assign
|
||||
Identifier add
|
||||
AssignableIdentifier add
|
||||
operator =
|
||||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier a
|
||||
Identifier b
|
||||
AssignableIdentifier a
|
||||
AssignableIdentifier b
|
||||
colon :
|
||||
BinOp
|
||||
Identifier a
|
||||
|
|
@ -287,7 +287,7 @@ describe('DotGet whitespace sensitivity', () => {
|
|||
test('no whitespace - DotGet works when identifier in scope', () => {
|
||||
expect('basename = 5; basename.prop').toMatchTree(`
|
||||
Assign
|
||||
Identifier basename
|
||||
AssignableIdentifier basename
|
||||
operator =
|
||||
Number 5
|
||||
DotGet
|
||||
|
|
@ -298,7 +298,7 @@ describe('DotGet whitespace sensitivity', () => {
|
|||
test('space before dot - NOT DotGet, parses as division', () => {
|
||||
expect('basename = 5; basename / prop').toMatchTree(`
|
||||
Assign
|
||||
Identifier basename
|
||||
AssignableIdentifier basename
|
||||
operator =
|
||||
Number 5
|
||||
BinOp
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ describe('if/elsif/else', () => {
|
|||
|
||||
expect('a = if x: 2').toMatchTree(`
|
||||
Assign
|
||||
Identifier a
|
||||
AssignableIdentifier a
|
||||
operator =
|
||||
IfExpr
|
||||
keyword if
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ describe('DotGet', () => {
|
|||
test('obj.prop is DotGet when obj is assigned', () => {
|
||||
expect('obj = 5; obj.prop').toMatchTree(`
|
||||
Assign
|
||||
Identifier obj
|
||||
AssignableIdentifier obj
|
||||
operator =
|
||||
Number 5
|
||||
DotGet
|
||||
|
|
@ -31,7 +31,7 @@ describe('DotGet', () => {
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier config
|
||||
AssignableIdentifier config
|
||||
colon :
|
||||
DotGet
|
||||
IdentifierBeforeDot config
|
||||
|
|
@ -45,7 +45,7 @@ describe('DotGet', () => {
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
colon :
|
||||
DotGet
|
||||
IdentifierBeforeDot x
|
||||
|
|
@ -63,8 +63,8 @@ end`).toMatchTree(`
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
Identifier y
|
||||
AssignableIdentifier x
|
||||
AssignableIdentifier y
|
||||
colon :
|
||||
DotGet
|
||||
IdentifierBeforeDot x
|
||||
|
|
@ -84,7 +84,7 @@ end`).toMatchTree(`
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
colon :
|
||||
DotGet
|
||||
IdentifierBeforeDot x
|
||||
|
|
@ -92,7 +92,7 @@ end`).toMatchTree(`
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier y
|
||||
AssignableIdentifier y
|
||||
colon :
|
||||
DotGet
|
||||
IdentifierBeforeDot y
|
||||
|
|
@ -105,7 +105,7 @@ end`).toMatchTree(`
|
|||
test('dot get works as function argument', () => {
|
||||
expect('config = 42; echo config.path').toMatchTree(`
|
||||
Assign
|
||||
Identifier config
|
||||
AssignableIdentifier config
|
||||
operator =
|
||||
Number 42
|
||||
FunctionCall
|
||||
|
|
@ -120,7 +120,7 @@ end`).toMatchTree(`
|
|||
test('mixed file paths and dot get', () => {
|
||||
expect('config = 42; cat readme.txt; echo config.path').toMatchTree(`
|
||||
Assign
|
||||
Identifier config
|
||||
AssignableIdentifier config
|
||||
operator =
|
||||
Number 42
|
||||
FunctionCall
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ describe('Fn', () => {
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
colon :
|
||||
BinOp
|
||||
Identifier x
|
||||
|
|
@ -86,8 +86,8 @@ describe('Fn', () => {
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
Identifier y
|
||||
AssignableIdentifier x
|
||||
AssignableIdentifier y
|
||||
colon :
|
||||
BinOp
|
||||
Identifier x
|
||||
|
|
@ -104,8 +104,8 @@ end`).toMatchTree(`
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
Identifier y
|
||||
AssignableIdentifier x
|
||||
AssignableIdentifier y
|
||||
colon :
|
||||
BinOp
|
||||
Identifier x
|
||||
|
|
|
|||
|
|
@ -21,16 +21,16 @@ describe('multiline', () => {
|
|||
add 3 4
|
||||
`).toMatchTree(`
|
||||
Assign
|
||||
Identifier add
|
||||
AssignableIdentifier add
|
||||
operator =
|
||||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier a
|
||||
Identifier b
|
||||
AssignableIdentifier a
|
||||
AssignableIdentifier b
|
||||
colon :
|
||||
Assign
|
||||
Identifier result
|
||||
AssignableIdentifier result
|
||||
operator =
|
||||
BinOp
|
||||
Identifier a
|
||||
|
|
@ -63,8 +63,8 @@ end
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
Identifier y
|
||||
AssignableIdentifier x
|
||||
AssignableIdentifier y
|
||||
colon :
|
||||
FunctionCallOrIdentifier
|
||||
Identifier x
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ describe('pipe expressions', () => {
|
|||
test('pipe expression in assignment', () => {
|
||||
expect('result = echo hello | grep h').toMatchTree(`
|
||||
Assign
|
||||
Identifier result
|
||||
AssignableIdentifier result
|
||||
operator =
|
||||
PipeExpr
|
||||
FunctionCall
|
||||
|
|
@ -77,7 +77,7 @@ describe('pipe expressions', () => {
|
|||
FunctionDef
|
||||
keyword fn
|
||||
Params
|
||||
Identifier x
|
||||
AssignableIdentifier x
|
||||
colon :
|
||||
FunctionCallOrIdentifier
|
||||
Identifier x
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ import type { ScopeContext } from './scopeTracker'
|
|||
export const tokenizer = new ExternalTokenizer(
|
||||
(input: InputStream, stack: Stack) => {
|
||||
let ch = getFullCodePoint(input, 0)
|
||||
console.log(`🌭 checking char ${String.fromCodePoint(ch)}`)
|
||||
if (!isWordChar(ch)) return
|
||||
|
||||
let pos = getCharSize(ch)
|
||||
|
|
@ -66,13 +65,55 @@ export const tokenizer = new ExternalTokenizer(
|
|||
pos += getCharSize(ch)
|
||||
}
|
||||
|
||||
// Build identifier text BEFORE advancing (for debug and peek-ahead)
|
||||
let identifierText = ''
|
||||
if (isValidIdentifier) {
|
||||
for (let i = 0; i < pos; i++) {
|
||||
const charCode = input.peek(i)
|
||||
if (charCode === -1) break
|
||||
if (charCode >= 0xd800 && charCode <= 0xdbff && i + 1 < pos) {
|
||||
const low = input.peek(i + 1)
|
||||
if (low >= 0xdc00 && low <= 0xdfff) {
|
||||
identifierText += String.fromCharCode(charCode, low)
|
||||
i++
|
||||
continue
|
||||
}
|
||||
}
|
||||
identifierText += String.fromCharCode(charCode)
|
||||
}
|
||||
}
|
||||
|
||||
input.advance(pos)
|
||||
if (isValidIdentifier) {
|
||||
// Use canShift to decide which identifier type
|
||||
if (stack.canShift(AssignableIdentifier)) {
|
||||
const canAssignable = stack.canShift(AssignableIdentifier)
|
||||
const canRegular = stack.canShift(Identifier)
|
||||
|
||||
if (canAssignable && !canRegular) {
|
||||
// Only AssignableIdentifier valid (e.g., in Params)
|
||||
input.acceptToken(AssignableIdentifier)
|
||||
} else {
|
||||
} else if (canRegular && !canAssignable) {
|
||||
// Only Identifier valid (e.g., in function args)
|
||||
input.acceptToken(Identifier)
|
||||
} else {
|
||||
// BOTH possible (ambiguous) - peek ahead for '='
|
||||
// Note: we're peeking from current position (after advance), so start at 0
|
||||
let peekPos = 0
|
||||
// Skip whitespace (space, tab, CR, but NOT newline - assignment must be on same line)
|
||||
while (true) {
|
||||
const ch = getFullCodePoint(input, peekPos)
|
||||
if (ch === 32 || ch === 9 || ch === 13) { // space, tab, CR
|
||||
peekPos += getCharSize(ch)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Check if next non-whitespace char is '='
|
||||
const nextCh = getFullCodePoint(input, peekPos)
|
||||
if (nextCh === 61 /* = */) {
|
||||
input.acceptToken(AssignableIdentifier)
|
||||
} else {
|
||||
input.acceptToken(Identifier)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
input.acceptToken(Word)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user