From fe7abb8b210cc7a5ff0dba208f52880e973b4957 Mon Sep 17 00:00:00 2001 From: Corey Johnson Date: Wed, 15 Oct 2025 08:45:37 -0700 Subject: [PATCH] wip --- src/compiler/compiler.test.ts | 36 ++ src/compiler/compiler.ts | 82 +++- src/compiler/utils.ts | 32 ++ src/parser/parser.test.ts | 718 ------------------------------- src/parser/shrimp.grammar | 8 +- src/parser/shrimp.ts | 12 +- src/parser/tests/strings.test.ts | 35 +- src/parser/tokenizer.ts | 24 +- 8 files changed, 198 insertions(+), 749 deletions(-) delete mode 100644 src/parser/parser.test.ts diff --git a/src/compiler/compiler.test.ts b/src/compiler/compiler.test.ts index 17cde94..63dc3a7 100644 --- a/src/compiler/compiler.test.ts +++ b/src/compiler/compiler.test.ts @@ -158,3 +158,39 @@ describe('multiline tests', () => { `).toEvaluateTo(7) }) }) + +describe('string interpolation', () => { + test('string with variable interpolation', () => { + expect(`name = 'Alice'; 'hello $name'`).toEvaluateTo('hello Alice') + }) + + test('string with expression interpolation', () => { + expect(`'sum is $(2 + 3)'`).toEvaluateTo('sum is 5') + }) + + test('string with multiple interpolations', () => { + expect(`a = 10; b = 20; '$a + $b = $(a + b)'`).toEvaluateTo('10 + 20 = 30') + }) + + test('string with escape sequences', () => { + expect(`'line1\\nline2'`).toEvaluateTo('line1\nline2') + expect(`'tab\\there'`).toEvaluateTo('tab\there') + expect(`'back\\\\slash'`).toEvaluateTo('back\\slash') + }) + + test('string with escaped dollar sign', () => { + expect(`'price is \\$10'`).toEvaluateTo('price is $10') + }) + + test('string with mixed interpolation and escapes', () => { + expect(`x = 5; 'value: $x\\ntotal: $(x * 2)'`).toEvaluateTo('value: 5\ntotal: 10') + }) + + test('interpolation with unbound identifier', () => { + expect(`'greeting: $hello'`).toEvaluateTo('greeting: hello') + }) + + test('nested expression interpolation', () => { + expect(`a = 3; b = 4; 'result: $(a * (b + 1))'`).toEvaluateTo('result: 15') + }) +}) diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index b2009ac..f8a0eed 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -14,12 +14,37 @@ import { getIfExprParts, getNamedArgParts, getPipeExprParts, + getStringParts, } from '#compiler/utils' -// const DEBUG = false -const DEBUG = true +const DEBUG = false +// const DEBUG = true type Label = `.${string}` + +// Process escape sequences in strings +function processEscapeSequence(escapeSeq: string): string { + // escapeSeq includes the backslash, e.g., "\n", "\$", "\\" + if (escapeSeq.length !== 2) return escapeSeq + + switch (escapeSeq[1]) { + case 'n': + return '\n' + case 't': + return '\t' + case 'r': + return '\r' + case '\\': + return '\\' + case "'": + return "'" + case '$': + return '$' + default: + return escapeSeq // Unknown escape, keep as-is + } +} + export class Compiler { instructions: ProgramItem[] = [] fnLabels = new Map() @@ -84,9 +109,56 @@ export class Compiler { return [[`PUSH`, number]] - case terms.String: - const strValue = value.slice(1, -1).replace(/\\/g, '') - return [[`PUSH`, strValue]] + case terms.String: { + const { parts, hasInterpolation } = getStringParts(node, input) + + // Simple string without interpolation or escapes - extract text directly + if (!hasInterpolation) { + // Remove surrounding quotes and return as-is + const strValue = value.slice(1, -1) + return [['PUSH', strValue]] + } + + // String with interpolation or escapes - compile each part and concatenate + const instructions: ProgramItem[] = [] + parts.forEach((part) => { + const partValue = input.slice(part.from, part.to) + + switch (part.type.id) { + case terms.StringFragment: + // Plain text fragment - just push as-is + instructions.push(['PUSH', partValue]) + break + + case terms.StringEscape: + // Process escape sequence and push the result + const processed = processEscapeSequence(partValue) + instructions.push(['PUSH', processed]) + break + + case terms.Interpolation: + // Interpolation contains either Identifier or ParenExpr (the $ is anonymous) + const child = part.firstChild + if (!child) { + throw new CompilerError('Interpolation has no child', part.from, part.to) + } + // Compile the Identifier or ParenExpr + instructions.push(...this.#compileNode(child, input)) + break + + default: + throw new CompilerError( + `Unexpected string part: ${part.type.name}`, + part.from, + part.to + ) + } + }) + + // Use STR_CONCAT to join all parts + instructions.push(['STR_CONCAT', parts.length]) + return instructions + } case terms.Boolean: { return [[`PUSH`, value === 'true']] diff --git a/src/compiler/utils.ts b/src/compiler/utils.ts index b1dfc11..0a4cf97 100644 --- a/src/compiler/utils.ts +++ b/src/compiler/utils.ts @@ -166,3 +166,35 @@ export const getPipeExprParts = (node: SyntaxNode) => { return { pipedFunctionCall, pipeReceivers } } + +export const getStringParts = (node: SyntaxNode, input: string) => { + const children = getAllChildren(node) + + // String nodes always have at least 2 children (the quote tokens) + // For simple strings like 'hello' with no interpolation, there are no child nodes + // The text is just between the quotes + const parts = children.filter((child) => { + return ( + child.type.id === terms.StringFragment || + child.type.id === terms.Interpolation || + child.type.id === terms.StringEscape + ) + }) + + // Validate each part is the expected type + parts.forEach((part) => { + if ( + part.type.id !== terms.StringFragment && + part.type.id !== terms.Interpolation && + part.type.id !== terms.StringEscape + ) { + throw new CompilerError( + `String child must be StringFragment, Interpolation, or StringEscape, got ${part.type.name}`, + part.from, + part.to + ) + } + }) + + return { parts, hasInterpolation: parts.length > 0 } +} diff --git a/src/parser/parser.test.ts b/src/parser/parser.test.ts deleted file mode 100644 index 7f4472f..0000000 --- a/src/parser/parser.test.ts +++ /dev/null @@ -1,718 +0,0 @@ -import { expect, describe, test } from 'bun:test' -import { afterEach } from 'bun:test' -import { resetCommandSource, setCommandSource } from '#editor/commands' -import { beforeEach } from 'bun:test' - -import './shrimp.grammar' // Importing this so changes cause it to retest! - -describe('calling functions', () => { - beforeEach(() => { - setCommandSource(() => [ - { - command: 'echo', - args: [{ name: 'path', type: 'string' }], - execute: (p: any) => p, - }, - ]) - }) - - afterEach(() => { - resetCommandSource() - }) - - test('call with no args', () => { - expect('tail').toMatchTree(` - FunctionCallOrIdentifier - Identifier tail - `) - }) - - test('call with arg', () => { - expect('tail path').toMatchTree(` - FunctionCall - Identifier tail - PositionalArg - Identifier path - `) - }) - - test('call with arg and named arg', () => { - expect('tail path lines=30').toMatchTree(` - FunctionCall - Identifier tail - PositionalArg - Identifier path - NamedArg - NamedArgPrefix lines= - Number 30 - `) - }) - - test('command with arg that is also a command', () => { - expect('tail tail').toMatchTree(` - FunctionCall - Identifier tail - PositionalArg - Identifier tail - `) - - expect('tai').toMatchTree(` - FunctionCallOrIdentifier - Identifier tai - `) - }) - - test('Incomplete namedArg', () => { - expect('tail lines=').toMatchTree(` - FunctionCall - Identifier tail - NamedArg - NamedArgPrefix lines= - ⚠ - ⚠ `) - }) -}) - -describe('Identifier', () => { - test('parses identifiers with emojis and dashes', () => { - expect('moo-😊-34').toMatchTree(` - FunctionCallOrIdentifier - Identifier moo-😊-34`) - }) -}) - -describe('Parentheses', () => { - test('allows binOps with parentheses correctly', () => { - expect('(2 + 3)').toMatchTree(` - ParenExpr - BinOp - Number 2 - operator + - Number 3`) - }) - - test('allows numbers, strings, and booleans with parentheses correctly', () => { - expect('(42)').toMatchTree(` - ParenExpr - Number 42`) - - expect("('hello')").toMatchTree(` - ParenExpr - String - StringFragment hello`) - - expect('(true)').toMatchTree(` - ParenExpr - Boolean true`) - - expect('(false)').toMatchTree(` - ParenExpr - Boolean false`) - }) - - test('allows function calls in parens', () => { - expect('(echo 3)').toMatchTree(` - ParenExpr - FunctionCall - Identifier echo - PositionalArg - Number 3`) - - expect('(echo)').toMatchTree(` - ParenExpr - FunctionCallOrIdentifier - Identifier echo`) - }) - - test('allows conditionals in parens', () => { - expect('(a > b)').toMatchTree(` - ParenExpr - ConditionalOp - Identifier a - operator > - Identifier b`) - - expect('(a and b)').toMatchTree(` - ParenExpr - ConditionalOp - Identifier a - operator and - Identifier b`) - }) - - test('allows parens in function calls', () => { - expect('echo (3 + 3)').toMatchTree(` - FunctionCall - Identifier echo - PositionalArg - ParenExpr - BinOp - Number 3 - operator + - Number 3`) - }) - - test('a word can be contained in parens', () => { - expect('(basename ./cool)').toMatchTree(` - ParenExpr - FunctionCall - Identifier basename - PositionalArg - Word ./cool - `) - }) - - test('nested parentheses', () => { - expect('(2 + (1 * 4))').toMatchTree(` - ParenExpr - BinOp - Number 2 - operator + - ParenExpr - BinOp - Number 1 - operator * - Number 4`) - }) - - test('Function in parentheses', () => { - expect('4 + (echo 3)').toMatchTree(` - BinOp - Number 4 - operator + - ParenExpr - FunctionCall - Identifier echo - PositionalArg - Number 3`) - }) -}) - -describe('BinOp', () => { - test('addition tests', () => { - expect('2 + 3').toMatchTree(` - BinOp - Number 2 - operator + - Number 3 - `) - }) - - test('subtraction tests', () => { - expect('5 - 2').toMatchTree(` - BinOp - Number 5 - operator - - Number 2 - `) - }) - - test('multiplication tests', () => { - expect('4 * 3').toMatchTree(` - BinOp - Number 4 - operator * - Number 3 - `) - }) - - test('division tests', () => { - expect('8 / 2').toMatchTree(` - BinOp - Number 8 - operator / - Number 2 - `) - }) - - test('mixed operations with precedence', () => { - expect('2 + 3 * 4 - 5 / 1').toMatchTree(` - BinOp - BinOp - Number 2 - operator + - BinOp - Number 3 - operator * - Number 4 - operator - - BinOp - Number 5 - operator / - Number 1 - `) - }) -}) - -describe('Fn', () => { - test('parses function no parameters', () => { - expect('fn: 1 end').toMatchTree(` - FunctionDef - keyword fn - Params - colon : - Number 1 - end end`) - }) - - test('parses function with single parameter', () => { - expect('fn x: x + 1 end').toMatchTree(` - FunctionDef - keyword fn - Params - Identifier x - colon : - BinOp - Identifier x - operator + - Number 1 - end end`) - }) - - test('parses function with multiple parameters', () => { - expect('fn x y: x * y end').toMatchTree(` - FunctionDef - keyword fn - Params - Identifier x - Identifier y - colon : - BinOp - Identifier x - operator * - Identifier y - end end`) - }) - - test('parses multiline function with multiple statements', () => { - expect(`fn x y: - x * y - x + 9 -end`).toMatchTree(` - FunctionDef - keyword fn - Params - Identifier x - Identifier y - colon : - BinOp - Identifier x - operator * - Identifier y - BinOp - Identifier x - operator + - Number 9 - end end`) - }) -}) - -describe('ambiguity', () => { - test('parses ambiguous expressions correctly', () => { - expect('a + -3').toMatchTree(` - BinOp - Identifier a - operator + - Number -3 - `) - }) - - test('parses ambiguous expressions correctly', () => { - expect('a-var + a-thing').toMatchTree(` - BinOp - Identifier a-var - operator + - Identifier a-thing - `) - }) -}) - -describe('newlines', () => { - test('parses multiple statements separated by newlines', () => { - expect(`x = 5 -y = 2`).toMatchTree(` - Assign - Identifier x - operator = - Number 5 - Assign - Identifier y - operator = - Number 2`) - }) - - test('parses statements separated by semicolons', () => { - expect(`x = 5; y = 2`).toMatchTree(` - Assign - Identifier x - operator = - Number 5 - Assign - Identifier y - operator = - Number 2`) - }) - - test('parses statement with word and a semicolon', () => { - expect(`a = hello; 2`).toMatchTree(` - Assign - Identifier a - operator = - FunctionCallOrIdentifier - Identifier hello - Number 2`) - }) -}) - -describe('Assign', () => { - test('parses simple assignment', () => { - expect('x = 5').toMatchTree(` - Assign - Identifier x - operator = - Number 5`) - }) - - test('parses assignment with addition', () => { - expect('x = 5 + 3').toMatchTree(` - Assign - Identifier x - operator = - BinOp - Number 5 - operator + - Number 3`) - }) - - test('parses assignment with functions', () => { - expect('add = fn a b: a + b end').toMatchTree(` - Assign - Identifier add - operator = - FunctionDef - keyword fn - Params - Identifier a - Identifier b - colon : - BinOp - Identifier a - operator + - Identifier b - end end`) - }) -}) - -describe('if/elsif/else', () => { - test('parses single line if', () => { - expect(`if y = 1: 'cool'`).toMatchTree(` - IfExpr - keyword if - ConditionalOp - Identifier y - operator = - Number 1 - colon : - ThenBlock - String - StringFragment cool - `) - - expect('a = if x: 2').toMatchTree(` - Assign - Identifier a - operator = - IfExpr - keyword if - Identifier x - colon : - ThenBlock - Number 2 - `) - }) - - test('parses multiline if', () => { - expect(` - if x < 9: - yes - end`).toMatchTree(` - IfExpr - keyword if - ConditionalOp - Identifier x - operator < - Number 9 - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier yes - end end - `) - }) - - test('parses multiline if with else', () => { - expect(`if with-else: - x - else: - y - end`).toMatchTree(` - IfExpr - keyword if - Identifier with-else - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier x - ElseExpr - keyword else - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier y - end end - `) - }) - - test('parses multiline if with elsif', () => { - expect(`if with-elsif: - x - elsif another-condition: - y - end`).toMatchTree(` - IfExpr - keyword if - Identifier with-elsif - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier x - ElsifExpr - keyword elsif - Identifier another-condition - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier y - end end - `) - }) - - test('parses multiline if with multiple elsif and else', () => { - expect(`if with-elsif-else: - x - elsif another-condition: - y - elsif yet-another-condition: - z - else: - oh-no - end`).toMatchTree(` - IfExpr - keyword if - Identifier with-elsif-else - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier x - ElsifExpr - keyword elsif - Identifier another-condition - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier y - ElsifExpr - keyword elsif - Identifier yet-another-condition - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier z - ElseExpr - keyword else - colon : - ThenBlock - FunctionCallOrIdentifier - Identifier oh-no - end end - `) - }) -}) - -describe('pipe expressions', () => { - test('simple pipe expression', () => { - expect('echo hello | grep h').toMatchTree(` - PipeExpr - FunctionCall - Identifier echo - PositionalArg - Identifier hello - operator | - FunctionCall - Identifier grep - PositionalArg - Identifier h - `) - }) - - test('multi-stage pipe chain', () => { - expect('find files | filter active | sort').toMatchTree(` - PipeExpr - FunctionCall - Identifier find - PositionalArg - Identifier files - operator | - FunctionCall - Identifier filter - PositionalArg - Identifier active - operator | - FunctionCallOrIdentifier - Identifier sort - `) - }) - - test('pipe with identifier', () => { - expect('get-value | process').toMatchTree(` - PipeExpr - FunctionCallOrIdentifier - Identifier get-value - operator | - FunctionCallOrIdentifier - Identifier process - `) - }) - - test('pipe expression in assignment', () => { - expect('result = echo hello | grep h').toMatchTree(` - Assign - Identifier result - operator = - PipeExpr - FunctionCall - Identifier echo - PositionalArg - Identifier hello - operator | - FunctionCall - Identifier grep - PositionalArg - Identifier h - `) - }) - - test('pipe with inline function', () => { - expect('items | each fn x: x end').toMatchTree(` - PipeExpr - FunctionCallOrIdentifier - Identifier items - operator | - FunctionCall - Identifier each - PositionalArg - FunctionDef - keyword fn - Params - Identifier x - colon : - FunctionCallOrIdentifier - Identifier x - end end - `) - }) -}) - -describe('multiline', () => { - test('parses multiline strings', () => { - expect(`'first'\n'second'`).toMatchTree(` - String - StringFragment first - String - StringFragment second`) - }) - - test('parses multiline functions', () => { - expect(` - add = fn a b: - result = a + b - result - end - - add 3 4 - `).toMatchTree(` - Assign - Identifier add - operator = - FunctionDef - keyword fn - Params - Identifier a - Identifier b - colon : - Assign - Identifier result - operator = - BinOp - Identifier a - operator + - Identifier b - FunctionCallOrIdentifier - Identifier result - - end end - FunctionCall - Identifier add - PositionalArg - Number 3 - PositionalArg - Number 4`) - }) - - test('ignores leading and trailing whitespace in expected tree', () => { - expect(` - 3 - - - fn x y: - x -end - -`).toMatchTree(` - Number 3 - - FunctionDef - keyword fn - Params - Identifier x - Identifier y - colon : - FunctionCallOrIdentifier - Identifier x - end end - `) - }) -}) - -describe('string interpolation', () => { - test('string with variable interpolation', () => { - expect("'hello $name'").toMatchTree(` - String - StringFragment ${'hello '} - Interpolation - Identifier name - `) - }) - - test('string with expression interpolation', () => { - expect("'sum is $(a + b)'").toMatchTree(` - String - StringFragment ${'sum is '} - Interpolation - BinOp - Identifier a - operator + - Identifier b - `) - }) -}) diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index 73f2603..f922672 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -161,11 +161,7 @@ BinOp { } ParenExpr { - leftParen parenContent rightParen -} - -parenContent { - (ambiguousFunctionCall | BinOp | expressionWithoutIdentifier | ConditionalOp | PipeExpr) + leftParen (ambiguousFunctionCall | BinOp | expressionWithoutIdentifier | ConditionalOp | PipeExpr) rightParen } expression { @@ -184,7 +180,7 @@ stringContent { Interpolation { "$" Identifier | - "$" leftParen parenContent rightParen + "$" ParenExpr } StringEscape { diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 3c14df8..835f63c 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,11 +4,11 @@ import {tokenizer} from "./tokenizer" import {highlighting} from "./highlight" export const parser = LRParser.deserialize({ version: 14, - states: ".pQVQaOOO!rQbO'#CdO#SQPO'#CeO#bQPO'#DhO#yQaO'#CcO$_OSO'#CsOOQ`'#Dl'#DlO$mQPO'#DkO%UQaO'#DwOOQ`'#Cy'#CyOOQO'#Di'#DiO%^QPO'#DhO%lQaO'#D{OOQO'#DS'#DSOOQO'#Dh'#DhO&QQPO'#DgOOQ`'#Dg'#DgOOQ`'#D]'#D]QVQaOOOOQ`'#Dk'#DkOOQ`'#Cb'#CbO&YQaO'#DPOOQ`'#Dj'#DjOOQ`'#D^'#D^O&dQbO,58{O'QQaO,59vO%lQaO,59PO%lQaO,59PO'lQbO'#CdO(wQPO'#CeO)XQPO'#DnO)jQPO'#DnOOQO'#Dn'#DnO*eQPO,58}O*jQPO'#DnO*rQaO'#CuO*zQWO'#CvOOOO'#Dq'#DqOOOO'#D_'#D_O+`OSO,59_OOQ`,59_,59_OOQ`'#D`'#D`O+nQaO'#C{O+vQPO,5:cO+{QaO'#DbO,QQPO,58zO,cQPO,5:gO,jQPO,5:gOOQ`,5:R,5:ROOQ`-E7Z-E7ZOOQ`,59k,59kOOQ`-E7[-E7[OOQO1G/b1G/bOOQO1G.k1G.kO,oQPO1G.kO%lQaO,59UO%lQaO,59UOOQ`1G.i1G.iOOOO,59a,59aO#yQaO,59aOOOO,59b,59bOOOO-E7]-E7]OOQ`1G.y1G.yOOQ`-E7^-E7^O-ZQaO1G/}O-bQbO'#CdOOQO,59|,59|OOQO-E7`-E7`O.OQaO1G0ROOQO1G.p1G.pO.VQPO1G.pO.aQPO1G.{O.fQPO7+%iO.kQaO7+%jOOQO'#DU'#DUOOQO7+%m7+%mO.rQaO7+%nOOOO7+$g7+$gOOQ`<pAN>pO%lQaO'#DWOOQO'#Dc'#DcO/sQPOAN>tO0OQPO'#DYOOQOAN>tAN>tO0TQPOAN>tO0YQPO,59rO0aQPO,59rOOQO-E7a-E7aOOQOG24`G24`O0fQPOG24`O0kQPO,59tO0pQPO1G/^OOQOLD)zLD)zO.kQaO1G/`O.rQaO7+$xOOQO7+$z7+$zOOQO<oAN>oO%pQaO'#DWOOQO'#Dc'#DcO0ZQPOAN>sO0fQPO'#DYOOQOAN>sAN>sO0kQPOAN>sO0pQPO,59rO0wQPO,59rOOQO-E7a-E7aOOQOG24_G24_O0|QPOG24_O1RQPO,59tO1WQPO1G/^OOQOLD)yLD)yO.fQaO1G/`O.vQaO7+$xOOQO7+$z7+$zOOQO<S[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h>x#h#o2i#o;'S$_;'S;=`$v<%lO$_V>}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y?s#Y#o2i#o;'S$_;'S;=`$v<%lO$_V?zYlRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V@qYnRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#ZB[#Z#o2i#o;'S$_;'S;=`$v<%lO$_VBcYwPhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^CYY!hWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VC}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gDs#g#o2i#o;'S$_;'S;=`$v<%lO$_VDzYfRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^EqY!jWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$__Fh[!iWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gG^#g#o2i#o;'S$_;'S;=`$v<%lO$_VGc[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#i2i#i#j>x#j#o2i#o;'S$_;'S;=`$v<%lO$_VH`UuRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~HwO!q~", + tokenData: "Hw~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P$_!P!Q,b!Q![*]![!],{!]!^%g!^!_-f!_!`.p!`!a/Z!a#O$_#O#P0e#P#R$_#R#S0j#S#T$_#T#U1T#U#X2i#X#Y5O#Y#ZS[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h>x#h#o2i#o;'S$_;'S;=`$v<%lO$_V>}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y?s#Y#o2i#o;'S$_;'S;=`$v<%lO$_V?zYlRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V@qYnRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#ZB[#Z#o2i#o;'S$_;'S;=`$v<%lO$_VBcYwPhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^CYY!gWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VC}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gDs#g#o2i#o;'S$_;'S;=`$v<%lO$_VDzYfRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^EqY!iWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$__Fh[!hWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gG^#g#o2i#o;'S$_;'S;=`$v<%lO$_VGc[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#i2i#i#j>x#j#o2i#o;'S$_;'S;=`$v<%lO$_VH`UuRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~HwO!p~", tokenizers: [0, 1, 2, 3, tokenizer], topRules: {"Program":[0,3]}, - tokenPrec: 727 + tokenPrec: 749 }) diff --git a/src/parser/tests/strings.test.ts b/src/parser/tests/strings.test.ts index c7ba69a..037ac55 100644 --- a/src/parser/tests/strings.test.ts +++ b/src/parser/tests/strings.test.ts @@ -12,15 +12,40 @@ describe('string interpolation', () => { `) }) - test('string with expression interpolation', () => { + test('string with expression interpolation in the middle', () => { + expect("'sum is $(a + b)!'").toMatchTree(` + String + StringFragment ${'sum is '} + Interpolation + ParenExpr + BinOp + Identifier a + operator + + Identifier b + StringFragment ! + `) + }) + + test('string with expression interpolation at the end', () => { expect("'sum is $(a + b)'").toMatchTree(` String StringFragment ${'sum is '} Interpolation - BinOp - Identifier a - operator + - Identifier b + ParenExpr + BinOp + Identifier a + operator + + Identifier b + `) + }) + + test('string with expression smooshed inbetween', () => { + expect("'x/$y/z'").toMatchTree(` + String + StringFragment x/ + Interpolation + Identifier y + StringFragment /z `) }) }) diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index 9ea9c87..09ebe55 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -1,9 +1,11 @@ import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr' import { Identifier, Word } from './shrimp.terms' +// The only chars that can't be words are whitespace, apostrophes, closing parens, and EOF. + export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => { let ch = getFullCodePoint(input, 0) - if (isWhitespace(ch) || ch === -1) return + if (!isWordChar(ch)) return let pos = getCharSize(ch) let isValidIdentifier = isLowercaseLetter(ch) || isEmoji(ch) @@ -12,17 +14,14 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack while (true) { ch = getFullCodePoint(input, pos) - // Words and identifiers end at whitespace, single quotes, or end of input. - if (isWhitespace(ch) || ch === 39 /* ' */ || ch === -1) break + if (!isWordChar(ch)) break // Certain characters might end a word or identifier if they are followed by whitespace. - // This allows things like `a = hello; 2` or a = (basename ./file.txt) + // This allows things like `a = hello; 2` of if `x: y` to parse correctly. // to work as expected. - if (canBeWord && (ch === 59 /* ; */ || ch === 41 /* ) */ || ch === 58) /* : */) { + if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) { const nextCh = getFullCodePoint(input, pos + 1) - if (isWhitespace(nextCh) || nextCh === 39 /* ' */ || nextCh === -1) { - break - } + if (!isWordChar(nextCh)) break } // Track identifier validity @@ -38,10 +37,17 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack input.acceptToken(isValidIdentifier ? Identifier : Word) }) -const isWhitespace = (ch: number): boolean => { +const isWhiteSpace = (ch: number): boolean => { return ch === 32 /* space */ || ch === 10 /* \n */ || ch === 9 /* tab */ || ch === 13 /* \r */ } +const isWordChar = (ch: number): boolean => { + const closingParen = ch === 41 /* ) */ + const eof = ch === -1 + + return !isWhiteSpace(ch) && !closingParen && !eof +} + const isLowercaseLetter = (ch: number): boolean => { return ch >= 97 && ch <= 122 // a-z }