diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts
index 429a94b..b907055 100644
--- a/src/compiler/compiler.ts
+++ b/src/compiler/compiler.ts
@@ -2,6 +2,7 @@ import { CompilerError } from '#compiler/compilerError.ts'
 import { parser } from '#parser/shrimp.ts'
 import * as terms from '#parser/shrimp.terms'
 import { setGlobals } from '#parser/tokenizer'
+import { tokenizeCurlyString } from '#parser/curlyTokenizer'
 import type { SyntaxNode, Tree } from '@lezer/common'
 import { assert, errorMessage } from '#utils/utils'
 import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
@@ -112,6 +113,9 @@ export class Compiler {
         return [[`PUSH`, number]]
 
       case terms.String: {
+        if (node.firstChild?.type.id === terms.CurlyString)
+          return this.#compileCurlyString(value, input)
+
         const { parts, hasInterpolation } = getStringParts(node, input)
 
         // Simple string without interpolation or escapes - extract text directly
@@ -772,4 +776,26 @@ export class Compiler {
 
     return instructions
   }
+
+  #compileCurlyString(value: string, input: string): ProgramItem[] {
+    const instructions: ProgramItem[] = []
+    const nodes = tokenizeCurlyString(value)
+
+    nodes.forEach((node) => {
+      if (typeof node === 'string') {
+        instructions.push(['PUSH', node])
+      } else {
+        const [input, topNode] = node
+        let child = topNode.firstChild
+        while (child) {
+          instructions.push(...this.#compileNode(child, input))
+          child = child.nextSibling
+        }
+      }
+    })
+
+    instructions.push(['STR_CONCAT', nodes.length])
+
+    return instructions
+  }
 }
diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts
index c1dc14b..03858e3 100644
--- a/src/compiler/tests/literals.test.ts
+++ b/src/compiler/tests/literals.test.ts
@@ -155,3 +155,48 @@ describe('dict literals', () => {
       c=3]`).toEvaluateTo({ a: 1, b: 2, c: 3 })
   })
 })
+
+describe('curly strings', () => {
+  test('work on one line', () => {
+    expect('{ one two three }').toEvaluateTo(" one two three ")
+  })
+
+  test('work on multiple lines', () => {
+    expect(`{
+ one
+ two
+ three
+ }`).toEvaluateTo("\n one\n two\n three\n ")
+  })
+
+  test('can contain other curlies', () => {
+    expect(`{
+ { one }
+ two
+ { three }
+ }`).toEvaluateTo("\n { one }\n two\n { three }\n ")
+  })
+
+  test('interpolates variables', () => {
+    expect(`name = Bob; { Hello $name! }`).toEvaluateTo(` Hello Bob! `)
+  })
+
+  test("doesn't interpolate escaped variables", () => {
+    expect(`name = Bob; { Hello \\$name }`).toEvaluateTo(` Hello $name `)
+    expect(`a = 1; b = 2; { sum is \\$(a + b)! }`).toEvaluateTo(` sum is $(a + b)! `)
+  })
+
+  test('interpolates expressions', () => {
+    expect(`a = 1; b = 2; { sum is $(a + b)! }`).toEvaluateTo(` sum is 3! `)
+    expect(`a = 1; b = 2; { sum is { $(a + b) }! }`).toEvaluateTo(` sum is { 3 }! `)
+    expect(`a = 1; b = 2; { sum is $(a + (b * b))! }`).toEvaluateTo(` sum is 5! `)
+    expect(`{ This is $({twisted}). }`).toEvaluateTo(` This is twisted. `)
+    expect(`{ This is $({{twisted}}). }`).toEvaluateTo(` This is {twisted}. `)
+  })
+
+  test('interpolation edge cases', () => {
+    expect(`{[a=1 b=2 c={wild}]}`).toEvaluateTo(`[a=1 b=2 c={wild}]`)
+    expect(`a = 1;b = 2;c = 3;{$a $b $c}`).toEvaluateTo(`1 2 3`)
+    expect(`a = 1;b = 2;c = 3;{$a$b$c}`).toEvaluateTo(`123`)
+  })
+})
diff --git a/src/compiler/tests/pipe.test.ts b/src/compiler/tests/pipe.test.ts
index 1d08dec..06d56c3 100644
--- a/src/compiler/tests/pipe.test.ts
+++ b/src/compiler/tests/pipe.test.ts
@@ -89,7 +89,7 @@ describe('pipe expressions', () => {
-  test('pipe with prelude function (echo)', () => {
+  test('pipe with prelude function (length)', () => {
     expect(`
       get-msg = do: 'hello' end
-      get-msg | echo
-    `).toEvaluateTo(null)
+      get-msg | length
+    `).toEvaluateTo(5)
   })
 })
diff --git a/src/compiler/tests/ribbit.test.ts b/src/compiler/tests/ribbit.test.ts
index e2bb6c2..def34c4 100644
--- a/src/compiler/tests/ribbit.test.ts
+++ b/src/compiler/tests/ribbit.test.ts
@@ -83,7 +83,7 @@ end
 
 test('custom tags', () => {
   expect(`
-list = tag ul class=list
+list = tag ul class='list'
 ribbit:
   list:
     li border-bottom='1px solid black' one
diff --git a/src/compiler/utils.ts b/src/compiler/utils.ts
index 20afa96..c424be2 100644
--- a/src/compiler/utils.ts
+++ b/src/compiler/utils.ts
@@ -251,7 +251,9 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
     return (
       child.type.id === terms.StringFragment ||
       child.type.id === terms.Interpolation ||
-      child.type.id === terms.EscapeSeq
+      child.type.id === terms.EscapeSeq ||
+      child.type.id === terms.CurlyString
+
     )
   })
 
@@ -260,7 +262,8 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
     if (
       part.type.id !== terms.StringFragment &&
      part.type.id !== terms.Interpolation &&
-      part.type.id !== terms.EscapeSeq
+      part.type.id !== terms.EscapeSeq &&
+      part.type.id !== terms.CurlyString
     ) {
       throw new CompilerError(
-        `String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
+        `String child must be StringFragment, Interpolation, EscapeSeq, or CurlyString, got ${part.type.name}`,
diff --git a/src/parser/curlyTokenizer.ts b/src/parser/curlyTokenizer.ts
new file mode 100644
index 0000000..00e3ce1
--- /dev/null
+++ b/src/parser/curlyTokenizer.ts
@@ -0,0 +1,62 @@
+import { parser } from '#parser/shrimp.ts'
+import type { SyntaxNode } from '@lezer/common'
+import { isIdentStart, isIdentChar } from './tokenizer'
+
+// Turns a { curly string } into strings and nodes for interpolation
+export const tokenizeCurlyString = (value: string): (string | [string, SyntaxNode])[] => {
+  let pos = 1
+  let start = 1
+  let char = value[pos]
+  const tokens: (string | [string, SyntaxNode])[] = []
+
+  while (pos < value.length) {
+    if (char === '$') {
+      // escaped \$
+      if (value[pos - 1] === '\\' && value[pos - 2] !== '\\') {
+        tokens.push(value.slice(start, pos - 1))
+        start = pos
+        char = value[++pos]
+        continue
+      }
+
+      tokens.push(value.slice(start, pos))
+      start = pos
+
+      if (value[pos + 1] === '(') {
+        pos++ // skip opening '('
+
+        char = value[++pos]
+        if (!char) break
+
+        let depth = 0
+        while (char) {
+          if (char === '(') depth++
+          if (char === ')') depth--
+          if (depth < 0) break
+          char = value[++pos]
+        }
+
+        const input = value.slice(start + 2, pos) // skip '$('
+        tokens.push([input, parser.parse(input).topNode])
+        start = pos + 1 // skip ')' but leave pos so the next pass sees the following char
+      } else {
+        char = value[++pos]
+        if (!char) break
+        if (!isIdentStart(char.charCodeAt(0))) break
+
+        while (char && isIdentChar(char.charCodeAt(0)))
+          char = value[++pos]
+
+        const input = value.slice(start + 1, pos) // skip '$'
+        tokens.push([input, parser.parse(input).topNode])
+        start = pos-- // backtrack and start over
+      }
+    }
+
+    char = value[++pos]
+  }
+
+  tokens.push(value.slice(start, pos - 1))
+
+  return tokens
+}
\ No newline at end of file
diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar
index ac38814..91a9faf 100644
--- a/src/parser/shrimp.grammar
+++ b/src/parser/shrimp.grammar
@@ -37,7 +37,7 @@ finally { @specialize[@name=keyword] }
 throw { @specialize[@name=keyword] }
 null { @specialize[@name=Null] }
 
-@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot }
+@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, CurlyString }
 @external specialize {Identifier} specializeKeyword from "./tokenizer" { Do }
 
 @precedence {
@@ -205,7 +205,9 @@ expression {
     IdentifierBeforeDot dot (Number | Identifier | ParenExpr)
   }
 
-  String { "'" stringContent* "'" }
+  String {
+    "'" stringContent* "'" | CurlyString
+  }
 }
 
 stringContent {
diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts
index 716ef1c..b5b68e0 100644
--- a/src/parser/shrimp.terms.ts
+++ b/src/parser/shrimp.terms.ts
@@ -23,44 +23,45 @@ export const
   AssignableIdentifier = 21,
   Word = 22,
   IdentifierBeforeDot = 23,
-  Do = 24,
-  Comment = 25,
-  Program = 26,
-  PipeExpr = 27,
-  FunctionCall = 28,
-  DotGet = 29,
-  Number = 30,
-  ParenExpr = 31,
-  IfExpr = 32,
-  keyword = 70,
-  ConditionalOp = 34,
-  String = 35,
-  StringFragment = 36,
-  Interpolation = 37,
-  EscapeSeq = 38,
-  Boolean = 39,
-  Regex = 40,
-  Dict = 41,
-  NamedArg = 42,
-  NamedArgPrefix = 43,
-  FunctionDef = 44,
-  Params = 45,
-  NamedParam = 46,
-  Null = 47,
-  colon = 48,
-  CatchExpr = 49,
-  Block = 51,
-  FinallyExpr = 52,
-  Underscore = 55,
-  Array = 56,
-  ElseIfExpr = 57,
-  ElseExpr = 59,
-  FunctionCallOrIdentifier = 60,
-  BinOp = 61,
-  PositionalArg = 62,
-  WhileExpr = 64,
-  FunctionCallWithBlock = 66,
-  TryExpr = 67,
-  Throw = 69,
-  CompoundAssign = 71,
-  Assign = 72
+  CurlyString = 24,
+  Do = 25,
+  Comment = 26,
+  Program = 27,
+  PipeExpr = 28,
+  FunctionCall = 29,
+  DotGet = 30,
+  Number = 31,
+  ParenExpr = 32,
+  IfExpr = 33,
+  keyword = 71,
+  ConditionalOp = 35,
+  String = 36,
+  StringFragment = 37,
+  Interpolation = 38,
+  EscapeSeq = 39,
+  Boolean = 40,
+  Regex = 41,
+  Dict = 42,
+  NamedArg = 43,
+  NamedArgPrefix = 44,
+  FunctionDef = 45,
+  Params = 46,
+  NamedParam = 47,
+  Null = 48,
+  colon = 49,
+  CatchExpr = 50,
+  Block = 52,
+  FinallyExpr = 53,
+  Underscore = 56,
+  Array = 57,
+  ElseIfExpr = 58,
+  ElseExpr = 60,
+  FunctionCallOrIdentifier = 61,
+  BinOp = 62,
+  PositionalArg = 63,
+  WhileExpr = 65,
+  FunctionCallWithBlock = 67,
+  TryExpr = 68,
+  Throw = 70,
+  CompoundAssign = 72,
+  Assign = 73
diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts
index ffb958e..ea588a3 100644
--- a/src/parser/shrimp.ts
+++ b/src/parser/shrimp.ts
@@ -4,24 +4,24 @@ import {operatorTokenizer} from "./operatorTokenizer"
 import {tokenizer, specializeKeyword} from "./tokenizer"
 import {trackScope} from "./parserScopeContext"
 import {highlighting} from "./highlight"
-const spec_Identifier = {__proto__:null,if:66, null:94, catch:100, finally:106, end:108, else:116, while:130, try:136, throw:140}
+const spec_Identifier = {__proto__:null,if:68, null:96, catch:102, finally:108, end:110, else:118, while:132, try:138, throw:142}
 export const parser = LRParser.deserialize({
   version: 14,
-  states: 
"9[QYQbOOO!dOSO'#DPOOQa'#DV'#DVO#mQbO'#DfO%RQcO'#E^OOQa'#E^'#E^O&XQcO'#E^O'ZQcO'#E]O'qQcO'#E]O)^QRO'#DOO*mQcO'#EWO*wQcO'#EWO+XQbO'#C{O,SOpO'#CyOOQ`'#EX'#EXO,XQbO'#EWO,cQRO'#DuOOQ`'#EW'#EWO,wQQO'#EVOOQ`'#EV'#EVOOQ`'#Dw'#DwQYQbOOO-PQbO'#DYO-[QbO'#C|O.PQbO'#DnO.tQQO'#DqO.PQbO'#DsO.yQbO'#DRO/RQWO'#DSOOOO'#E`'#E`OOOO'#Dx'#DxO/gOSO,59kOOQa,59k,59kOOQ`'#Dy'#DyO/uQbO,5:QO/|QbO'#DWO0WQQO,59qOOQa,5:Q,5:QO0cQbO,5:QOOQa'#E]'#E]OOQ`'#Dl'#DlOOQ`'#El'#ElOOQ`'#EQ'#EQO0mQbO,59dO1gQbO,5:bO.PQbO,59jO.PQbO,59jO.PQbO,59jO.PQbO,5:VO.PQbO,5:VO.PQbO,5:VO1wQRO,59gO2OQRO,59gO2ZQRO,59gO2UQQO,59gO2lQQO,59gO2tObO,59eO3PQbO'#ERO3[QbO,59cO3vQbO,5:[O1gQbO,5:aOOQ`,5:q,5:qOOQ`-E7u-E7uOOQ`'#Dz'#DzO4ZQbO'#DZO4fQbO'#D[OOQO'#D{'#D{O4^QQO'#DZO4tQQO,59tO4yQcO'#E]O6_QRO'#E[O6fQRO'#E[OOQO'#E['#E[O6qQQO,59hO6vQRO,5:YO6}QRO,5:YO3vQbO,5:]O7YQcO,5:_O8UQcO,5:_O8`QcO,5:_OOOO,59m,59mOOOO,59n,59nOOOO-E7v-E7vOOQa1G/V1G/VOOQ`-E7w-E7wO8pQQO1G/]OOQa1G/l1G/lO8{QbO1G/lOOQ`,59r,59rOOQO'#D}'#D}O8pQQO1G/]OOQa1G/]1G/]OOQ`'#EO'#EOO8{QbO1G/lOOQ`-E8O-E8OOOQ`1G/|1G/|OOQa1G/U1G/UO:WQcO1G/UO:_QcO1G/UO:fQcO1G/UOOQa1G/q1G/qO;_QcO1G/qO;iQcO1G/qO;sQcO1G/qOOQa1G/R1G/ROOQa1G/P1G/PO]QbO1G/vOOQ`1G/{1G/{OOQ`-E7x-E7xO>hQQO,59uOOQO,59v,59vOOQO-E7y-E7yO>pQbO1G/`O3vQbO1G/SO3vQbO1G/tO?TQbO1G/wO?`QQO7+$wOOQa7+$w7+$wO?kQbO7+%WOOQa7+%W7+%WOOQO-E7{-E7{OOQ`-E7|-E7|OOQ`'#D|'#D|O?uQQO'#D|O?zQbO'#EiOOQ`,59{,59{O@kQbO'#D_O@pQQO'#DbOOQ`7+%b7+%bO@uQbO7+%bO@zQbO7+%bOASQbO7+$zOA_QbO7+$zOA{QbO7+$nOBTQbO7+%`OOQ`7+%c7+%cOBYQbO7+%cOB_QbO7+%cOOQa<hAN>hOOQ`AN>QAN>QOCuQbOAN>QOCzQbOAN>QOOQ`-E7}-E7}OOQ`AN=tAN=tODSQbOAN=tO-[QbO,5:RO3vQbO,5:TOOQ`AN>iAN>iOOQ`7+%P7+%POOQ`G23lG23lODXQbOG23lPD^QbO'#DgOOQ`G23`G23`ODcQQO1G/mOOQ`1G/o1G/oOOQ`LD)WLD)WO3vQbO7+%XOOQ`<UQbO'#DbO>uQbO1G/wOOQ`1G/|1G/|OOQ`-E7y-E7yO?QQQO,59vOOQO,59w,59wOOQO-E7z-E7zO?YQbO1G/aO4]QbO1G/TO4]QbO1G/uO?mQbO1G/xO?xQQO7+$xOOQa7+$x7+$xO@TQbO7+%XOOQa7+%X7+%XOOQO-E7|-E7|OOQ`-E7}-E7}OOQ`'#D}'#D}O@_QQO'#D}O@dQbO'#EjOOQ`,59|,59|OATQbO'#D`OAYQQO'#DcOOQ`7+%c7+%cOA_QbO7+%cOAdQbO7+%cOAlQbO7+${OAwQbO7+${OBeQbO7+$oOBmQbO7+%aOOQ`7+%d7+%dOBrQbO7+%dOBwQbO7+%dOOQa<iAN>iOOQ`AN>RAN>ROD_QbOAN>RODdQbOAN>ROOQ`-E8O-E8OOOQ`AN=uAN=uODlQbOAN=uO-kQbO,5:SO4]QbO,5:UOOQ`AN>jAN>jOOQ`7+%Q7+%QOOQ`G23mG23mODqQbOG23mPDvQbO'#DhOOQ`G23aG23aOD{QQO1G/nOOQ`1G/p1G/pOOQ`LD)XLD)XO4]QbO7+%YOOQ`<c#Y#o,w#o;'S#{;'S;=`$d<%lO#{U>j[wQtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^?g[#VWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^@d[#XWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^Aa^#WWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#f,w#f#gB]#g#o,w#o;'S#{;'S;=`$d<%lO#{UBb^tSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#i,w#i#j=b#j#o,w#o;'S#{;'S;=`$d<%lO#{UCeU!aQtSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~C|O#a~", - tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!|~~", 11)], - topRules: {"Program":[0,26]}, + tokenData: 
"C|~R|OX#{XY$jYZ%TZp#{pq$jqs#{st%ntu'tuw#{wx'yxy(Oyz(iz{#{{|)S|}#{}!O+v!O!P#{!P!Q.]!Q![)q![!]6x!]!^%T!^!}#{!}#O7c#O#P9X#P#Q9^#Q#R#{#R#S9w#S#T#{#T#Y,w#Y#Z:b#Z#b,w#b#c?`#c#f,w#f#g@]#g#h,w#h#iAY#i#o,w#o#p#{#p#qC^#q;'S#{;'S;=`$d<%l~#{~O#{~~CwS$QUuSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{S$gP;=`<%l#{^$qUuS!yYOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U%[UuS#]QOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{^%sWuSOp#{pq&]qt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{^&dZjYuSOY&]YZ#{Zt&]tu'Vuw&]wx'Vx#O&]#O#P'V#P;'S&];'S;=`'n<%lO&]Y'[SjYOY'VZ;'S'V;'S;=`'h<%lO'VY'kP;=`<%l'V^'qP;=`<%l&]~'yO#U~~(OO#S~U(VUuS#OQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U(pUuS#`QOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U)XWuSOt#{uw#{x!Q#{!Q![)q![#O#{#P;'S#{;'S;=`$d<%lO#{U)xYuSoQOt#{uw#{x!O#{!O!P*h!P!Q#{!Q![)q![#O#{#P;'S#{;'S;=`$d<%lO#{U*mWuSOt#{uw#{x!Q#{!Q![+V![#O#{#P;'S#{;'S;=`$d<%lO#{U+^WuSoQOt#{uw#{x!Q#{!Q![+V![#O#{#P;'S#{;'S;=`$d<%lO#{U+{^uSOt#{uw#{x}#{}!O,w!O!Q#{!Q![)q![!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{U,|[uSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{U-yU|QuSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U.bWuSOt#{uw#{x!P#{!P!Q.z!Q#O#{#P;'S#{;'S;=`$d<%lO#{U/P^uSOY/{YZ#{Zt/{tu1Ouw/{wx1Ox!P/{!P!Q#{!Q!}/{!}#O5q#O#P3^#P;'S/{;'S;=`6r<%lO/{U0S^uSyQOY/{YZ#{Zt/{tu1Ouw/{wx1Ox!P/{!P!Q3s!Q!}/{!}#O5q#O#P3^#P;'S/{;'S;=`6r<%lO/{Q1TXyQOY1OZ!P1O!P!Q1p!Q!}1O!}#O2_#O#P3^#P;'S1O;'S;=`3m<%lO1OQ1sP!P!Q1vQ1{UyQ#Z#[1v#]#^1v#a#b1v#g#h1v#i#j1v#m#n1vQ2bVOY2_Z#O2_#O#P2w#P#Q1O#Q;'S2_;'S;=`3W<%lO2_Q2zSOY2_Z;'S2_;'S;=`3W<%lO2_Q3ZP;=`<%l2_Q3aSOY1OZ;'S1O;'S;=`3m<%lO1OQ3pP;=`<%l1OU3xWuSOt#{uw#{x!P#{!P!Q4b!Q#O#{#P;'S#{;'S;=`$d<%lO#{U4ibuSyQOt#{uw#{x#O#{#P#Z#{#Z#[4b#[#]#{#]#^4b#^#a#{#a#b4b#b#g#{#g#h4b#h#i#{#i#j4b#j#m#{#m#n4b#n;'S#{;'S;=`$d<%lO#{U5v[uSOY5qYZ#{Zt5qtu2_uw5qwx2_x#O5q#O#P2w#P#Q/{#Q;'S5q;'S;=`6l<%lO5qU6oP;=`<%l5qU6uP;=`<%l/{U7PUuS!RQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U7jW#[QuSOt#{uw#{x!_#{!_!`8S!`#O#{#P;'S#{;'S;=`$d<%lO#{U8XVuSOt#{uw#{x#O#{#P#Q8n#Q;'S#{;'S;=`$d<%lO#{U8uU#ZQuSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~9^O#V~U9eU#_QuSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U:OUuS!YQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U:g]uSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#U;`#U#o,w#o;'S#{;'S;=`$d<%lO#{U;e^uSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#`,w#`#ac#Y#o,w#o;'S#{;'S;=`$d<%lO#{U>j[xQuSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^?g[#WWuSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^@d[#YWuSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^Aa^#XWuSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#f,w#f#gB]#g#o,w#o;'S#{;'S;=`$d<%lO#{UBb^uSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#i,w#i#j=b#j#o,w#o;'S#{;'S;=`$d<%lO#{UCeU!bQuSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~C|O#b~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!}~~", 11)], + topRules: {"Program":[0,27]}, specialized: [{term: 20, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 20, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 1634 + tokenPrec: 1658 }) diff --git a/src/parser/tests/strings.test.ts b/src/parser/tests/strings.test.ts index 3f78f56..22f780b 100644 --- a/src/parser/tests/strings.test.ts +++ b/src/parser/tests/strings.test.ts @@ -127,3 +127,34 @@ describe('string escape sequences', () => { `) }) }) + +describe('curly strings', () => { + test('work on one line', () => { + expect('{ one two three }').toMatchTree(` + String + CurlyString { one two three } + `) + }) + + test('work on multiple lines', () => { + expect(`{ + 
+      one
+      two
+      three }`).toMatchTree(`
+      String
+        CurlyString {
+      one
+      two
+      three }`)
+  })
+
+  test('can contain other curlies', () => {
+    expect(`{ { one }
+      two
+      { three } }`).toMatchTree(`
+      String
+        CurlyString { { one }
+      two
+      { three } }`)
+  })
+})
\ No newline at end of file
diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts
index ba8da48..9403179 100644
--- a/src/parser/tokenizer.ts
+++ b/src/parser/tokenizer.ts
@@ -1,5 +1,5 @@
 import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
-import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do } from './shrimp.terms'
+import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString } from './shrimp.terms'
 
 // doobie doobie do (we need the `do` keyword to know when we're defining params)
 export function specializeKeyword(ident: string) {
@@ -18,6 +18,10 @@ export const setGlobals = (newGlobals: string[]) => {
 export const tokenizer = new ExternalTokenizer(
   (input: InputStream, stack: Stack) => {
     const ch = getFullCodePoint(input, 0)
+
+    // Handle curly strings
+    if (ch === 123 /* { */) return consumeCurlyString(input, stack)
+
     if (!isWordChar(ch)) return
 
     // Don't consume things that start with digits - let Number token handle it
@@ -26,7 +30,7 @@ export const tokenizer = new ExternalTokenizer(
     // Don't consume things that start with - or + followed by a digit (negative/positive numbers)
     if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
 
-    const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
+    const isValidStart = isIdentStart(ch)
     const canBeWord = stack.canShift(Word)
 
     // Consume all word characters, tracking if it remains a valid identifier
@@ -119,13 +123,7 @@ const consumeWordToken = (
   }
 
-  // Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
-  if (
-    !isLowercaseLetter(ch) &&
-    !isDigit(ch) &&
-    ch !== 45 /* - */ &&
-    ch !== 63 /* ? */ &&
-    !isEmojiOrUnicode(ch)
-  ) {
+  // Track identifier validity: must be lowercase, digit, dash, question mark, or emoji/unicode
+  if (!isIdentChar(ch)) {
     if (!canBeWord) break
     isValidIdentifier = false
   }
@@ -157,6 +155,32 @@ const consumeRestOfWord = (input: InputStream, startPos: number, canBeWord: bool
   return pos
 }
 
+// Consumes { curly strings } and tracks braces so you can { have { braces { inside { braces } } } }
+const consumeCurlyString = (input: InputStream, stack: Stack) => {
+  if (!stack.canShift(CurlyString)) return
+
+  let depth = 0
+  let pos = 0
+
+  while (true) {
+    const ch = input.peek(pos)
+    if (ch < 0) return // EOF - invalid
+
+    if (ch === 123) depth++ // {
+    else if (ch === 125) { // }
+      depth--
+      if (depth === 0) {
+        pos++ // consume final }
+        break
+      }
+    }
+
+    pos++
+  }
+
+  input.acceptToken(CurlyString, pos)
+}
+
 // Check if this identifier is in scope (for property access detection)
 // Returns IdentifierBeforeDot token if in scope, null otherwise
 const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number | null => {
@@ -219,6 +243,14 @@ const chooseIdentifierToken = (input: InputStream, stack: Stack): number => {
 }
 
 // Character classification helpers
+export const isIdentStart = (ch: number): boolean => {
+  return isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
+}
+
+export const isIdentChar = (ch: number): boolean => {
+  return isLowercaseLetter(ch) || isDigit(ch) || ch === 45 /* - */ || ch === 63 /* ? */ || isEmojiOrUnicode(ch)
+}
+
 const isWhiteSpace = (ch: number): boolean => {
   return ch === 32 /* space */ || ch === 9 /* tab */ || ch === 13 /* \r */
 }
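
The brace matching in `consumeCurlyString` is the heart of the feature, but it is tangled up with the Lezer `InputStream` API. Here is a minimal standalone sketch of the same depth-tracking loop over a plain string (a hypothetical helper, not part of the patch), which may help when reviewing the nesting and EOF cases:

```ts
// Mirrors consumeCurlyString's depth tracking: returns the length of the
// balanced { ... } starting at `start`, or -1 if the closing brace never
// arrives (the EOF case, where the tokenizer refuses to emit a token).
const curlyStringLength = (text: string, start: number): number => {
  let depth = 0
  for (let pos = start; pos < text.length; pos++) {
    if (text[pos] === '{') depth++
    else if (text[pos] === '}' && --depth === 0) return pos - start + 1
  }
  return -1
}

console.log(curlyStringLength('{ a { nested } b }', 0)) // 18 - the whole string
console.log(curlyStringLength('{ never closed', 0))     // -1
```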
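`isIdentStart` and `isIdentChar` are exported so the interpolation scanner in `curlyTokenizer.ts` agrees with the main tokenizer about what an identifier is. A quick sanity sketch of what that means for `$name` interpolation (illustrative only; `codePointAt` stands in for the tokenizer's own code-point handling, and the import assumes the `#parser` alias used elsewhere in the repo):

```ts
import { isIdentStart, isIdentChar } from '#parser/tokenizer'

const cp = (s: string) => s.codePointAt(0)!

isIdentStart(cp('a')) // true
isIdentStart(cp('1')) // false - identifiers can't start with a digit
isIdentChar(cp('-'))  // true  - kebab-case names like get-msg
isIdentChar(cp('?'))  // true  - predicate-style names
isIdentChar(cp('!'))  // false - so `{ Hello $name! }` interpolates `name`, not `name!`
```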
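Where the two halves meet: `tokenizeCurlyString` hands `#compileCurlyString` an alternating mix of literal strings and `[source, SyntaxNode]` pairs. A sketch of walking its output for one of the tested inputs, assuming each interpolation parses to a `Program` top node whose children the compiler then walks:

```ts
import { tokenizeCurlyString } from '#parser/curlyTokenizer'

// For '{ sum is $(a + b)! }' the tokens should be:
//   ' sum is '                - leading literal (the outer braces are dropped)
//   ['a + b', <Program node>] - parsed interpolation
//   '! '                      - trailing literal
for (const token of tokenizeCurlyString('{ sum is $(a + b)! }')) {
  if (typeof token === 'string') {
    console.log('literal', JSON.stringify(token))
  } else {
    const [source, node] = token
    console.log('interpolation', JSON.stringify(source), node.type.name)
  }
}
```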
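Compilation then emits one stack value per token followed by a single `STR_CONCAT` carrying the token count. Roughly, for the same input - the instructions for the `a + b` subtree are whatever `#compileNode` lowers a `BinOp` to, so treat that part as a placeholder:

```ts
import type { ProgramItem } from 'reefvm'

// Hypothetical shape of the program emitted for '{ sum is $(a + b)! }':
const sketch: ProgramItem[] = [
  ['PUSH', ' sum is '], // literal token
  // ...instructions for the interpolated `a + b`,
  //    which leave exactly one value on the stack...
  ['PUSH', '! '],       // literal token
  ['STR_CONCAT', 3],    // one value per token, concatenated in order
]
```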