diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts
index 2cb076b..7cf9255 100644
--- a/src/compiler/compiler.ts
+++ b/src/compiler/compiler.ts
@@ -2,6 +2,7 @@ import { CompilerError } from '#compiler/compilerError.ts'
 import { parser } from '#parser/shrimp.ts'
 import * as terms from '#parser/shrimp.terms'
 import { setGlobals } from '#parser/tokenizer'
+import { tokenizeCurlyString } from '#parser/curlyTokenizer'
 import type { SyntaxNode, Tree } from '@lezer/common'
 import { assert, errorMessage } from '#utils/utils'
 import { toBytecode, type Bytecode, type ProgramItem, bytecodeToString } from 'reefvm'
@@ -123,6 +124,9 @@ export class Compiler {
         return [[`PUSH`, numberValue]]
 
       case terms.String: {
+        if (node.firstChild?.type.id === terms.CurlyString)
+          return this.#compileCurlyString(value, input)
+
         const { parts, hasInterpolation } = getStringParts(node, input)
 
         // Simple string without interpolation or escapes - extract text directly
@@ -853,4 +857,27 @@ export class Compiler {
 
     return instructions
   }
+
+  // Compile a { curly string }: PUSH literal chunks, compile interpolations, STR_CONCAT them all
+  #compileCurlyString(value: string, input: string): ProgramItem[] {
+    const instructions: ProgramItem[] = []
+    const nodes = tokenizeCurlyString(value)
+
+    nodes.forEach((node) => {
+      if (typeof node === 'string') {
+        instructions.push(['PUSH', node])
+      } else {
+        const [input, topNode] = node
+        let child = topNode.firstChild
+        while (child) {
+          instructions.push(...this.#compileNode(child, input))
+          child = child.nextSibling
+        }
+      }
+    })
+
+    instructions.push(['STR_CONCAT', nodes.length])
+
+    return instructions
+  }
 }
diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts
index 45f9fba..96830cf 100644
--- a/src/compiler/tests/literals.test.ts
+++ b/src/compiler/tests/literals.test.ts
@@ -193,3 +193,69 @@ describe('dict literals', () => {
       c=3]`).toEvaluateTo({ a: 1, b: 2, c: 3 })
   })
 })
+
+describe('curly strings', () => {
+  test('work on one line', () => {
+    expect('{ one two three }').toEvaluateTo(" one two three ")
+  })
+
+  test('work on multiple lines', () => {
+    expect(`{
+ one
+ two
+ three
+ }`).toEvaluateTo("\n one\n two\n three\n ")
+  })
+
+  test('can contain other curlies', () => {
+    expect(`{
+ { one }
+ two
+ { three }
+ }`).toEvaluateTo("\n { one }\n two\n { three }\n ")
+  })
+
+  test('interpolates variables', () => {
+    expect(`name = Bob; { Hello $name! }`).toEvaluateTo(` Hello Bob! `)
+  })
+
+  test("doesn't interpolate escaped variables", () => {
+    expect(`name = Bob; { Hello \\$name }`).toEvaluateTo(` Hello $name `)
+    expect(`a = 1; b = 2; { sum is \\$(a + b)! }`).toEvaluateTo(` sum is $(a + b)! `)
+  })
+
+  test('interpolates expressions', () => {
+    expect(`a = 1; b = 2; { sum is $(a + b)! }`).toEvaluateTo(` sum is 3! `)
+    expect(`a = 1; b = 2; { sum is { $(a + b) }! }`).toEvaluateTo(` sum is { 3 }! `)
+    expect(`a = 1; b = 2; { sum is $(a + (b * b))! }`).toEvaluateTo(` sum is 5! `)
+    expect(`{ This is $({twisted}). }`).toEvaluateTo(` This is twisted. `)
+    expect(`{ This is $({{twisted}}). }`).toEvaluateTo(` This is {twisted}. `)
+  })
+
+  test('interpolation edge cases', () => {
+    expect(`{[a=1 b=2 c={wild}]}`).toEvaluateTo(`[a=1 b=2 c={wild}]`)
+    expect(`a = 1;b = 2;c = 3;{$a $b $c}`).toEvaluateTo(`1 2 3`)
+    expect(`a = 1;b = 2;c = 3;{$a$b$c}`).toEvaluateTo(`123`)
+  })
+})
+
+describe('double quoted strings', () => {
+  test("work", () => {
+    expect(`"hello world"`).toEvaluateTo('hello world')
+  })
+
+  test("don't interpolate", () => {
+    expect(`"hello $world"`).toEvaluateTo('hello $world')
+    expect(`"hello $(1 + 2)"`).toEvaluateTo('hello $(1 + 2)')
+  })
+
+  test("equal regular strings", () => {
+    expect(`"hello world" == 'hello world'`).toEvaluateTo(true)
+  })
+
+  test("can contain newlines", () => {
+    expect(`
+ "hello
+ world"`).toEvaluateTo('hello\n world')
+  })
+})
\ No newline at end of file
diff --git a/src/compiler/utils.ts b/src/compiler/utils.ts
index 20afa96..c424be2 100644
--- a/src/compiler/utils.ts
+++ b/src/compiler/utils.ts
@@ -251,7 +251,9 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
     return (
       child.type.id === terms.StringFragment ||
       child.type.id === terms.Interpolation ||
-      child.type.id === terms.EscapeSeq
+      child.type.id === terms.EscapeSeq ||
+      child.type.id === terms.CurlyString
+
     )
   })
@@ -260,8 +262,9 @@ export const getStringParts = (node: SyntaxNode, input: string) => {
     if (
       part.type.id !== terms.StringFragment &&
      part.type.id !== terms.Interpolation &&
-      part.type.id !== terms.EscapeSeq
+      part.type.id !== terms.EscapeSeq &&
+      part.type.id !== terms.CurlyString
     ) {
       throw new CompilerError(
-        `String child must be StringFragment, Interpolation, or EscapeSeq, got ${part.type.name}`,
+        `String child must be StringFragment, Interpolation, EscapeSeq, or CurlyString, got ${part.type.name}`,
diff --git a/src/parser/curlyTokenizer.ts b/src/parser/curlyTokenizer.ts
new file mode 100644
index 0000000..00e3ce1
--- /dev/null
+++ b/src/parser/curlyTokenizer.ts
@@ -0,0 +1,63 @@
+import { parser } from '#parser/shrimp.ts'
+import type { SyntaxNode } from '@lezer/common'
+import { isIdentStart, isIdentChar } from './tokenizer'
+
+// Turns a { curly string } into strings and nodes for interpolation
+export const tokenizeCurlyString = (value: string): (string | [string, SyntaxNode])[] => {
+  let pos = 1 // start past the opening '{'
+  let start = 1
+  let char = value[pos]
+  const tokens: (string | [string, SyntaxNode])[] = []
+
+  while (pos < value.length) {
+    if (char === '$') {
+      // escaped \$
+      if (value[pos - 1] === '\\' && value[pos - 2] !== '\\') {
+        tokens.push(value.slice(start, pos - 1))
+        start = pos
+        char = value[++pos]
+        continue
+      }
+
+      tokens.push(value.slice(start, pos))
+      start = pos
+
+      if (value[pos + 1] === '(') {
+        pos++ // skip opening '('
+
+        char = value[++pos]
+        if (!char) break
+
+        let depth = 0
+        while (char) {
+          if (char === '(') depth++
+          if (char === ')') depth--
+          if (depth < 0) break
+          char = value[++pos]
+        }
+
+        const input = value.slice(start + 2, pos) // skip '$('
+        tokens.push([input, parser.parse(input).topNode])
+        start = ++pos // skip ')'
+        pos-- // back up one so the char right after ')' is checked too (it may start another interpolation)
+      } else {
+        char = value[++pos]
+        if (!char) break
+        if (!isIdentStart(char.charCodeAt(0))) break
+
+        while (char && isIdentChar(char.charCodeAt(0)))
+          char = value[++pos]
+
+        const input = value.slice(start + 1, pos) // skip '$'
+        tokens.push([input, parser.parse(input).topNode])
+        start = pos-- // backtrack and start over
+      }
+    }
+
+    char = value[++pos]
+  }
+
+  tokens.push(value.slice(start, pos - 1)) // trailing text, minus the closing '}'
+
+  return tokens
+}
\ No newline at end of file
diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar
index f4ac3e7..c01fd18 100644
--- a/src/parser/shrimp.grammar
+++ b/src/parser/shrimp.grammar
@@ -12,6 +12,7 @@
   { Number Regex }
   StringFragment { !['\\$]+ }
+  DoubleQuote { '"' !["]* '"' }
   NamedArgPrefix { $[a-z] $[a-z0-9-]* "=" }
   Number {
     ("-" | "+")? "0x" $[0-9a-fA-F]+ |
@@ -41,7 +42,7 @@
 finally { @specialize[@name=keyword] }
 throw { @specialize[@name=keyword] }
 null { @specialize[@name=Null] }
-@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot }
+@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, CurlyString }
 @external specialize {Identifier} specializeKeyword from "./tokenizer" { Do }
 
 @precedence {
@@ -233,7 +234,9 @@
 expression {
   IdentifierBeforeDot dot (Number | Identifier | ParenExpr)
 }
 
-  String { "'" stringContent* "'" }
+  String {
+    "'" stringContent* "'" | CurlyString | DoubleQuote
+  }
 }
 stringContent {
diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts
index 3da47bb..0f49afe 100644
--- a/src/parser/shrimp.terms.ts
+++ b/src/parser/shrimp.terms.ts
@@ -31,45 +31,47 @@ export const
   AssignableIdentifier = 29,
   Word = 30,
   IdentifierBeforeDot = 31,
-  Do = 32,
-  Comment = 33,
-  Program = 34,
-  PipeExpr = 35,
-  WhileExpr = 37,
-  keyword = 79,
-  ConditionalOp = 39,
-  ParenExpr = 40,
-  FunctionCallWithNewlines = 41,
-  DotGet = 42,
-  Number = 43,
-  PositionalArg = 44,
-  FunctionDef = 45,
-  Params = 46,
-  NamedParam = 47,
-  NamedArgPrefix = 48,
-  String = 49,
-  StringFragment = 50,
-  Interpolation = 51,
-  EscapeSeq = 52,
-  Boolean = 53,
-  Null = 54,
-  colon = 55,
-  CatchExpr = 56,
-  Block = 58,
-  FinallyExpr = 59,
-  Underscore = 62,
-  NamedArg = 63,
-  IfExpr = 64,
-  FunctionCall = 66,
-  ElseIfExpr = 67,
-  ElseExpr = 69,
-  FunctionCallOrIdentifier = 70,
-  BinOp = 71,
-  Regex = 72,
-  Dict = 73,
-  Array = 74,
-  FunctionCallWithBlock = 75,
-  TryExpr = 76,
-  Throw = 78,
-  CompoundAssign = 80,
-  Assign = 81
+  CurlyString = 32,
+  Do = 33,
+  Comment = 34,
+  Program = 35,
+  PipeExpr = 36,
+  WhileExpr = 38,
+  keyword = 81,
+  ConditionalOp = 40,
+  ParenExpr = 41,
+  FunctionCallWithNewlines = 42,
+  DotGet = 43,
+  Number = 44,
+  PositionalArg = 45,
+  FunctionDef = 46,
+  Params = 47,
+  NamedParam = 48,
+  NamedArgPrefix = 49,
+  String = 50,
+  StringFragment = 51,
+  Interpolation = 52,
+  EscapeSeq = 53,
+  DoubleQuote = 54,
+  Boolean = 55,
+  Null = 56,
+  colon = 57,
+  CatchExpr = 58,
+  Block = 60,
+  FinallyExpr = 61,
+  Underscore = 64,
+  NamedArg = 65,
+  IfExpr = 66,
+  FunctionCall = 68,
+  ElseIfExpr = 69,
+  ElseExpr = 71,
+  FunctionCallOrIdentifier = 72,
+  BinOp = 73,
+  Regex = 74,
+  Dict = 75,
+  Array = 76,
+  FunctionCallWithBlock = 77,
+  TryExpr = 78,
+  Throw = 80,
+  CompoundAssign = 82,
+  Assign = 83
diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts
index 901d667..3f09fb1 100644
--- a/src/parser/shrimp.ts
+++ b/src/parser/shrimp.ts
@@ -4,24 +4,24 @@ import {operatorTokenizer} from "./operatorTokenizer"
 import {tokenizer, specializeKeyword} from "./tokenizer"
 import {trackScope} from "./parserScopeContext"
 import {highlighting} from "./highlight"
-const spec_Identifier = {__proto__:null,while:76, null:108, catch:114, finally:120, end:122, if:130, else:136, try:154, throw:158}
+const spec_Identifier = {__proto__:null,while:78, null:112, catch:118, finally:124, end:126, if:134, else:140, try:158, throw:162}
 export const parser = LRParser.deserialize({
   version: 14,
-  states: 
"vQcO,5:hO?dQcO,5:hOOQa1G/^1G/^OOOO,59{,59{OOOO,59|,59|OOOO-E8T-E8TOOQa1G/e1G/eOOQ`,5:X,5:XOOQ`-E8W-E8WOOQa1G/{1G/{OA`QcO1G/{OAjQcO1G/{OBxQcO1G/{OCSQcO1G/{OCaQcO1G/{OOQa1G/Z1G/ZODrQcO1G/ZODyQcO1G/ZOEQQcO1G/ZOFPQcO1G/ZOEXQcO1G/ZOOQ`-E8Q-E8QOFgQRO1G/[OFqQQO1G/[OFvQQO1G/[OGOQQO1G/[OGZQRO1G/[OGbQRO1G/[OGiQbO,59qOGsQQO1G/[OOQa1G/[1G/[OG{QQO1G/}OOQa1G0O1G0OOHWQbO1G0OOOQO'#E['#E[OG{QQO1G/}OOQa1G/}1G/}OOQ`'#E]'#E]OHWQbO1G0OOHbQbO1G0VOH|QbO1G0UOIhQbO'#DhOIyQbO'#DhOJ^QbO1G0POOQ`-E8P-E8POOQ`,5:m,5:mOOQ`-E8R-E8ROJiQQO,59vOOQO,59w,59wOOQO-E8S-E8SOJqQbO1G/aO9jQbO1G/tO9jQbO1G/XOJxQbO1G0QOKTQQO7+$vOOQa7+$v7+$vOK]QQO1G/]OKeQQO7+%iOOQa7+%i7+%iOKpQbO7+%jOOQa7+%j7+%jOOQO-E8Y-E8YOOQ`-E8Z-E8ZOOQ`'#EW'#EWOKzQQO'#EWOLSQbO'#EpOOQ`,5:S,5:SOLgQbO'#DfOLlQQO'#DiOOQ`7+%k7+%kOLqQbO7+%kOLvQbO7+%kOMOQbO7+${OM^QbO7+${OMnQbO7+%`OMvQbO7+$sOOQ`7+%l7+%lOM{QbO7+%lONQQbO7+%lOOQa<qAN>qOOQ`AN>RAN>RO!![QbOAN>RO!!aQbOAN>ROOQ`-E8X-E8XOOQ`AN>fAN>fO!!iQbOAN>fO2TQbO,5:]O9jQbO,5:_OOQ`AN>rAN>rPGiQbO'#ESOOQ`7+%W7+%WOOQ`G23mG23mO!!nQbOG23mP! nQbO'#DqOOQ`G24QG24QO!!sQQO1G/wOOQ`1G/y1G/yOOQ`LD)XLD)XO9jQbO7+%cOOQ`<UOT}OU!OOj!POt!pa#Y!pa#k!pa!Z!pa!^!pa!_!pa#g!pa!f!pa~O^xOR!iiS!iid!iie!iif!iig!iih!iii!iit!ii#Y!ii#k!ii#g!ii!Z!ii!^!ii!_!ii!f!ii~OP!iiQ!ii~P@XOPyOQyO~P@XOPyOQyOd!iie!iif!iig!iih!iii!iit!ii#Y!ii#k!ii#g!ii!Z!ii!^!ii!_!ii!f!ii~OR!iiS!ii~PAtORzOSzO^xO~PAtORzOSzO~PAtOW|OX|OY|OZ|O[|O]|OTwijwitwi#Ywi#kwi#gwi!Xwi!Zwi!^wi!_wi!fwi~OU!OO~PCkOU!OO~PC}OUwi~PCkOT}OU!OOjwitwi#Ywi#kwi#gwi!Xwi!Zwi!^wi!_wi!fwi~OW|OX|OY|OZ|O[|O]|O~PEXO#Y!QO#g$QO~P*RO#g$QO~O#g$QOt#UX~O!X!cO#g$QOt#UX~O#g$QO~P.WO#g$QO~P7WOpfO!`rO~P,kO#Y!QO#g$QO~O!QsO#Y#kO#j$TO~O#Y#nO#j$VO~P2xOt!fO#Y!si#k!si!Z!si!^!si!_!si#g!si!f!si~Ot!fO#Y!ri#k!ri!Z!ri!^!ri!_!ri#g!ri!f!ri~Ot!fO!Z![X!^![X!_![X!f![X~O#Y$YO!Z#dP!^#dP!_#dP!f#dP~P8cO!Z$^O!^$_O!_$`O~O!Q!jO!X!Oa~O#Y$dO~P8cO!Z$^O!^$_O!_$gO~O#Y!QO#g$jO~O#Y!QO#gyi~O!QsO#Y#kO#j$mO~O#Y#nO#j$nO~P2xOt!fO#Y$oO~O#Y$YO!Z#dX!^#dX!_#dX!f#dX~P8cOl$qO~O!X$rO~O!_$sO~O!^$_O!_$sO~Ot!fO!Z$^O!^$_O!_$uO~O#Y$YO!Z#dP!^#dP!_#dP~P8cO!_$|O!f${O~O!_%OO~O!_%PO~O!^$_O!_%PO~OpfO!`rO#gyq~P,kO#Y!QO#gyq~O!X%UO~O!_%WO~O!_%XO~O!^$_O!_%XO~O!Z$^O!^$_O!_%XO~O!_%]O!f${O~O!X%`O!c%_O~O!_%]O~O!_%aO~OpfO!`rO#gyy~P,kO!_%dO~O!^$_O!_%dO~O!_%gO~O!_%jO~O!X%kO~O{!j~", - goto: 
"8f#gPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP#hP$RP$h%f&t&zP(U(b)[)_P)eP*l*lPPP*pP*|+fPPP+|#hP,f-PP-T-Z-pP.g/k$R$RP$RP$R$R0q0w1T1w1}2X2_2f2l2v2|3WPPP3b3f4Z6PPPP7ZP7kPPPPP7o7u7{r`Oe!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kQ!WWR#a!Rw`OWe!R!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kr^Oe!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kQ!ZWS!og%_Q!thQ!xjQ#W!OQ#Y}Q#]!PR#d!RvSOeg!a!b!c!f!u#s#{#|#}$[$d$r%U%_%`%k!WZRSYhjsvxyz{|}!O!P!S!T!]!`!n#e#j#o$U$k%S%bS!TW!RQ!ykR!zlQ!VWR#`!RrROe!a!b!c!f!u#s#{#|#}$[$d$r%U%`%k!WwRSYhjsvxyz{|}!O!P!S!T!]!`!n#e#j#o$U$k%S%bS!SW!RT!ng%_etRSv!S!T!n#e$k%S%br`Oe!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kdrRSv!S!T!n#e$k%S%bQ!WWQ#OsR#a!RR!mfX!kf!i!l#x#SZORSWYeghjsvxyz{|}!O!P!R!S!T!]!`!a!b!c!f!n!u#e#j#o#s#{#|#}$U$[$d$k$r%S%U%_%`%b%kR#y!jTnQpQ$b#tQ$i$OQ$w$cR%Z$xQ#t!cQ$O!uQ$e#|Q$f#}Q%V$rQ%c%UQ%i%`R%l%kQ$a#tQ$h$OQ$t$bQ$v$cQ%Q$iS%Y$w$xR%e%ZdtRSv!S!T!n#e$k%S%bQ!^YQ#h!]X#k!^#h#l$SvTOWe!R!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kT!qg%_T$y$e$zQ$}$eR%^$zwTOWe!R!a!b!c!f!u#s#{#|#}$[$d$r%U%`%krVOe!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kQ!UWQ!wjQ#QyQ#TzQ#V{R#_!R#TZORSWYeghjsvxyz{|}!O!P!R!S!T!]!`!a!b!c!f!n!u#e#j#o#s#{#|#}$U$[$d$k$r%S%U%_%`%b%k![ZRSYghjsvxyz{|}!O!P!S!T!]!`!n#e#j#o$U$k%S%_%bw[OWe!R!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kQeOR!ge^!db![#p#q#r$Z$cR#u!dQ!RWQ!]Y`#^!R!]#e#f$P$k%S%bS#e!S!TS#f!U!ZS$P#_#dQ$k$RR%S$lQ!ifR#w!iQ!lfQ#x!iT#z!l#xQpQR!|pS$[#s$dR$p$[Q$l$RR%T$lYvRS!S!T!nR#PvQ$z$eR%[$zQ#l!^Q$S#hT$W#l$SQ#o!`Q$U#jT$X#o$UTdOeSbOeS![W!RQ#p!aQ#q!b`#r!c!u#|#}$r%U%`%kQ#v!fU$Z#s$[$dR$c#{vUOWe!R!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kdrRSv!S!T!n#e$k%S%bQ!`YS!pg%_Q!shQ!vjQ#OsQ#QxQ#RyQ#SzQ#U{Q#W|Q#X}Q#Z!OQ#[!PQ#j!]X#n!`#j#o$Ur]Oe!a!b!c!f!u#s#{#|#}$[$d$r%U%`%k![wRSYghjsvxyz{|}!O!P!S!T!]!`!n#e#j#o$U$k%S%_%bQ!YWR#c!R[uRSv!S!T!nQ$R#eV%R$k%S%bToQpQ$]#sR$x$dQ!rgR%h%_raOe!a!b!c!f!u#s#{#|#}$[$d$r%U%`%kQ!XWR#b!R", - nodeNames: "⚠ Star Slash Plus Minus And Or Eq EqEq Neq Lt Lte Gt Gte Modulo PlusEq MinusEq StarEq SlashEq ModuloEq Band Bor Bxor Shl Shr Ushr NullishCoalesce NullishEq Identifier AssignableIdentifier Word IdentifierBeforeDot Do Comment Program PipeExpr operator WhileExpr keyword ConditionalOp ParenExpr FunctionCall DotGet Number PositionalArg FunctionDef Params NamedParam NamedArgPrefix String StringFragment Interpolation EscapeSeq Boolean Null colon CatchExpr keyword Block FinallyExpr keyword keyword Underscore NamedArg IfExpr keyword FunctionCall ElseIfExpr keyword ElseExpr FunctionCallOrIdentifier BinOp Regex Dict Array FunctionCallWithBlock TryExpr keyword Throw keyword CompoundAssign Assign", - maxTerm: 119, + states: "UQQO,5:[O>ZQRO,59nO>bQRO,59nO:lQbO,5:hO>pQcO,5:jO@OQcO,5:jO@lQcO,5:jOOQa1G/_1G/_OOOO,59|,59|OOOO,59},59}OOOO-E8V-E8VOOQa1G/f1G/fOOQ`,5:Z,5:ZOOQ`-E8Y-E8YOOQa1G/}1G/}OBhQcO1G/}OBrQcO1G/}ODQQcO1G/}OD[QcO1G/}ODiQcO1G/}OOQa1G/[1G/[OEzQcO1G/[OFRQcO1G/[OFYQcO1G/[OGXQcO1G/[OFaQcO1G/[OOQ`-E8S-E8SOGoQRO1G/]OGyQQO1G/]OHOQQO1G/]OHWQQO1G/]OHcQRO1G/]OHjQRO1G/]OHqQbO,59rOH{QQO1G/]OOQa1G/]1G/]OITQQO1G0POOQa1G0Q1G0QOI`QbO1G0QOOQO'#E^'#E^OITQQO1G0POOQa1G0P1G0POOQ`'#E_'#E_OI`QbO1G0QOIjQbO1G0XOJUQbO1G0WOJpQbO'#DjOKRQbO'#DjOKfQbO1G0ROOQ`-E8R-E8ROOQ`,5:o,5:oOOQ`-E8T-E8TOKqQQO,59wOOQO,59x,59xOOQO-E8U-E8UOKyQbO1G/bO:lQbO1G/vO:lQbO1G/YOLQQbO1G0SOL]QQO7+$wOOQa7+$w7+$wOLeQQO1G/^OLmQQO7+%kOOQa7+%k7+%kOLxQbO7+%lOOQa7+%l7+%lOOQO-E8[-E8[OOQ`-E8]-E8]OOQ`'#EY'#EYOMSQQO'#EYOM[QbO'#ErOOQ`,5:U,5:UOMoQbO'#DhOMtQQO'#DkOOQ`7+%m7+%mOMyQbO7+%mONOQbO7+%mONWQbO7+$|ONfQbO7+$|ONvQbO7+%bO! OQbO7+$tOOQ`7+%n7+%nO! TQbO7+%nO! 
YQbO7+%nOOQa<sAN>sOOQ`AN>SAN>SO!#dQbOAN>SO!#iQbOAN>SOOQ`-E8Z-E8ZOOQ`AN>hAN>hO!#qQbOAN>hO2sQbO,5:_O:lQbO,5:aOOQ`AN>tAN>tPHqQbO'#EUOOQ`7+%Y7+%YOOQ`G23nG23nO!#vQbOG23nP!!vQbO'#DsOOQ`G24SG24SO!#{QQO1G/yOOQ`1G/{1G/{OOQ`LD)YLD)YO:lQbO7+%eOOQ`<`#Z#be_!SSOt$Ouw$Ox}$O}!Od#S#T$R#T#Y>}#Y#Z@i#Z#b>}#b#cFV#c#f>}#f#gGY#g#h>}#h#iH]#i#o>}#o#p$R#p#qJm#q;'S$R;'S;=`$j<%l~$R~O$R~~KWS$WU!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RS$mP;=`<%l$R^$wU!TS#UYOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU%bU!TS#[QOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU%yZ!TSOr%trs&lst%ttu'Vuw%twx'Vx#O%t#O#P'V#P;'S%t;'S;=`'t<%lO%tU&sU!WQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RQ'YTOr'Vrs'is;'S'V;'S;=`'n<%lO'VQ'nO!WQQ'qP;=`<%l'VU'wP;=`<%l%t^(RZrY!TSOY'zYZ$RZt'ztu(tuw'zwx(tx#O'z#O#P(t#P;'S'z;'S;=`)]<%lO'zY(ySrYOY(tZ;'S(t;'S;=`)V<%lO(tY)YP;=`<%l(t^)`P;=`<%l'z~)hO#a~~)mO#_~U)tU!TS#ZQOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU*_U!TS#iQOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU*vX!TSOt$Ruw$Rx!Q$R!Q!R+c!R![.Q![#O$R#P;'S$R;'S;=`$j<%lO$RU+j`!TS|QOt$Ruw$Rx!O$R!O!P,l!P!Q$R!Q![.Q![#O$R#P#R$R#R#S.}#S#U$R#U#V/l#V#l$R#l#m1Q#m;'S$R;'S;=`$j<%lO$RU,qW!TSOt$Ruw$Rx!Q$R!Q![-Z![#O$R#P;'S$R;'S;=`$j<%lO$RU-bY!TS|QOt$Ruw$Rx!Q$R!Q![-Z![#O$R#P#R$R#R#S,l#S;'S$R;'S;=`$j<%lO$RU.X[!TS|QOt$Ruw$Rx!O$R!O!P,l!P!Q$R!Q![.Q![#O$R#P#R$R#R#S.}#S;'S$R;'S;=`$j<%lO$RU/SW!TSOt$Ruw$Rx!Q$R!Q![.Q![#O$R#P;'S$R;'S;=`$j<%lO$RU/qX!TSOt$Ruw$Rx!Q$R!Q!R0^!R!S0^!S#O$R#P;'S$R;'S;=`$j<%lO$RU0eX!TS|QOt$Ruw$Rx!Q$R!Q!R0^!R!S0^!S#O$R#P;'S$R;'S;=`$j<%lO$RU1V[!TSOt$Ruw$Rx!Q$R!Q![1{![!c$R!c!i1{!i#O$R#P#T$R#T#Z1{#Z;'S$R;'S;=`$j<%lO$RU2S[!TS|QOt$Ruw$Rx!Q$R!Q![1{![!c$R!c!i1{!i#O$R#P#T$R#T#Z1{#Z;'S$R;'S;=`$j<%lO$RU2}W!TSOt$Ruw$Rx!P$R!P!Q3g!Q#O$R#P;'S$R;'S;=`$j<%lO$RU3l^!TSOY4hYZ$RZt4htu5kuw4hwx5kx!P4h!P!Q$R!Q!}4h!}#O:^#O#P7y#P;'S4h;'S;=`;_<%lO4hU4o^!TS!lQOY4hYZ$RZt4htu5kuw4hwx5kx!P4h!P!Q8`!Q!}4h!}#O:^#O#P7y#P;'S4h;'S;=`;_<%lO4hQ5pX!lQOY5kZ!P5k!P!Q6]!Q!}5k!}#O6z#O#P7y#P;'S5k;'S;=`8Y<%lO5kQ6`P!P!Q6cQ6hU!lQ#Z#[6c#]#^6c#a#b6c#g#h6c#i#j6c#m#n6cQ6}VOY6zZ#O6z#O#P7d#P#Q5k#Q;'S6z;'S;=`7s<%lO6zQ7gSOY6zZ;'S6z;'S;=`7s<%lO6zQ7vP;=`<%l6zQ7|SOY5kZ;'S5k;'S;=`8Y<%lO5kQ8]P;=`<%l5kU8eW!TSOt$Ruw$Rx!P$R!P!Q8}!Q#O$R#P;'S$R;'S;=`$j<%lO$RU9Ub!TS!lQOt$Ruw$Rx#O$R#P#Z$R#Z#[8}#[#]$R#]#^8}#^#a$R#a#b8}#b#g$R#g#h8}#h#i$R#i#j8}#j#m$R#m#n8}#n;'S$R;'S;=`$j<%lO$RU:c[!TSOY:^YZ$RZt:^tu6zuw:^wx6zx#O:^#O#P7d#P#Q4h#Q;'S:^;'S;=`;X<%lO:^U;[P;=`<%l:^U;bP;=`<%l4hU;lU!TS!ZQOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RUQU#lQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU>kU!TS!bQOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU?S^!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#o>}#o;'S$R;'S;=`$j<%lO$RU@VU!RQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU@n_!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#UAm#U#o>}#o;'S$R;'S;=`$j<%lO$RUAr`!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#`>}#`#aBt#a#o>}#o;'S$R;'S;=`$j<%lO$RUBy`!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#g>}#g#hC{#h#o>}#o;'S$R;'S;=`$j<%lO$RUDQ`!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#X>}#X#YES#Y#o>}#o;'S$R;'S;=`$j<%lO$RUEZ^!XQ!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#o>}#o;'S$R;'S;=`$j<%lO$R^F^^#cW!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#o>}#o;'S$R;'S;=`$j<%lO$R^Ga^#eW!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#o>}#o;'S$R;'S;=`$j<%lO$R^Hd`#dW!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#f>}#f#gIf#g#o>}#o;'S$R;'S;=`$j<%lO$RUIk`!TSOt$Ruw$Rx}$R}!O>}!O!Q$R!Q![>}![!_$R!_!`@O!`#O$R#P#T$R#T#i>}#i#jC{#j#o>}#o;'S$R;'S;=`$j<%lO$RUJtUuQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$R~K]O#m~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new 
LocalTokenGroup("[~RP!O!PU~ZO#]~~", 11)],
  topRules: {"Program":[0,35]},
  specialized: [{term: 28, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 28, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}],
-  tokenPrec: 2202
+  tokenPrec: 2256
 })
diff --git a/src/parser/tests/strings.test.ts b/src/parser/tests/strings.test.ts
index 3f78f56..7b4a672 100644
--- a/src/parser/tests/strings.test.ts
+++ b/src/parser/tests/strings.test.ts
@@ -127,3 +127,52 @@ describe('string escape sequences', () => {
     `)
   })
 })
+
+describe('curly strings', () => {
+  test('work on one line', () => {
+    expect('{ one two three }').toMatchTree(`
+      String
+        CurlyString { one two three }
+    `)
+  })
+
+  test('work on multiple lines', () => {
+    expect(`{
+ one
+ two
+ three }`).toMatchTree(`
+      String
+        CurlyString {
+ one
+ two
+ three }`)
+  })
+
+  test('can contain other curlies', () => {
+    expect(`{ { one }
+ two
+ { three } }`).toMatchTree(`
+      String
+        CurlyString { { one }
+ two
+ { three } }`)
+  })
+})
+
+describe('double quoted strings', () => {
+  test("work", () => {
+    expect(`"hello world"`).toMatchTree(`
+      String
+        DoubleQuote "hello world"`)
+  })
+
+  test("don't interpolate", () => {
+    expect(`"hello $world"`).toMatchTree(`
+      String
+        DoubleQuote "hello $world"`)
+
+    expect(`"hello $(1 + 2)"`).toMatchTree(`
+      String
+        DoubleQuote "hello $(1 + 2)"`)
+  })
+})
\ No newline at end of file
diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts
index d18a515..8ad55c2 100644
--- a/src/parser/tokenizer.ts
+++ b/src/parser/tokenizer.ts
@@ -1,5 +1,5 @@
 import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
-import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do } from './shrimp.terms'
+import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString } from './shrimp.terms'
 
 // doobie doobie do (we need the `do` keyword to know when we're defining params)
 export function specializeKeyword(ident: string) {
@@ -18,6 +18,10 @@ export const setGlobals = (newGlobals: string[] | Record) => {
 export const tokenizer = new ExternalTokenizer(
   (input: InputStream, stack: Stack) => {
     const ch = getFullCodePoint(input, 0)
+
+    // Handle curly strings
+    if (ch === 123 /* { */) return consumeCurlyString(input, stack)
+
     if (!isWordChar(ch)) return
 
     // Don't consume things that start with digits - let Number token handle it
@@ -26,7 +30,7 @@
     // Don't consume things that start with - or + followed by a digit (negative/positive numbers)
     if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
 
-    const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
+    const isValidStart = isIdentStart(ch)
     const canBeWord = stack.canShift(Word)
 
     // Consume all word characters, tracking if it remains a valid identifier
@@ -119,13 +123,7 @@ const consumeWordToken = (
   }
 
   // Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
-  if (
-    !isLowercaseLetter(ch) &&
-    !isDigit(ch) &&
-    ch !== 45 /* - */ &&
-    ch !== 63 /* ? */ &&
-    !isEmojiOrUnicode(ch)
-  ) {
+  if (!isIdentChar(ch)) {
     if (!canBeWord) break
     isValidIdentifier = false
   }
@@ -157,6 +155,32 @@ const consumeRestOfWord = (input: InputStream, startPos: number, canBeWord: bool
   return pos
 }
 
+// Consumes { curly strings } and tracks braces so you can { have { braces { inside { braces } } } }
+const consumeCurlyString = (input: InputStream, stack: Stack) => {
+  if (!stack.canShift(CurlyString)) return
+
+  let depth = 0
+  let pos = 0
+
+  while (true) {
+    const ch = input.peek(pos)
+    if (ch < 0) return // EOF - invalid
+
+    if (ch === 123) depth++ // {
+    else if (ch === 125) { // }
+      depth--
+      if (depth === 0) {
+        pos++ // consume final }
+        break
+      }
+    }
+
+    pos++
+  }
+
+  input.acceptToken(CurlyString, pos)
+}
+
 // Check if this identifier is in scope (for property access detection)
 // Returns IdentifierBeforeDot token if in scope, null otherwise
 const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number | null => {
@@ -228,6 +252,14 @@ const chooseIdentifierToken = (input: InputStream, stack: Stack): number => {
 }
 
 // Character classification helpers
+export const isIdentStart = (ch: number): boolean => {
+  return isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
+}
+
+export const isIdentChar = (ch: number): boolean => {
+  return isLowercaseLetter(ch) || isDigit(ch) || ch === 45 /* - */ || ch === 63 /* ? */ || isEmojiOrUnicode(ch)
+}
+
 const isWhiteSpace = (ch: number): boolean => {
   return ch === 32 /* space */ || ch === 9 /* tab */ || ch === 13 /* \r */
 }
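
A quick sketch of how the new pieces fit together. This is illustrative only and not part of the diff; the sample input and expected values come from the tests above.

// tokenizeCurlyString splits a raw CurlyString token into literal chunks and
// sub-parsed interpolations (one [source, SyntaxNode] pair per interpolation):
import { tokenizeCurlyString } from '#parser/curlyTokenizer'

const tokens = tokenizeCurlyString('{ sum is $(a + b)! }')
// tokens[0] === ' sum is '          (literal text; the opening '{' is dropped)
// tokens[1] is ['a + b', <node>]    (interpolation source plus its parse tree)
// tokens[2] === '! '                (literal text; the closing '}' is dropped)

// #compileCurlyString then emits one value per token and joins them:
//   PUSH ' sum is '
//   ...instructions compiled from `a + b`...
//   PUSH '! '
//   STR_CONCAT 3   -> ' sum is 3! ' when a = 1 and b = 2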