diff --git a/src/compiler/tests/literals.test.ts b/src/compiler/tests/literals.test.ts
index c1dc14b..15d77e1 100644
--- a/src/compiler/tests/literals.test.ts
+++ b/src/compiler/tests/literals.test.ts
@@ -155,3 +155,29 @@ describe('dict literals', () => {
       c=3]`).toEvaluateTo({ a: 1, b: 2, c: 3 })
   })
 })
+
+describe('curly strings', () => {
+  test('work on one line', () => {
+    expect('{ one two three }').toEvaluateTo(" one two three ")
+  })
+
+  test('work on multiple lines', () => {
+    expect(`{
+ one
+ two
+ three
+ }`).toEvaluateTo("\n one\n two\n three\n ")
+  })
+
+  test('can contain other curlies', () => {
+    expect(`{
+ { one }
+ two
+ { three }
+ }`).toEvaluateTo("\n { one }\n two\n { three }\n ")
+  })
+
+  test("don't interpolate", () => {
+    expect(`{ sum is $(a + b)! }`).toEvaluateTo(` sum is $(a + b)! `)
+  })
+})
\ No newline at end of file
diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar
index e9bc2ec..6221c2e 100644
--- a/src/parser/shrimp.grammar
+++ b/src/parser/shrimp.grammar
@@ -37,7 +37,7 @@
   finally { @specialize[@name=keyword] }
   throw { @specialize[@name=keyword] }
   null { @specialize[@name=Null] }
-@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot }
+@external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, curlyString }
 @external specialize {Identifier} specializeKeyword from "./tokenizer" { Do }
 
 @precedence {
@@ -205,7 +205,9 @@ expression {
     IdentifierBeforeDot dot (Number | Identifier | ParenExpr)
   }
 
-  String { "'" stringContent* "'" }
+  String {
+    "'" stringContent* "'" | curlyString
+  }
 }
 
 stringContent {
diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts
index 716ef1c..3b5dc00 100644
--- a/src/parser/shrimp.terms.ts
+++ b/src/parser/shrimp.terms.ts
@@ -23,6 +23,7 @@ export const
   AssignableIdentifier = 21,
   Word = 22,
   IdentifierBeforeDot = 23,
+  curlyString = 85,
   Do = 24,
   Comment = 25,
   Program = 26,
diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts
index afd774f..3960f20 100644
--- a/src/parser/shrimp.ts
+++ b/src/parser/shrimp.ts
@@ -7,11 +7,11 @@ import {highlighting} from "./highlight"
 const spec_Identifier = {__proto__:null,if:66, null:94, catch:100, finally:106, end:108, else:116, while:130, try:136, throw:140}
 export const parser = LRParser.deserialize({
   version: 14,
-  states: 
"9[QYQbOOO!dOSO'#DPOOQa'#DV'#DVO#mQbO'#DfO%RQcO'#E^OOQa'#E^'#E^O&XQcO'#E^O'ZQcO'#E]O'qQcO'#E]O)^QRO'#DOO*mQcO'#EWO*wQcO'#EWO+XQbO'#C{O,SOpO'#CyOOQ`'#EX'#EXO,XQbO'#EWO,cQRO'#DuOOQ`'#EW'#EWO,wQQO'#EVOOQ`'#EV'#EVOOQ`'#Dw'#DwQYQbOOO-PQbO'#DYO-[QbO'#C|O.PQbO'#DnO.tQQO'#DqO.PQbO'#DsO.yQbO'#DRO/RQWO'#DSOOOO'#E`'#E`OOOO'#Dx'#DxO/gOSO,59kOOQa,59k,59kOOQ`'#Dy'#DyO/uQbO,5:QO/|QbO'#DWO0WQQO,59qOOQa,5:Q,5:QO0cQbO,5:QOOQa'#E]'#E]OOQ`'#Dl'#DlOOQ`'#El'#ElOOQ`'#EQ'#EQO0mQbO,59dO1gQbO,5:bO.PQbO,59jO.PQbO,59jO.PQbO,59jO.PQbO,5:VO.PQbO,5:VO.PQbO,5:VO1wQRO,59gO2OQRO,59gO2ZQRO,59gO2UQQO,59gO2lQQO,59gO2tObO,59eO3PQbO'#ERO3[QbO,59cO3vQbO,5:[O1gQbO,5:aOOQ`,5:q,5:qOOQ`-E7u-E7uOOQ`'#Dz'#DzO4ZQbO'#DZO4fQbO'#D[OOQO'#D{'#D{O4^QQO'#DZO4tQQO,59tO4yQcO'#E]O6_QRO'#E[O6fQRO'#E[OOQO'#E['#E[O6qQQO,59hO6vQRO,5:YO6}QRO,5:YO3vQbO,5:]O7YQcO,5:_O8UQcO,5:_O8`QcO,5:_OOOO,59m,59mOOOO,59n,59nOOOO-E7v-E7vOOQa1G/V1G/VOOQ`-E7w-E7wO8pQQO1G/]OOQa1G/l1G/lO8{QbO1G/lOOQ`,59r,59rOOQO'#D}'#D}O8pQQO1G/]OOQa1G/]1G/]OOQ`'#EO'#EOO8{QbO1G/lOOQ`-E8O-E8OOOQ`1G/|1G/|OOQa1G/U1G/UO:WQcO1G/UO:_QcO1G/UO:fQcO1G/UOOQa1G/q1G/qO;_QcO1G/qO;iQcO1G/qO;sQcO1G/qOOQa1G/R1G/ROOQa1G/P1G/PO]QbO1G/vOOQ`1G/{1G/{OOQ`-E7x-E7xO>hQQO,59uOOQO,59v,59vOOQO-E7y-E7yO>pQbO1G/`O3vQbO1G/SO3vQbO1G/tO?TQbO1G/wO?`QQO7+$wOOQa7+$w7+$wO?kQbO7+%WOOQa7+%W7+%WOOQO-E7{-E7{OOQ`-E7|-E7|OOQ`'#D|'#D|O?uQQO'#D|O?zQbO'#EiOOQ`,59{,59{O@kQbO'#D_O@pQQO'#DbOOQ`7+%b7+%bO@uQbO7+%bO@zQbO7+%bOASQbO7+$zOA_QbO7+$zOA{QbO7+$nOBTQbO7+%`OOQ`7+%c7+%cOBYQbO7+%cOB_QbO7+%cOOQa<hAN>hOOQ`AN>QAN>QOCuQbOAN>QOCzQbOAN>QOOQ`-E7}-E7}OOQ`AN=tAN=tODSQbOAN=tO-[QbO,5:RO3vQbO,5:TOOQ`AN>iAN>iOOQ`7+%P7+%POOQ`G23lG23lODXQbOG23lPD^QbO'#DgOOQ`G23`G23`ODcQQO1G/mOOQ`1G/o1G/oOOQ`LD)WLD)WO3vQbO7+%XOOQ`<UQbO'#DaO>uQbO1G/vOOQ`1G/{1G/{OOQ`-E7x-E7xO?QQQO,59uOOQO,59v,59vOOQO-E7y-E7yO?YQbO1G/`O4]QbO1G/SO4]QbO1G/tO?mQbO1G/wO?xQQO7+$wOOQa7+$w7+$wO@TQbO7+%WOOQa7+%W7+%WOOQO-E7{-E7{OOQ`-E7|-E7|OOQ`'#D|'#D|O@_QQO'#D|O@dQbO'#EjOOQ`,59{,59{OATQbO'#D_OAYQQO'#DbOOQ`7+%b7+%bOA_QbO7+%bOAdQbO7+%bOAlQbO7+$zOAwQbO7+$zOBeQbO7+$nOBmQbO7+%`OOQ`7+%c7+%cOBrQbO7+%cOBwQbO7+%cOOQa<hAN>hOOQ`AN>QAN>QOD_QbOAN>QODdQbOAN>QOOQ`-E7}-E7}OOQ`AN=tAN=tODlQbOAN=tO-kQbO,5:RO4]QbO,5:TOOQ`AN>iAN>iOOQ`7+%P7+%POOQ`G23lG23lODqQbOG23lPDvQbO'#DgOOQ`G23`G23`OD{QQO1G/mOOQ`1G/o1G/oOOQ`LD)WLD)WO4]QbO7+%XOOQ`<c#Y#o,w#o;'S#{;'S;=`$d<%lO#{U>j[wQtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^?g[#VWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^@d[#XWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^Aa^#WWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#f,w#f#gB]#g#o,w#o;'S#{;'S;=`$d<%lO#{UBb^tSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#i,w#i#j=b#j#o,w#o;'S#{;'S;=`$d<%lO#{UCeU!aQtSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~C|O#a~", - tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!|~~", 11)], + tokenData: 
"C|~R|OX#{XY$jYZ%TZp#{pq$jqs#{st%ntu'tuw#{wx'yxy(Oyz(iz{#{{|)S|}#{}!O+v!O!P#{!P!Q.]!Q![)q![!]6x!]!^%T!^!}#{!}#O7c#O#P9X#P#Q9^#Q#R#{#R#S9w#S#T#{#T#Y,w#Y#Z:b#Z#b,w#b#c?`#c#f,w#f#g@]#g#h,w#h#iAY#i#o,w#o#p#{#p#qC^#q;'S#{;'S;=`$d<%l~#{~O#{~~CwS$QUtSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{S$gP;=`<%l#{^$qUtS!yYOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U%[UtS#]QOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{^%sWtSOp#{pq&]qt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{^&dZiYtSOY&]YZ#{Zt&]tu'Vuw&]wx'Vx#O&]#O#P'V#P;'S&];'S;=`'n<%lO&]Y'[SiYOY'VZ;'S'V;'S;=`'h<%lO'VY'kP;=`<%l'V^'qP;=`<%l&]~'yO#U~~(OO#S~U(VUtS#OQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U(pUtS#`QOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U)XWtSOt#{uw#{x!Q#{!Q![)q![#O#{#P;'S#{;'S;=`$d<%lO#{U)xYtSnQOt#{uw#{x!O#{!O!P*h!P!Q#{!Q![)q![#O#{#P;'S#{;'S;=`$d<%lO#{U*mWtSOt#{uw#{x!Q#{!Q![+V![#O#{#P;'S#{;'S;=`$d<%lO#{U+^WtSnQOt#{uw#{x!Q#{!Q![+V![#O#{#P;'S#{;'S;=`$d<%lO#{U+{^tSOt#{uw#{x}#{}!O,w!O!Q#{!Q![)q![!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{U,|[tSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{U-yU{QtSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U.bWtSOt#{uw#{x!P#{!P!Q.z!Q#O#{#P;'S#{;'S;=`$d<%lO#{U/P^tSOY/{YZ#{Zt/{tu1Ouw/{wx1Ox!P/{!P!Q#{!Q!}/{!}#O5q#O#P3^#P;'S/{;'S;=`6r<%lO/{U0S^tSxQOY/{YZ#{Zt/{tu1Ouw/{wx1Ox!P/{!P!Q3s!Q!}/{!}#O5q#O#P3^#P;'S/{;'S;=`6r<%lO/{Q1TXxQOY1OZ!P1O!P!Q1p!Q!}1O!}#O2_#O#P3^#P;'S1O;'S;=`3m<%lO1OQ1sP!P!Q1vQ1{UxQ#Z#[1v#]#^1v#a#b1v#g#h1v#i#j1v#m#n1vQ2bVOY2_Z#O2_#O#P2w#P#Q1O#Q;'S2_;'S;=`3W<%lO2_Q2zSOY2_Z;'S2_;'S;=`3W<%lO2_Q3ZP;=`<%l2_Q3aSOY1OZ;'S1O;'S;=`3m<%lO1OQ3pP;=`<%l1OU3xWtSOt#{uw#{x!P#{!P!Q4b!Q#O#{#P;'S#{;'S;=`$d<%lO#{U4ibtSxQOt#{uw#{x#O#{#P#Z#{#Z#[4b#[#]#{#]#^4b#^#a#{#a#b4b#b#g#{#g#h4b#h#i#{#i#j4b#j#m#{#m#n4b#n;'S#{;'S;=`$d<%lO#{U5v[tSOY5qYZ#{Zt5qtu2_uw5qwx2_x#O5q#O#P2w#P#Q/{#Q;'S5q;'S;=`6l<%lO5qU6oP;=`<%l5qU6uP;=`<%l/{U7PUtS!QQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U7jW#[QtSOt#{uw#{x!_#{!_!`8S!`#O#{#P;'S#{;'S;=`$d<%lO#{U8XVtSOt#{uw#{x#O#{#P#Q8n#Q;'S#{;'S;=`$d<%lO#{U8uU#ZQtSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~9^O#V~U9eU#_QtSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U:OUtS!XQOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{U:g]tSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#U;`#U#o,w#o;'S#{;'S;=`$d<%lO#{U;e^tSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#`,w#`#ac#Y#o,w#o;'S#{;'S;=`$d<%lO#{U>j[wQtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^?g[#WWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^@d[#YWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#o,w#o;'S#{;'S;=`$d<%lO#{^Aa^#XWtSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#f,w#f#gB]#g#o,w#o;'S#{;'S;=`$d<%lO#{UBb^tSOt#{uw#{x}#{}!O,w!O!_#{!_!`-r!`#O#{#P#T#{#T#i,w#i#j=b#j#o,w#o;'S#{;'S;=`$d<%lO#{UCeU!aQtSOt#{uw#{x#O#{#P;'S#{;'S;=`$d<%lO#{~C|O#b~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!}~~", 11)], topRules: {"Program":[0,26]}, specialized: [{term: 20, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 20, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 1634 + tokenPrec: 1658 }) diff --git a/src/parser/tests/strings.test.ts b/src/parser/tests/strings.test.ts index 3f78f56..288315b 100644 --- a/src/parser/tests/strings.test.ts +++ b/src/parser/tests/strings.test.ts @@ -127,3 +127,31 @@ describe('string escape sequences', () => { `) }) }) + +describe('curly strings', () => { + test('work on one line', () => { + expect('{ one two three }').toMatchTree(` + String one two three + `) + }) + + test('work on multiple lines', () => { + expect(`{ + one + two + three 
+    }`).toMatchTree(`
+ String
+ one
+ two
+ three `)
+  })
+
+  test('can contain other curlies', () => {
+    expect(`{ { one }
+ two
+ { three } }`).toMatchTree(`
+ String { one }
+ two
+ { three } `)
+  })
+})
\ No newline at end of file
diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts
index cbaac67..e6b6971 100644
--- a/src/parser/tokenizer.ts
+++ b/src/parser/tokenizer.ts
@@ -1,5 +1,5 @@
 import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
-import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do } from './shrimp.terms'
+import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, curlyString } from './shrimp.terms'
 
 // doobie doobie do (we need the `do` keyword to know when we're defining params)
 export function specializeKeyword(ident: string) {
@@ -18,6 +18,12 @@
 export const tokenizer = new ExternalTokenizer(
   (input: InputStream, stack: Stack) => {
     const ch = getFullCodePoint(input, 0)
+
+    // Handle curly strings
+    if (ch === 123) { // {
+      return consumeCurlyString(input, stack)
+    }
+
     if (!isWordChar(ch)) return
 
     // Don't consume things that start with digits - let Number token handle it
@@ -151,6 +157,31 @@ const consumeRestOfWord = (input: InputStream, startPos: number, canBeWord: bool
   return pos
 }
 
+const consumeCurlyString = (input: InputStream, stack: Stack) => {
+  if (!stack.canShift(curlyString)) return
+
+  let depth = 0
+  let pos = 0
+
+  while (true) {
+    const ch = input.peek(pos)
+    if (ch < 0) return // EOF - invalid
+
+    if (ch === 123) depth++ // {
+    else if (ch === 125) { // }
+      depth--
+      if (depth === 0) {
+        pos++ // consume final }
+        break
+      }
+    }
+
+    pos++
+  }
+
+  input.acceptToken(curlyString, pos)
+}
+
 // Check if this identifier is in scope (for property access detection)
 // Returns IdentifierBeforeDot token if in scope, null otherwise
 const checkForDotGet = (input: InputStream, stack: Stack, pos: number): number | null => {
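Reviewer note (illustrative, not part of the diff): the heart of the change is the brace-matching scan in consumeCurlyString — bump a depth counter on `{`, drop it on `}`, accept the token once depth returns to zero, and bail out with no token if EOF arrives first. The sketch below restates that loop over a plain string, without the lezer InputStream/Stack machinery; curlyStringLength is an assumed name for illustration only and does not exist in the repo.

// Sketch only — mirrors consumeCurlyString's depth counting on a plain string.
// Returns the length of the curly string starting at index 0, or -1 when the
// braces never balance (the tokenizer's EOF case, where no token is accepted).
const curlyStringLength = (src: string): number => {
  let depth = 0
  for (let pos = 0; pos < src.length; pos++) {
    if (src[pos] === '{') depth++
    else if (src[pos] === '}' && --depth === 0) return pos + 1 // include final }
  }
  return -1
}

// curlyStringLength('{ one { two } three } tail')  -> 21 (stops after the outer })
// curlyStringLength('{ never closed')              -> -1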