diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index df60f2b..4c68954 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -164,6 +164,23 @@ export class Compiler { return [[`PUSH`, value === 'true']] } + case terms.RegExp: { + // remove the surrounding slashes and any flags + const [_, pattern, flags] = value.match(/^\/\/(.*)\/\/([gimsuy]*)$/) || [] + if (!pattern) { + throw new CompilerError(`Invalid regex literal: ${value}`, node.from, node.to) + } + let regex: RegExp + + try { + regex = new RegExp(pattern, flags) + } catch (e) { + throw new CompilerError(`Invalid regex literal: ${value}`, node.from, node.to) + } + + return [['PUSH', regex]] + } + case terms.Identifier: { return [[`TRY_LOAD`, value]] } diff --git a/src/compiler/compiler.test.ts b/src/compiler/tests/compiler.test.ts similarity index 90% rename from src/compiler/compiler.test.ts rename to src/compiler/tests/compiler.test.ts index 63dc3a7..3fbe62c 100644 --- a/src/compiler/compiler.test.ts +++ b/src/compiler/tests/compiler.test.ts @@ -182,7 +182,7 @@ describe('string interpolation', () => { expect(`'price is \\$10'`).toEvaluateTo('price is $10') }) - test('string with mixed interpolation and escapes', () => { + test.only('string with mixed interpolation and escapes', () => { expect(`x = 5; 'value: $x\\ntotal: $(x * 2)'`).toEvaluateTo('value: 5\ntotal: 10') }) @@ -194,3 +194,21 @@ describe('string interpolation', () => { expect(`a = 3; b = 4; 'result: $(a * (b + 1))'`).toEvaluateTo('result: 15') }) }) + +describe('RegExp', () => { + test('simple regex', () => { + expect('//hello//').toEvaluateTo(/hello/) + }) + + test('regex with flags', () => { + expect('//[a-z]+//gi').toEvaluateTo(/[a-z]+/gi) + }) + + test('regex in assignment', () => { + expect('pattern = //\\d+//; pattern').toEvaluateTo(/\d+/) + }) + + test('invalid regex pattern', () => { + expect('//[unclosed//').toFailEvaluation() + }) +}) diff --git a/src/compiler/pipe.test.ts b/src/compiler/tests/pipe.test.ts similarity index 100% rename from src/compiler/pipe.test.ts rename to src/compiler/tests/pipe.test.ts diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index e126797..2d1a409 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -5,7 +5,7 @@ @top Program { item* } @tokens { - @precedence { Number "-" } + @precedence { Number "-" RegExp "/"} StringFragment { !['\\$]+ } NamedArgPrefix { $[a-z]+ "=" } @@ -19,6 +19,7 @@ colon[closedBy="end", @name="colon"] { ":" } end[openedBy="colon", @name="end"] { "end" } Underscore { "_" } + RegExp { "//" (![/\\\n[] | "\\" ![\n] | "[" (![\n\\\]] | "\\" ![\n])* "]")+ ("//" $[gimsuy]*)? } // Stolen from the lezer JavaScript grammar "fn" [@name=keyword] "if" [@name=keyword] "elsif" [@name=keyword] @@ -197,7 +198,7 @@ EscapeSeq { // to go through ambiguousFunctionCall (which is what we want semantically). // Yes, it is annoying and I gave up trying to use GLR to fix it. expressionWithoutIdentifier { - ParenExpr | Word | String | Number | Boolean + ParenExpr | Word | String | Number | Boolean | RegExp } block { diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index 9990b95..a3e3ca1 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -16,15 +16,16 @@ export const EscapeSeq = 26, Number = 27, Boolean = 28, - FunctionDef = 29, - Params = 31, - colon = 32, - end = 33, - Underscore = 34, - NamedArg = 35, - NamedArgPrefix = 36, - IfExpr = 38, - ThenBlock = 41, - ElsifExpr = 42, - ElseExpr = 44, - Assign = 46 + RegExp = 29, + FunctionDef = 30, + Params = 32, + colon = 33, + end = 34, + Underscore = 35, + NamedArg = 36, + NamedArgPrefix = 37, + IfExpr = 39, + ThenBlock = 42, + ElsifExpr = 43, + ElseExpr = 45, + Assign = 47 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 18ecbf2..e62de4b 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -4,20 +4,20 @@ import {tokenizer} from "./tokenizer" import {highlighting} from "./highlight" export const parser = LRParser.deserialize({ version: 14, - states: ".WQVQaOOO!rQbO'#CdO#SQPO'#CeO#bQPO'#DhO$[QaO'#CcO$cOSO'#CsOOQ`'#Dl'#DlO$qQPO'#DkO%YQaO'#DvOOQ`'#Cy'#CyOOQO'#Di'#DiO%bQPO'#DhO%pQaO'#DzOOQO'#DS'#DSOOQO'#Dh'#DhO%wQPO'#DgOOQ`'#Dg'#DgOOQ`'#D]'#D]QVQaOOOOQ`'#Dk'#DkOOQ`'#Cb'#CbO&PQaO'#DPOOQ`'#Dj'#DjOOQ`'#D^'#D^O&^QbO,58{O&}QaO,59vO%pQaO,59PO%pQaO,59PO'[QbO'#CdO(gQPO'#CeO(wQPO,58}O)YQPO,58}O)TQPO,58}O*TQPO,58}O*]QaO'#CuO*eQWO'#CvOOOO'#Dp'#DpOOOO'#D_'#D_O*yOSO,59_OOQ`,59_,59_OOQ`'#D`'#D`O+XQaO'#C{O+aQPO,5:bO+fQaO'#DbO+kQPO,58zO+|QPO,5:fO,TQPO,5:fOOQ`,5:R,5:ROOQ`-E7Z-E7ZOOQ`,59k,59kOOQ`-E7[-E7[OOQO1G/b1G/bOOQO1G.k1G.kO,YQPO1G.kO%pQaO,59UO%pQaO,59UOOQ`1G.i1G.iOOOO,59a,59aOOOO,59b,59bOOOO-E7]-E7]OOQ`1G.y1G.yOOQ`-E7^-E7^O,tQaO1G/|O-UQbO'#CdOOQO,59|,59|OOQO-E7`-E7`O-uQaO1G0QOOQO1G.p1G.pO.VQPO1G.pO.aQPO7+%hO.fQaO7+%iOOQO'#DU'#DUOOQO7+%l7+%lO.vQaO7+%mOOQ`<oAN>oO%pQaO'#DWOOQO'#Dc'#DcO0ZQPOAN>sO0fQPO'#DYOOQOAN>sAN>sO0kQPOAN>sO0pQPO,59rO0wQPO,59rOOQO-E7a-E7aOOQOG24_G24_O0|QPOG24_O1RQPO,59tO1WQPO1G/^OOQOLD)yLD)yO.fQaO1G/`O.vQaO7+$xOOQO7+$z7+$zOOQO<pAN>pO%yQaO'#DXOOQO'#Dd'#DdO0dQPOAN>tO0oQPO'#DZOOQOAN>tAN>tO0tQPOAN>tO0yQPO,59sO1QQPO,59sOOQO-E7b-E7bOOQOG24`G24`O1VQPOG24`O1[QPO,59uO1aQPO1G/_OOQOLD)zLD)zO.oQaO1G/aO/PQaO7+$yOOQO7+${7+${OOQO<S[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#g2i#g#h>x#h#o2i#o;'S$_;'S;=`$v<%lO$_V>}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#X2i#X#Y?s#Y#o2i#o;'S$_;'S;=`$v<%lO$_V?zYlRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_V@qYnRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VAf[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#Y2i#Y#ZB[#Z#o2i#o;'S$_;'S;=`$v<%lO$_VBcYwPhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^CYY!gWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_VC}[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gDs#g#o2i#o;'S$_;'S;=`$v<%lO$_VDzYfRhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$_^EqY!iWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#o2i#o;'S$_;'S;=`$v<%lO$__Fh[!hWhSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#f2i#f#gG^#g#o2i#o;'S$_;'S;=`$v<%lO$_VGc[hSOt$_uw$_x!_$_!_!`2O!`#O$_#P#T$_#T#i2i#i#j>x#j#o2i#o;'S$_;'S;=`$v<%lO$_VH`UuRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~HwO!p~", + tokenData: "!!{~R!SOX$_XY$|YZ%gZp$_pq$|qr&Qrt$_tu'Yuw$_wx'_xy'dyz'}z{(h{|)R|}$_}!O)l!O!P$_!P!Q,b!Q![*]![!]5P!]!^%g!^!_5j!_!`6t!`!a7_!a#O$_#O#P8i#P#R$_#R#S8n#S#T$_#T#U9X#U#X:m#X#Y=S#Y#ZDY#Z#]:m#]#^Ie#^#b:m#b#cKV#c#dK|#d#f:m#f#gMn#g#h:m#h#iNe#i#o:m#o#p$_#p#q!!]#q;'S$_;'S;=`$v<%l~$_~O$_~~!!vS$dUhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_S$yP;=`<%l$__%TUhS!ZZOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V%nUhS!mROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V&VWhSOt$_uw$_x!_$_!_!`&o!`#O$_#P;'S$_;'S;=`$v<%lO$_V&vU`RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~'_O!f~~'dO!d~V'kUhS!bROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(UUhS!cROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V(oUYRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)YU[RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V)sWhS]ROt$_uw$_x!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V*dYhSkROt$_uw$_x!O$_!O!P+S!P!Q$_!Q![*]![#O$_#P;'S$_;'S;=`$v<%lO$_V+XWhSOt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V+xWhSkROt$_uw$_x!Q$_!Q![+q![#O$_#P;'S$_;'S;=`$v<%lO$_V,iWhSZROt$_uw$_x!P$_!P!Q-R!Q#O$_#P;'S$_;'S;=`$v<%lO$_V-W^hSOY.SYZ$_Zt.Stu/Vuw.Swx/Vx!P.S!P!Q$_!Q!}.S!}#O3x#O#P1e#P;'S.S;'S;=`4y<%lO.SV.Z^hSmROY.SYZ$_Zt.Stu/Vuw.Swx/Vx!P.S!P!Q1z!Q!}.S!}#O3x#O#P1e#P;'S.S;'S;=`4y<%lO.SR/[XmROY/VZ!P/V!P!Q/w!Q!}/V!}#O0f#O#P1e#P;'S/V;'S;=`1t<%lO/VR/zP!P!Q/}R0SUmR#Z#[/}#]#^/}#a#b/}#g#h/}#i#j/}#m#n/}R0iVOY0fZ#O0f#O#P1O#P#Q/V#Q;'S0f;'S;=`1_<%lO0fR1RSOY0fZ;'S0f;'S;=`1_<%lO0fR1bP;=`<%l0fR1hSOY/VZ;'S/V;'S;=`1t<%lO/VR1wP;=`<%l/VV2PWhSOt$_uw$_x!P$_!P!Q2i!Q#O$_#P;'S$_;'S;=`$v<%lO$_V2pbhSmROt$_uw$_x#O$_#P#Z$_#Z#[2i#[#]$_#]#^2i#^#a$_#a#b2i#b#g$_#g#h2i#h#i$_#i#j2i#j#m$_#m#n2i#n;'S$_;'S;=`$v<%lO$_V3}[hSOY3xYZ$_Zt3xtu0fuw3xwx0fx#O3x#O#P1O#P#Q.S#Q;'S3x;'S;=`4s<%lO3xV4vP;=`<%l3xV4|P;=`<%l.ST5WUhSqPOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V5qWaRhSOt$_uw$_x!_$_!_!`6Z!`#O$_#P;'S$_;'S;=`$v<%lO$_V6bUbRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V6{U_RhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V7fWcRhSOt$_uw$_x!_$_!_!`8O!`#O$_#P;'S$_;'S;=`$v<%lO$_V8VUdRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~8nO!g~V8uUhSsROt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_V9^[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#b:m#b#c;b#c#o:m#o;'S$_;'S;=`$v<%lO$_U:ZUuQhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_U:rYhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_V;g[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#W:m#W#X<]#X#o:m#o;'S$_;'S;=`$v<%lO$_VT#a#b:m#b#cBh#c#o:m#o;'S$_;'S;=`$v<%lO$_V>Y[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#g:m#g#h?O#h#o:m#o;'S$_;'S;=`$v<%lO$_V?T^hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#X:m#X#Y@P#Y#]:m#]#^@v#^#o:m#o;'S$_;'S;=`$v<%lO$_V@WY!OPhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_V@{[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#Y:m#Y#ZAq#Z#o:m#o;'S$_;'S;=`$v<%lO$_VAxY|PhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_VBm[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#W:m#W#XCc#X#o:m#o;'S$_;'S;=`$v<%lO$_VCjYhSrROt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_VD_]hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#UEW#U#b:m#b#cHn#c#o:m#o;'S$_;'S;=`$v<%lO$_VE][hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#`:m#`#aFR#a#o:m#o;'S$_;'S;=`$v<%lO$_VFW[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#g:m#g#hF|#h#o:m#o;'S$_;'S;=`$v<%lO$_VGR[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#X:m#X#YGw#Y#o:m#o;'S$_;'S;=`$v<%lO$_VHOYlRhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_VHuYoRhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_VIj[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#Y:m#Y#ZJ`#Z#o:m#o;'S$_;'S;=`$v<%lO$_VJgYxPhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_^K^Y!hWhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_VLR[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#f:m#f#gLw#g#o:m#o;'S$_;'S;=`$v<%lO$_VMOYfRhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$_^MuY!jWhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#o:m#o;'S$_;'S;=`$v<%lO$__Nl[!iWhSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#f:m#f#g! b#g#o:m#o;'S$_;'S;=`$v<%lO$_V! g[hSOt$_uw$_x!_$_!_!`:S!`#O$_#P#T$_#T#i:m#i#jF|#j#o:m#o;'S$_;'S;=`$v<%lO$_V!!dUvRhSOt$_uw$_x#O$_#P;'S$_;'S;=`$v<%lO$_~!!{O!q~", tokenizers: [0, 1, 2, 3, tokenizer], topRules: {"Program":[0,3]}, - tokenPrec: 749 + tokenPrec: 758 }) diff --git a/src/parser/tests/basics.test.ts b/src/parser/tests/basics.test.ts index 42b8db1..cdbba7c 100644 --- a/src/parser/tests/basics.test.ts +++ b/src/parser/tests/basics.test.ts @@ -268,29 +268,3 @@ describe('Assign', () => { end end`) }) }) - -describe('Word escapes', () => { - test('parses escaped spaces in words', () => { - expect('echo my\\ file').toMatchTree(` - FunctionCall - Identifier echo - PositionalArg - Word my\\ file`) - }) - - test('parses multiple escaped spaces', () => { - expect('cat file\\ with\\ spaces.txt').toMatchTree(` - FunctionCall - Identifier cat - PositionalArg - Word file\\ with\\ spaces.txt`) - }) - - test('parses escaped backslash', () => { - expect('echo path\\\\file').toMatchTree(` - FunctionCall - Identifier echo - PositionalArg - Word path\\\\file`) - }) -}) diff --git a/src/parser/tests/strings.test.ts b/src/parser/tests/strings.test.ts index 00f4cd3..0fd6231 100644 --- a/src/parser/tests/strings.test.ts +++ b/src/parser/tests/strings.test.ts @@ -116,4 +116,14 @@ describe('string escape sequences', () => { StringFragment 20 `) }) + + test('escape sequences with interpolation', () => { + expect("'value: $x\\n'").toMatchTree(` + String + StringFragment value: + Interpolation + Identifier x + EscapeSeq \\n + `) + }) }) diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index b9f2060..07fbc97 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -16,17 +16,6 @@ export const tokenizer = new ExternalTokenizer((input: InputStream, stack: Stack if (!isWordChar(ch)) break - // Handle backslash escapes: consume backslash + next char - if (ch === 92 /* \ */) { - isValidIdentifier = false - pos += getCharSize(ch) // skip backslash - const nextCh = getFullCodePoint(input, pos) - if (nextCh !== -1) { // if not EOF - pos += getCharSize(nextCh) // skip escaped char - } - continue - } - // Certain characters might end a word or identifier if they are followed by whitespace. // This allows things like `a = hello; 2` of if `x: y` to parse correctly. if (canBeWord && (ch === 59 /* ; */ || ch === 58) /* : */) { diff --git a/src/testSetup.ts b/src/testSetup.ts index 2f9976e..d6cca27 100644 --- a/src/testSetup.ts +++ b/src/testSetup.ts @@ -101,7 +101,11 @@ expect.extend({ const vm = new VM(compiler.bytecode) await vm.run() const result = await vm.run() - const value = VMResultToValue(result) + let value = VMResultToValue(result) + + // Just treat regex as strings for comparison purposes + if (expected instanceof RegExp) expected = String(expected) + if (value instanceof RegExp) value = String(value) if (value === expected) { return { pass: true } @@ -192,7 +196,12 @@ const trimWhitespace = (str: string): string => { } const VMResultToValue = (result: Value): unknown => { - if (result.type === 'number' || result.type === 'boolean' || result.type === 'string') { + if ( + result.type === 'number' || + result.type === 'boolean' || + result.type === 'string' || + result.type === 'regex' + ) { return result.value } else if (result.type === 'null') { return null