From dba8430d9a0044c567e15dabada67a87a10d88a5 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Sat, 25 Oct 2025 19:18:27 -0700 Subject: [PATCH 1/7] Add # comments --- src/parser/shrimp.grammar | 3 ++- src/parser/shrimp.ts | 14 +++++++------- src/parser/tests/basics.test.ts | 24 ++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index 741dad0..c608994 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -2,7 +2,7 @@ @context trackScope from "./scopeTracker" -@skip { space } +@skip { space | comment } @top Program { item* } @@ -18,6 +18,7 @@ newlineOrSemicolon { "\n" | ";" } eof { @eof } space { " " | "\t" } + comment { "#" ![\n]* } leftParen { "(" } rightParen { ")" } colon[closedBy="end", @name="colon"] { ":" } diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 1db89f9..dfcbbe4 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -7,11 +7,11 @@ import {highlighting} from "./highlight" const spec_Identifier = {__proto__:null,do:52, end:58, null:74, if:88, elseif:96, else:100} export const parser = LRParser.deserialize({ version: 14, - states: ".jQVQbOOO#XQcO'#CrO$RQRO'#CsO$aQcO'#DmO$xQbO'#DsOOQ`'#Cu'#CuO%QQbO'#CqO%rOSO'#CzOOQa'#Dq'#DqO&QOpO'#DSO&VQcO'#DpOOQ`'#Dn'#DnO&nQbO'#DmO&|QbO'#EQOOQ`'#DX'#DXO'kQRO'#DaOOQ`'#Dm'#DmO'pQQO'#DlOOQ`'#Dl'#DlOOQ`'#Db'#DbQVQbOOOOQa'#Dp'#DpOOQ`'#Cp'#CpO'xQbO'#DUOOQ`'#Do'#DoOOQ`'#Dc'#DcO(SQbO,59ZO&|QbO,59_O&|QbO,59_OOQ`'#Dd'#DdO(pQbO'#CwO(xQQO,5:_O)iQRO'#CsO)yQRO,59]O*[QRO,59]O*VQQO,59]O+VQQO,59]O+_QbO'#C|O+gQWO'#C}OOOO'#Dy'#DyOOOO'#Df'#DfO+{OSO,59fOOQa,59f,59fO,ZO`O,59nO,`QbO'#DgO,eQbO,59YO,vQRO,5:lO,}QQO,5:lO-SQbO,59{OOQ`,5:W,5:WOOQ`-E7`-E7`OOQ`,59p,59pOOQ`-E7a-E7aOOQa1G.y1G.yO-^QcO1G.yOOQ`-E7b-E7bO-xQbO1G/yO&|QbO,59`O&|QbO,59`OOQa1G.w1G.wOOOO,59h,59hOOOO,59i,59iOOOO-E7d-E7dOOQa1G/Q1G/QOOQa1G/Y1G/YO!QQbO'#CrOOQ`,5:R,5:ROOQ`-E7e-E7eO.VQbO1G0WOOQ`1G/g1G/gO.dQbO7+%eO.iQbO7+%fOOQO1G.z1G.zO.vQRO1G.zOOQ`'#DZ'#DZOOQ`7+%r7+%rO/QQbO7+%sOOQ`<lAN>lO&|QbO'#D]OOQ`'#Dh'#DhO0_QbOAN>yO0jQQO'#D_OOQ`AN>yAN>yO0oQbOAN>yO0tQRO,59wO0{QQO,59wOOQ`-E7f-E7fOOQ`G24eG24eO1QQbOG24eO1VQQO,59yO1[QQO1G/cOOQ`LD*PLD*PO.iQbO1G/eO/QQbO7+$}OOQ`7+%P7+%POOQ`<mAN>mO'PQbO'#D]OOQ`'#Dh'#DhO0bQbOAN>zO0mQQO'#D_OOQ`AN>zAN>zO0rQbOAN>zO0wQRO,59wO1OQQO,59wOOQ`-E7f-E7fOOQ`G24fG24fO1TQbOG24fO1YQQO,59yO1_QQO1G/cOOQ`LD*QLD*QO.lQbO1G/eO/TQbO7+$}OOQ`7+%P7+%POOQ`<i~RzOX#uXY$dYZ$}Zp#upq$dqs#ust%htu'Puw#uwx'Uxy'Zyz'tz{#u{|(_|}#u}!O(_!O!P#u!P!Q+R!Q![(|![!]3n!]!^$}!^#O#u#O#P4X#P#R#u#R#S4^#S#T#u#T#Y4w#Y#Z6V#Z#b4w#b#c:e#c#f4w#f#g;[#g#h4w#h#idS#zUoSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uS$aP;=`<%l#u^$kUoS!_YOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU%UUoS!jQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u^%oZoS!`YOY%hYZ#uZt%htu&buw%hwx&bx#O%h#O#P&b#P;'S%h;'S;=`&y<%lO%hY&gS!`YOY&bZ;'S&b;'S;=`&s<%lO&bY&vP;=`<%l&b^&|P;=`<%l%h~'UO!o~~'ZO!m~U'bUoS!gQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU'{UoS!lQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU(dWoSOt#uuw#ux!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)TYoSrQOt#uuw#ux!O#u!O!P)s!P!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)xWoSOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU*iWoSrQOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU+WWoSOt#uuw#ux!P#u!P!Q+p!Q#O#u#P;'S#u;'S;=`$^<%lO#uU+u^oSOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q#u!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qU,x^oStQOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q0i!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qQ-yXtQOY-tZ!P-t!P!Q.f!Q!}-t!}#O/T#O#P0S#P;'S-t;'S;=`0c<%lO-tQ.iP!P!Q.lQ.qUtQ#Z#[.l#]#^.l#a#b.l#g#h.l#i#j.l#m#n.lQ/WVOY/TZ#O/T#O#P/m#P#Q-t#Q;'S/T;'S;=`/|<%lO/TQ/pSOY/TZ;'S/T;'S;=`/|<%lO/TQ0PP;=`<%l/TQ0VSOY-tZ;'S-t;'S;=`0c<%lO-tQ0fP;=`<%l-tU0nWoSOt#uuw#ux!P#u!P!Q1W!Q#O#u#P;'S#u;'S;=`$^<%lO#uU1_boStQOt#uuw#ux#O#u#P#Z#u#Z#[1W#[#]#u#]#^1W#^#a#u#a#b1W#b#g#u#g#h1W#h#i#u#i#j1W#j#m#u#m#n1W#n;'S#u;'S;=`$^<%lO#uU2l[oSOY2gYZ#uZt2gtu/Tuw2gwx/Tx#O2g#O#P/m#P#Q,q#Q;'S2g;'S;=`3b<%lO2gU3eP;=`<%l2gU3kP;=`<%l,qU3uUoSlQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~4^O!p~U4eUoSwQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU4|YoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#uU5sUyQoSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU6[ZoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#U6}#U#o4w#o;'S#u;'S;=`$^<%lO#uU7S[oSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#`4w#`#a7x#a#o4w#o;'S#u;'S;=`$^<%lO#uU7}[oSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#g4w#g#h8s#h#o4w#o;'S#u;'S;=`$^<%lO#uU8x[oSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#X4w#X#Y9n#Y#o4w#o;'S#u;'S;=`$^<%lO#uU9uYsQoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^:lY!qWoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^;cY!sWoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^QUzQoSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~>iO!w~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!t~~", 11)], topRules: {"Program":[0,17]}, specialized: [{term: 13, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 753 + tokenPrec: 756 }) diff --git a/src/parser/tests/basics.test.ts b/src/parser/tests/basics.test.ts index 3299ff3..6cc5989 100644 --- a/src/parser/tests/basics.test.ts +++ b/src/parser/tests/basics.test.ts @@ -348,3 +348,27 @@ describe('DotGet whitespace sensitivity', () => { expect('readme.txt').toMatchTree(`Word readme.txt`) }) }) + +describe('Comments', () => { + test('are barely there', () => { + expect(`x = 5 # one banana\ny = 2 # two bananas`).toMatchTree(` + Assign + AssignableIdentifier x + Eq = + Number 5 + Assign + AssignableIdentifier y + Eq = + Number 2`) + + expect('# some comment\nbasename = 5 # very astute\n basename / prop\n# good info').toMatchTree(` + Assign + AssignableIdentifier basename + Eq = + Number 5 + BinOp + Identifier basename + Slash / + Identifier prop`) + }) +}) \ No newline at end of file From e4100c7d89bd8928d74b63bfb9101def7cdf3b91 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Sat, 25 Oct 2025 19:51:57 -0700 Subject: [PATCH 2/7] failing single line if test --- src/compiler/tests/compiler.test.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/tests/compiler.test.ts b/src/compiler/tests/compiler.test.ts index 35da324..603a9eb 100644 --- a/src/compiler/tests/compiler.test.ts +++ b/src/compiler/tests/compiler.test.ts @@ -139,6 +139,10 @@ describe('compiler', () => { scattered end`).toEvaluateTo('dwarf') }) + + test('single line if', () => { + expect(`if 3 < 9: shire`).toEvaluateTo('shire') + }) }) describe('errors', () => { From 299ad2c9a99168a78d2d37a89f6c687cf08dc848 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Sat, 25 Oct 2025 20:12:23 -0700 Subject: [PATCH 3/7] failing test for multiline function --- src/compiler/tests/compiler.test.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/compiler/tests/compiler.test.ts b/src/compiler/tests/compiler.test.ts index 35da324..652b29c 100644 --- a/src/compiler/tests/compiler.test.ts +++ b/src/compiler/tests/compiler.test.ts @@ -85,6 +85,21 @@ describe('compiler', () => { expect(`bloop = do: 'bloop' end; bloop`).toEvaluateTo('bloop') }) + test('function call with if statement and multiple expressions', () => { + expect(` + abc = do: + if false: + echo nope + end + + true + end + + abc + `) + .toEvaluateTo(true) + }) + test('simple conditionals', () => { expect(`(3 < 6)`).toEvaluateTo(true) expect(`(10 > 20)`).toEvaluateTo(false) From 7cf7ac3703bfe1fa924d9883710a580625e031e0 Mon Sep 17 00:00:00 2001 From: Chris Wanstrath Date: Sun, 26 Oct 2025 13:02:05 -0700 Subject: [PATCH 4/7] allow more unicode in variable names --- src/parser/tests/basics.test.ts | 198 ++++++++++++++++++++++++++++++++ src/parser/tokenizer.ts | 28 ++++- 2 files changed, 221 insertions(+), 5 deletions(-) diff --git a/src/parser/tests/basics.test.ts b/src/parser/tests/basics.test.ts index 3299ff3..1716240 100644 --- a/src/parser/tests/basics.test.ts +++ b/src/parser/tests/basics.test.ts @@ -30,6 +30,204 @@ describe('Identifier', () => { FunctionCallOrIdentifier Identifier moo-😊-34`) }) + + test('parses mathematical unicode symbols like πœ‹ as identifiers', () => { + expect('πœ‹').toMatchTree(` + FunctionCallOrIdentifier + Identifier πœ‹`) + }) +}) + +describe('Unicode Symbol Support', () => { + describe('Emoji (currently supported)', () => { + test('Basic Emoticons (U+1F600-U+1F64F)', () => { + expect('πŸ˜€').toMatchTree(` + FunctionCallOrIdentifier + Identifier πŸ˜€`) + + expect('😊-counter').toMatchTree(` + FunctionCallOrIdentifier + Identifier 😊-counter`) + }) + + test('Miscellaneous Symbols and Pictographs (U+1F300-U+1F5FF)', () => { + expect('🌍').toMatchTree(` + FunctionCallOrIdentifier + Identifier 🌍`) + + expect('πŸ”₯-handler').toMatchTree(` + FunctionCallOrIdentifier + Identifier πŸ”₯-handler`) + }) + + test('Transport and Map Symbols (U+1F680-U+1F6FF)', () => { + expect('πŸš€').toMatchTree(` + FunctionCallOrIdentifier + Identifier πŸš€`) + + expect('πŸš€-launch').toMatchTree(` + FunctionCallOrIdentifier + Identifier πŸš€-launch`) + }) + + test('Regional Indicator Symbols / Flags (U+1F1E6-U+1F1FF)', () => { + // Note: Flags are typically two regional indicators combined + expect('πŸ‡Ί').toMatchTree(` + FunctionCallOrIdentifier + Identifier πŸ‡Ί`) + }) + + test('Supplemental Symbols and Pictographs (U+1F900-U+1F9FF)', () => { + expect('πŸ€–').toMatchTree(` + FunctionCallOrIdentifier + Identifier πŸ€–`) + + expect('πŸ¦€-lang').toMatchTree(` + FunctionCallOrIdentifier + Identifier πŸ¦€-lang`) + }) + + test('Dingbats (U+2700-U+27BF)', () => { + expect('βœ‚').toMatchTree(` + FunctionCallOrIdentifier + Identifier βœ‚`) + + expect('✨-magic').toMatchTree(` + FunctionCallOrIdentifier + Identifier ✨-magic`) + }) + + test('Miscellaneous Symbols (U+2600-U+26FF)', () => { + expect('⚑').toMatchTree(` + FunctionCallOrIdentifier + Identifier ⚑`) + + expect('β˜€-bright').toMatchTree(` + FunctionCallOrIdentifier + Identifier β˜€-bright`) + }) + }) + + describe('Greek Letters (not currently supported)', () => { + test('Greek lowercase alpha Ξ± (U+03B1)', () => { + expect('Ξ±').toMatchTree(` + FunctionCallOrIdentifier + Identifier Ξ±`) + }) + + test('Greek lowercase beta Ξ² (U+03B2)', () => { + expect('Ξ²').toMatchTree(` + FunctionCallOrIdentifier + Identifier Ξ²`) + }) + + test('Greek lowercase lambda Ξ» (U+03BB)', () => { + expect('Ξ»').toMatchTree(` + FunctionCallOrIdentifier + Identifier Ξ»`) + }) + + test('Greek lowercase pi Ο€ (U+03C0)', () => { + // Note: This is different from mathematical pi πœ‹ + expect('Ο€').toMatchTree(` + FunctionCallOrIdentifier + Identifier Ο€`) + }) + }) + + describe('Mathematical Alphanumeric Symbols (not currently supported)', () => { + test('Mathematical italic small pi πœ‹ (U+1D70B)', () => { + expect('πœ‹').toMatchTree(` + FunctionCallOrIdentifier + Identifier πœ‹`) + }) + + test('Mathematical bold small x 𝐱 (U+1D431)', () => { + expect('𝐱').toMatchTree(` + FunctionCallOrIdentifier + Identifier 𝐱`) + }) + + test('Mathematical script capital F 𝓕 (U+1D4D5)', () => { + expect('𝓕').toMatchTree(` + FunctionCallOrIdentifier + Identifier 𝓕`) + }) + }) + + describe('Mathematical Operators (not currently supported)', () => { + test('Infinity symbol ∞ (U+221E)', () => { + expect('∞').toMatchTree(` + FunctionCallOrIdentifier + Identifier ∞`) + }) + + test('Sum symbol βˆ‘ (U+2211)', () => { + expect('βˆ‘').toMatchTree(` + FunctionCallOrIdentifier + Identifier βˆ‘`) + }) + + test('Integral symbol ∫ (U+222B)', () => { + expect('∫').toMatchTree(` + FunctionCallOrIdentifier + Identifier ∫`) + }) + }) + + describe('Superscripts and Subscripts (not currently supported)', () => { + test('Superscript two Β² (U+00B2)', () => { + expect('xΒ²').toMatchTree(` + FunctionCallOrIdentifier + Identifier xΒ²`) + }) + + test('Subscript two β‚‚ (U+2082)', () => { + expect('hβ‚‚o').toMatchTree(` + FunctionCallOrIdentifier + Identifier hβ‚‚o`) + }) + }) + + describe('Arrows (not currently supported)', () => { + test('Rightward arrow β†’ (U+2192)', () => { + expect('β†’').toMatchTree(` + FunctionCallOrIdentifier + Identifier β†’`) + }) + + test('Leftward arrow ← (U+2190)', () => { + expect('←').toMatchTree(` + FunctionCallOrIdentifier + Identifier ←`) + }) + + test('Double rightward arrow β‡’ (U+21D2)', () => { + expect('β‡’').toMatchTree(` + FunctionCallOrIdentifier + Identifier β‡’`) + }) + }) + + describe('CJK Symbols (not currently supported)', () => { + test('Hiragana あ (U+3042)', () => { + expect('あ').toMatchTree(` + FunctionCallOrIdentifier + Identifier あ`) + }) + + test('Katakana γ‚« (U+30AB)', () => { + expect('γ‚«').toMatchTree(` + FunctionCallOrIdentifier + Identifier γ‚«`) + }) + + test('CJK Unified Ideograph δΈ­ (U+4E2D)', () => { + expect('δΈ­').toMatchTree(` + FunctionCallOrIdentifier + Identifier δΈ­`) + }) + }) }) describe('Parentheses', () => { diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts index 8963ffb..8df852a 100644 --- a/src/parser/tokenizer.ts +++ b/src/parser/tokenizer.ts @@ -14,7 +14,7 @@ export const tokenizer = new ExternalTokenizer( // Don't consume things that start with - or + followed by a digit (negative/positive numbers) if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return - const isValidStart = isLowercaseLetter(ch) || isEmoji(ch) + const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch) const canBeWord = stack.canShift(Word) // Consume all word characters, tracking if it remains a valid identifier @@ -106,8 +106,8 @@ const consumeWordToken = ( if (!isWordChar(nextCh)) break } - // Track identifier validity: must be lowercase, digit, dash, or emoji - if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmoji(ch)) { + // Track identifier validity: must be lowercase, digit, dash, or emoji/unicode + if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmojiOrUnicode(ch)) { if (!canBeWord) break isValidIdentifier = false } @@ -217,7 +217,7 @@ const getFullCodePoint = (input: InputStream, pos: number): number => { return ch } -const isEmoji = (ch: number): boolean => { +const isEmojiOrUnicode = (ch: number): boolean => { return ( // Basic Emoticons (ch >= 0x1f600 && ch <= 0x1f64f) || @@ -242,7 +242,25 @@ const isEmoji = (ch: number): boolean => { // Additional miscellaneous items (ch >= 0x238c && ch <= 0x2454) || // Combining Diacritical Marks for Symbols - (ch >= 0x20d0 && ch <= 0x20ff) + (ch >= 0x20d0 && ch <= 0x20ff) || + // Latin-1 Supplement (includes Β², Β³, ΒΉ and other special chars) + (ch >= 0x00a0 && ch <= 0x00ff) || + // Greek and Coptic (U+0370-U+03FF) + (ch >= 0x0370 && ch <= 0x03ff) || + // Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF) + (ch >= 0x1d400 && ch <= 0x1d7ff) || + // Mathematical Operators (U+2200-U+22FF) + (ch >= 0x2200 && ch <= 0x22ff) || + // Superscripts and Subscripts (U+2070-U+209F) + (ch >= 0x2070 && ch <= 0x209f) || + // Arrows (U+2190-U+21FF) + (ch >= 0x2190 && ch <= 0x21ff) || + // Hiragana (U+3040-U+309F) + (ch >= 0x3040 && ch <= 0x309f) || + // Katakana (U+30A0-U+30FF) + (ch >= 0x30a0 && ch <= 0x30ff) || + // CJK Unified Ideographs (U+4E00-U+9FFF) + (ch >= 0x4e00 && ch <= 0x9fff) ) } From 6e432dd7a1d75c3aa407fbdc30bfba9999db3f4e Mon Sep 17 00:00:00 2001 From: Corey Johnson Date: Mon, 27 Oct 2025 11:30:49 -0700 Subject: [PATCH 5/7] Made it work --- src/compiler/compiler.ts | 9 +++++++-- src/compiler/tests/compiler.test.ts | 2 +- src/parser/shrimp.grammar | 6 +++++- src/parser/shrimp.terms.ts | 1 + src/parser/shrimp.ts | 10 +++++----- src/parser/tests/control-flow.test.ts | 15 +++++++++------ 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/compiler/compiler.ts b/src/compiler/compiler.ts index 212da55..b7e6274 100644 --- a/src/compiler/compiler.ts +++ b/src/compiler/compiler.ts @@ -303,7 +303,8 @@ export class Compiler { return instructions } - case terms.ThenBlock: { + case terms.ThenBlock: + case terms.SingleLineThenBlock: { const instructions = getAllChildren(node) .map((child) => this.#compileNode(child, input)) .flat() @@ -468,7 +469,11 @@ export class Compiler { } default: - throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to) + throw new CompilerError( + `Compiler doesn't know how to handle a "${node.type.name}" node.`, + node.from, + node.to + ) } } } diff --git a/src/compiler/tests/compiler.test.ts b/src/compiler/tests/compiler.test.ts index 603a9eb..40b2271 100644 --- a/src/compiler/tests/compiler.test.ts +++ b/src/compiler/tests/compiler.test.ts @@ -141,7 +141,7 @@ describe('compiler', () => { }) test('single line if', () => { - expect(`if 3 < 9: shire`).toEvaluateTo('shire') + expect(`if 3 < 9: shire end`).toEvaluateTo('shire') }) }) diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index c608994..d1ceb9b 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -105,7 +105,7 @@ IfExpr { } singleLineIf { - @specialize[@name=keyword] (ConditionalOp | expression) colon ThenBlock { consumeToTerminator } + @specialize[@name=keyword] (ConditionalOp | expression) colon SingleLineThenBlock @specialize[@name=keyword] } multilineIf { @@ -124,6 +124,10 @@ ThenBlock { block } +SingleLineThenBlock { + consumeToTerminator +} + ConditionalOp { expression Eq expression | expression Neq expression | diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts index 75f88be..2b65bbc 100644 --- a/src/parser/shrimp.terms.ts +++ b/src/parser/shrimp.terms.ts @@ -41,6 +41,7 @@ export const NamedArg = 40, NamedArgPrefix = 41, IfExpr = 43, + SingleLineThenBlock = 45, ThenBlock = 46, ElseIfExpr = 47, ElseExpr = 49, diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index dfcbbe4..10dc75f 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -7,10 +7,10 @@ import {highlighting} from "./highlight" const spec_Identifier = {__proto__:null,do:52, end:58, null:74, if:88, elseif:96, else:100} export const parser = LRParser.deserialize({ version: 14, - states: ".jQYQbOOO#[QcO'#CrO$UQRO'#CsO$dQcO'#DnO${QbO'#DtOOQ`'#Cu'#CuO%TQbO'#CqO%uOSO'#CzOOQa'#Dr'#DrO&TOpO'#DSO&YQcO'#DqOOQ`'#Do'#DoO&qQbO'#DnO'PQbO'#EROOQ`'#DX'#DXO'nQRO'#DaOOQ`'#Dn'#DnO'sQQO'#DmOOQ`'#Dm'#DmOOQ`'#Db'#DbQYQbOOOOQa'#Dq'#DqOOQ`'#Cp'#CpO'{QbO'#DUOOQ`'#Dp'#DpOOQ`'#Dc'#DcO(VQbO,59ZO'PQbO,59_O'PQbO,59_OOQ`'#Dd'#DdO(sQbO'#CwO({QQO,5:`O)lQRO'#CsO)|QRO,59]O*_QRO,59]O*YQQO,59]O+YQQO,59]O+bQbO'#C|O+jQWO'#C}OOOO'#Dz'#DzOOOO'#Df'#DfO,OOSO,59fOOQa,59f,59fO,^O`O,59nO,cQbO'#DgO,hQbO,59YO,yQRO,5:mO-QQQO,5:mO-VQbO,59{OOQ`,5:X,5:XOOQ`-E7`-E7`OOQ`,59p,59pOOQ`-E7a-E7aOOQa1G.y1G.yO-aQcO1G.yOOQ`-E7b-E7bO-{QbO1G/zO'PQbO,59`O'PQbO,59`OOQa1G.w1G.wOOOO,59h,59hOOOO,59i,59iOOOO-E7d-E7dOOQa1G/Q1G/QOOQa1G/Y1G/YO!TQbO'#CrOOQ`,5:R,5:ROOQ`-E7e-E7eO.YQbO1G0XOOQ`1G/g1G/gO.gQbO7+%fO.lQbO7+%gOOQO1G.z1G.zO.yQRO1G.zOOQ`'#DZ'#DZOOQ`7+%s7+%sO/TQbO7+%tOOQ`<mAN>mO'PQbO'#D]OOQ`'#Dh'#DhO0bQbOAN>zO0mQQO'#D_OOQ`AN>zAN>zO0rQbOAN>zO0wQRO,59wO1OQQO,59wOOQ`-E7f-E7fOOQ`G24fG24fO1TQbOG24fO1YQQO,59yO1_QQO1G/cOOQ`LD*QLD*QO.lQbO1G/eO/TQbO7+$}OOQ`7+%P7+%POOQ`<mAN>mO'PQbO'#D]OOQ`'#Dh'#DhO0gQbOAN>zO0rQQO'#D_OOQ`AN>zAN>zO0wQbOAN>zO0|QRO,59wO1TQQO,59wOOQ`-E7f-E7fOOQ`G24fG24fO1YQbOG24fO1_QQO,59yO1dQQO1G/cOOQ`LD*QLD*QO.lQbO1G/eO/YQbO7+$}OOQ`7+%P7+%POOQ`< spec_Identifier[value] || -1}], - tokenPrec: 756 + tokenPrec: 761 }) diff --git a/src/parser/tests/control-flow.test.ts b/src/parser/tests/control-flow.test.ts index df1bc3c..11c81d0 100644 --- a/src/parser/tests/control-flow.test.ts +++ b/src/parser/tests/control-flow.test.ts @@ -4,7 +4,7 @@ import '../shrimp.grammar' // Importing this so changes cause it to retest! describe('if/elseif/else', () => { test('parses single line if', () => { - expect(`if y = 1: 'cool'`).toMatchTree(` + expect(`if y = 1: 'cool' end`).toMatchTree(` IfExpr keyword if ConditionalOp @@ -12,12 +12,13 @@ describe('if/elseif/else', () => { Eq = Number 1 colon : - ThenBlock + SingleLineThenBlock String StringFragment cool + keyword end `) - expect('a = if x: 2').toMatchTree(` + expect('a = if x: 2 end').toMatchTree(` Assign AssignableIdentifier a Eq = @@ -25,8 +26,9 @@ describe('if/elseif/else', () => { keyword if Identifier x colon : - ThenBlock + SingleLineThenBlock Number 2 + keyword end `) }) @@ -138,7 +140,7 @@ describe('if/elseif/else', () => { }) test('does not parse identifiers that start with if', () => { - expect('iffy = if true: 2').toMatchTree(` + expect('iffy = if true: 2 end').toMatchTree(` Assign AssignableIdentifier iffy Eq = @@ -146,8 +148,9 @@ describe('if/elseif/else', () => { keyword if Boolean true colon : - ThenBlock + SingleLineThenBlock Number 2 + keyword end `) }) }) From 28fab1235c48efd0b6724fe51bbfae20aac21464 Mon Sep 17 00:00:00 2001 From: Corey Johnson Date: Mon, 27 Oct 2025 12:07:13 -0700 Subject: [PATCH 6/7] Works with blank lines --- src/parser/shrimp.grammar | 2 +- src/parser/shrimp.ts | 8 ++++---- src/parser/tests/multiline.test.ts | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar index 741dad0..3dbddd8 100644 --- a/src/parser/shrimp.grammar +++ b/src/parser/shrimp.grammar @@ -199,5 +199,5 @@ expressionWithoutIdentifier { } block { - (consumeToTerminator newlineOrSemicolon)* + (consumeToTerminator? newlineOrSemicolon)* } \ No newline at end of file diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 1db89f9..50cb347 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -7,9 +7,9 @@ import {highlighting} from "./highlight" const spec_Identifier = {__proto__:null,do:52, end:58, null:74, if:88, elseif:96, else:100} export const parser = LRParser.deserialize({ version: 14, - states: ".jQVQbOOO#XQcO'#CrO$RQRO'#CsO$aQcO'#DmO$xQbO'#DsOOQ`'#Cu'#CuO%QQbO'#CqO%rOSO'#CzOOQa'#Dq'#DqO&QOpO'#DSO&VQcO'#DpOOQ`'#Dn'#DnO&nQbO'#DmO&|QbO'#EQOOQ`'#DX'#DXO'kQRO'#DaOOQ`'#Dm'#DmO'pQQO'#DlOOQ`'#Dl'#DlOOQ`'#Db'#DbQVQbOOOOQa'#Dp'#DpOOQ`'#Cp'#CpO'xQbO'#DUOOQ`'#Do'#DoOOQ`'#Dc'#DcO(SQbO,59ZO&|QbO,59_O&|QbO,59_OOQ`'#Dd'#DdO(pQbO'#CwO(xQQO,5:_O)iQRO'#CsO)yQRO,59]O*[QRO,59]O*VQQO,59]O+VQQO,59]O+_QbO'#C|O+gQWO'#C}OOOO'#Dy'#DyOOOO'#Df'#DfO+{OSO,59fOOQa,59f,59fO,ZO`O,59nO,`QbO'#DgO,eQbO,59YO,vQRO,5:lO,}QQO,5:lO-SQbO,59{OOQ`,5:W,5:WOOQ`-E7`-E7`OOQ`,59p,59pOOQ`-E7a-E7aOOQa1G.y1G.yO-^QcO1G.yOOQ`-E7b-E7bO-xQbO1G/yO&|QbO,59`O&|QbO,59`OOQa1G.w1G.wOOOO,59h,59hOOOO,59i,59iOOOO-E7d-E7dOOQa1G/Q1G/QOOQa1G/Y1G/YO!QQbO'#CrOOQ`,5:R,5:ROOQ`-E7e-E7eO.VQbO1G0WOOQ`1G/g1G/gO.dQbO7+%eO.iQbO7+%fOOQO1G.z1G.zO.vQRO1G.zOOQ`'#DZ'#DZOOQ`7+%r7+%rO/QQbO7+%sOOQ`<lAN>lO&|QbO'#D]OOQ`'#Dh'#DhO0_QbOAN>yO0jQQO'#D_OOQ`AN>yAN>yO0oQbOAN>yO0tQRO,59wO0{QQO,59wOOQ`-E7f-E7fOOQ`G24eG24eO1QQbOG24eO1VQQO,59yO1[QQO1G/cOOQ`LD*PLD*PO.iQbO1G/eO/QQbO7+$}OOQ`7+%P7+%POOQ`<lAN>lO&|QbO'#D]OOQ`'#Dh'#DhO0hQbOAN>yO0sQQO'#D_OOQ`AN>yAN>yO0xQbOAN>yO0}QRO,59wO1UQQO,59wOOQ`-E7f-E7fOOQ`G24eG24eO1ZQbOG24eO1`QQO,59yO1eQQO1G/cOOQ`LD*PLD*PO.iQbO1G/eO/TQbO7+$}OOQ`7+%P7+%POOQ`< spec_Identifier[value] || -1}], - tokenPrec: 753 + tokenPrec: 762 }) diff --git a/src/parser/tests/multiline.test.ts b/src/parser/tests/multiline.test.ts index 27bddc7..7ac995a 100644 --- a/src/parser/tests/multiline.test.ts +++ b/src/parser/tests/multiline.test.ts @@ -71,4 +71,20 @@ end keyword end `) }) + + test('multiline with empty lines', () => { + expect(` + do: + 2 + + end + `).toMatchTree(` + FunctionDef + keyword do + Params + colon : + Number 2 + keyword end + `) + }) }) From 6c8c07e8691b16c6ef11151415f7b9ecba033cf9 Mon Sep 17 00:00:00 2001 From: Corey Johnson Date: Mon, 27 Oct 2025 12:36:29 -0700 Subject: [PATCH 7/7] Update shrimp.ts --- src/parser/shrimp.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 50cb347..ba5f038 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -7,11 +7,11 @@ import {highlighting} from "./highlight" const spec_Identifier = {__proto__:null,do:52, end:58, null:74, if:88, elseif:96, else:100} export const parser = LRParser.deserialize({ version: 14, - states: ".pQVQbOOO#XQcO'#CrO$RQRO'#CsO$aQcO'#DmO$xQbO'#DsOOQ`'#Cu'#CuO%QQbO'#CqO%rOSO'#CzOOQa'#Dq'#DqO&QOpO'#DSO&VQcO'#DpOOQ`'#Dn'#DnO&nQbO'#DmO&|QbO'#EQOOQ`'#DX'#DXO'kQRO'#DaOOQ`'#Dm'#DmO'pQQO'#DlOOQ`'#Dl'#DlOOQ`'#Db'#DbQVQbOOOOQa'#Dp'#DpOOQ`'#Cp'#CpO'xQbO'#DUOOQ`'#Do'#DoOOQ`'#Dc'#DcO(SQbO,59ZO&|QbO,59_O&|QbO,59_OOQ`'#Dd'#DdO(pQbO'#CwO(xQQO,5:_O)iQRO'#CsO)yQRO,59]O*[QRO,59]O*VQQO,59]O+VQQO,59]O+_QbO'#C|O+gQWO'#C}OOOO'#Dy'#DyOOOO'#Df'#DfO+{OSO,59fOOQa,59f,59fO,ZO`O,59nO,`QbO'#DgO,eQbO,59YO,vQRO,5:lO,}QQO,5:lO-SQbO,59{OOQ`,5:W,5:WOOQ`-E7`-E7`OOQ`,59p,59pOOQ`-E7a-E7aOOQa1G.y1G.yO-^QcO1G.yOOQ`-E7b-E7bO-xQbO1G/yO&|QbO,59`O&|QbO,59`OOQa1G.w1G.wOOOO,59h,59hOOOO,59i,59iOOOO-E7d-E7dOOQa1G/Q1G/QOOQa1G/Y1G/YO!QQbO'#CrOOQ`,5:R,5:ROOQ`-E7e-E7eO.VQbO1G0WOOQ`1G/g1G/gO.dQbO7+%eO.iQbO7+%fOOQO1G.z1G.zO.yQRO1G.zOOQ`'#DZ'#DZOOQ`7+%r7+%rO/TQbO7+%sOOQ`<lAN>lO&|QbO'#D]OOQ`'#Dh'#DhO0hQbOAN>yO0sQQO'#D_OOQ`AN>yAN>yO0xQbOAN>yO0}QRO,59wO1UQQO,59wOOQ`-E7f-E7fOOQ`G24eG24eO1ZQbOG24eO1`QQO,59yO1eQQO1G/cOOQ`LD*PLD*PO.iQbO1G/eO/TQbO7+$}OOQ`7+%P7+%POOQ`<mAN>mO'PQbO'#D]OOQ`'#Dh'#DhO0pQbOAN>zO0{QQO'#D_OOQ`AN>zAN>zO1QQbOAN>zO1VQRO,59wO1^QQO,59wOOQ`-E7f-E7fOOQ`G24fG24fO1cQbOG24fO1hQQO,59yO1mQQO1G/cOOQ`LD*QLD*QO.lQbO1G/eO/]QbO7+$}OOQ`7+%P7+%POOQ`<i~RzOX#uXY$dYZ$}Zp#upq$dqs#ust%htu'Puw#uwx'Uxy'Zyz'tz{#u{|(_|}#u}!O(_!O!P#u!P!Q+R!Q![(|![!]3n!]!^$}!^#O#u#O#P4X#P#R#u#R#S4^#S#T#u#T#Y4w#Y#Z6V#Z#b4w#b#c:e#c#f4w#f#g;[#g#h4w#h#idS#zUoSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uS$aP;=`<%l#u^$kUoS!_YOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU%UUoS!jQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u^%oZoS!`YOY%hYZ#uZt%htu&buw%hwx&bx#O%h#O#P&b#P;'S%h;'S;=`&y<%lO%hY&gS!`YOY&bZ;'S&b;'S;=`&s<%lO&bY&vP;=`<%l&b^&|P;=`<%l%h~'UO!o~~'ZO!m~U'bUoS!gQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU'{UoS!lQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU(dWoSOt#uuw#ux!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)TYoSrQOt#uuw#ux!O#u!O!P)s!P!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)xWoSOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU*iWoSrQOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU+WWoSOt#uuw#ux!P#u!P!Q+p!Q#O#u#P;'S#u;'S;=`$^<%lO#uU+u^oSOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q#u!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qU,x^oStQOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q0i!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qQ-yXtQOY-tZ!P-t!P!Q.f!Q!}-t!}#O/T#O#P0S#P;'S-t;'S;=`0c<%lO-tQ.iP!P!Q.lQ.qUtQ#Z#[.l#]#^.l#a#b.l#g#h.l#i#j.l#m#n.lQ/WVOY/TZ#O/T#O#P/m#P#Q-t#Q;'S/T;'S;=`/|<%lO/TQ/pSOY/TZ;'S/T;'S;=`/|<%lO/TQ0PP;=`<%l/TQ0VSOY-tZ;'S-t;'S;=`0c<%lO-tQ0fP;=`<%l-tU0nWoSOt#uuw#ux!P#u!P!Q1W!Q#O#u#P;'S#u;'S;=`$^<%lO#uU1_boStQOt#uuw#ux#O#u#P#Z#u#Z#[1W#[#]#u#]#^1W#^#a#u#a#b1W#b#g#u#g#h1W#h#i#u#i#j1W#j#m#u#m#n1W#n;'S#u;'S;=`$^<%lO#uU2l[oSOY2gYZ#uZt2gtu/Tuw2gwx/Tx#O2g#O#P/m#P#Q,q#Q;'S2g;'S;=`3b<%lO2gU3eP;=`<%l2gU3kP;=`<%l,qU3uUoSlQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~4^O!p~U4eUoSwQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU4|YoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#uU5sUyQoSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU6[ZoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#U6}#U#o4w#o;'S#u;'S;=`$^<%lO#uU7S[oSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#`4w#`#a7x#a#o4w#o;'S#u;'S;=`$^<%lO#uU7}[oSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#g4w#g#h8s#h#o4w#o;'S#u;'S;=`$^<%lO#uU8x[oSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#X4w#X#Y9n#Y#o4w#o;'S#u;'S;=`$^<%lO#uU9uYsQoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^:lY!qWoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^;cY!sWoSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^QUzQoSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~>iO!w~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!t~~", 11)], topRules: {"Program":[0,17]}, specialized: [{term: 13, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}], - tokenPrec: 762 + tokenPrec: 770 })