Compare commits

...

15 Commits

Author SHA1 Message Date
318142dfbb Update shrimp.ts 2025-10-27 12:45:59 -07:00
ffdd666685 Merge remote-tracking branch 'origin/main' into dotget-function-calls 2025-10-27 12:45:53 -07:00
0fc1f9f895 Merge pull request 'allow more unicode in variable names' (#8) from more-unicode-variable-names into main
Reviewed-on: #8
2025-10-27 19:43:55 +00:00
cdcaf5c9d3 Merge pull request 'failing test for multiline function' (#6) from multiline-fn into main
Reviewed-on: #6
2025-10-27 19:37:40 +00:00
6c8c07e869 Update shrimp.ts 2025-10-27 12:36:29 -07:00
2fcd840493 Merge remote-tracking branch 'origin/main' into multiline-fn 2025-10-27 12:36:12 -07:00
28fab1235c Works with blank lines 2025-10-27 12:07:13 -07:00
cbd3fe6315 Merge pull request 'failing single line if test' (#5) from single-line-if into main
Reviewed-on: #5
2025-10-27 18:31:48 +00:00
6e432dd7a1 Made it work 2025-10-27 11:30:49 -07:00
050acbfaeb Merge remote-tracking branch 'origin/main' into single-line-if 2025-10-27 10:54:33 -07:00
34d1b8b998 Merge pull request 'Add # comments' (#4) from comments into main
Reviewed-on: #4
2025-10-27 17:50:29 +00:00
7cf7ac3703 allow more unicode in variable names 2025-10-26 13:03:17 -07:00
299ad2c9a9 failing test for multiline function 2025-10-25 20:15:55 -07:00
e4100c7d89 failing single line if test 2025-10-25 19:51:57 -07:00
dba8430d9a Add # comments 2025-10-25 19:18:27 -07:00
9 changed files with 313 additions and 24 deletions

View File

@ -303,7 +303,8 @@ export class Compiler {
return instructions
}
case terms.ThenBlock: {
case terms.ThenBlock:
case terms.SingleLineThenBlock: {
const instructions = getAllChildren(node)
.map((child) => this.#compileNode(child, input))
.flat()
@ -468,7 +469,11 @@ export class Compiler {
}
default:
throw new CompilerError(`Unsupported syntax node: ${node.type.name}`, node.from, node.to)
throw new CompilerError(
`Compiler doesn't know how to handle a "${node.type.name}" node.`,
node.from,
node.to
)
}
}
}

View File

@ -85,6 +85,21 @@ describe('compiler', () => {
expect(`bloop = do: 'bloop' end; bloop`).toEvaluateTo('bloop')
})
// A `do:` body holding a false `if` followed by a second expression: calling
// the function is expected to evaluate to that trailing expression (true).
test('function call with if statement and multiple expressions', () => {
  const program = `
abc = do:
if false:
echo nope
end
true
end
abc
`
  expect(program).toEvaluateTo(true)
})
test('simple conditionals', () => {
expect(`(3 < 6)`).toEvaluateTo(true)
expect(`(10 > 20)`).toEvaluateTo(false)
@ -139,6 +154,10 @@ describe('compiler', () => {
scattered
end`).toEvaluateTo('dwarf')
})
// A one-line `if <cond>: <expr> end` with a true condition is expected to
// evaluate to the expression after the colon.
test('single line if', () => {
  const program = `if 3 < 9: shire end`
  const expected = 'shire'
  expect(program).toEvaluateTo(expected)
})
})
describe('errors', () => {

View File

@ -2,7 +2,7 @@
@context trackScope from "./scopeTracker"
@skip { space }
@skip { space | comment }
@top Program { item* }
@ -18,6 +18,7 @@
newlineOrSemicolon { "\n" | ";" }
eof { @eof }
space { " " | "\t" }
comment { "#" ![\n]* }
leftParen { "(" }
rightParen { ")" }
colon[closedBy="end", @name="colon"] { ":" }
@ -104,7 +105,7 @@ IfExpr {
}
singleLineIf {
@specialize[@name=keyword]<Identifier, "if"> (ConditionalOp | expression) colon ThenBlock { consumeToTerminator }
@specialize[@name=keyword]<Identifier, "if"> (ConditionalOp | expression) colon SingleLineThenBlock @specialize[@name=keyword]<Identifier, "end">
}
multilineIf {
@ -123,6 +124,10 @@ ThenBlock {
block
}
SingleLineThenBlock {
consumeToTerminator
}
ConditionalOp {
expression Eq expression |
expression Neq expression |
@ -199,5 +204,5 @@ expressionWithoutIdentifier {
}
block {
(consumeToTerminator newlineOrSemicolon)*
(consumeToTerminator? newlineOrSemicolon)*
}

View File

@ -42,6 +42,7 @@ export const
NamedArg = 40,
NamedArgPrefix = 41,
IfExpr = 43,
SingleLineThenBlock = 45,
ThenBlock = 46,
ElseIfExpr = 47,
ElseExpr = 49,

View File

@ -7,11 +7,11 @@ import {highlighting} from "./highlight"
const spec_Identifier = {__proto__:null,end:62, null:76, if:88, elseif:96, else:100}
export const parser = LRParser.deserialize({
version: 14,
states: ".jQVQbOOO!QOpO'#CqO#^QcO'#CuO$WQRO'#CvO$fQcO'#DmO$}QbO'#DtOOQ`'#Cx'#CxO%VQbO'#CtO%wOSO'#C|OOQa'#Dr'#DrO&VQcO'#DqOOQ`'#Dn'#DnO&nQbO'#DmO&|QbO'#EQOOQ`'#DX'#DXO'kQRO'#DaOOQ`'#Dm'#DmO'pQQO'#DlOOQ`'#Dl'#DlOOQ`'#Db'#DbQVQbOOO'xObO,59]OOQa'#Dq'#DqOOQ`'#Cs'#CsO(QQbO'#DUOOQ`'#Dp'#DpOOQ`'#Dc'#DcO([QbO,59[O&|QbO,59bO&|QbO,59bOOQ`'#Dd'#DdO(xQbO'#CyO)QQQO,5:`O)qQRO'#CvO*RQRO,59`O*dQRO,59`O*_QQO,59`O+_QQO,59`O+gQbO'#DOO+oQWO'#DPOOOO'#Dz'#DzOOOO'#Df'#DfO,TOSO,59hOOQa,59h,59hO,cQbO'#DgO,kQbO,59ZO,|QRO,5:lO-TQQO,5:lO-YQbO,59{OOQ`,5:W,5:WOOQ`-E7`-E7`OOQa1G.w1G.wOOQ`,59p,59pOOQ`-E7a-E7aOOQa1G.|1G.|O-dQcO1G.|OOQ`-E7b-E7bO.OQbO1G/zO&|QbO,59cO&|QbO,59cOOQa1G.z1G.zOOOO,59j,59jOOOO,59k,59kOOOO-E7d-E7dOOQa1G/S1G/SO!VQbO'#CuOOQ`,5:R,5:ROOQ`-E7e-E7eO.]QbO1G0WOOQ`1G/g1G/gO.jQbO7+%fO.oQbO7+%gOOQO1G.}1G.}O.|QRO1G.}OOQ`'#DZ'#DZOOQ`7+%r7+%rO/WQbO7+%sOOQ`<<IQ<<IQO/kQQO'#DeO/pQbO'#DwO0TQbO<<IROOQ`'#D['#D[O0YQbO<<I_OOQ`,5:P,5:POOQ`-E7c-E7cOOQ`AN>mAN>mO&|QbO'#D]OOQ`'#Dh'#DhO0eQbOAN>yO0pQQO'#D_OOQ`AN>yAN>yO0uQbOAN>yO0zQRO,59wO1RQQO,59wOOQ`-E7f-E7fOOQ`G24eG24eO1WQbOG24eO1]QQO,59yO1bQQO1G/cOOQ`LD*PLD*PO.oQbO1G/eO/WQbO7+$}OOQ`7+%P7+%POOQ`<<Hi<<Hi",
stateData: "1j~O!_OS~O]QO^_O_XO`POaTOfXOtXOuXOvXO|]O!gVO!jbO!mWO~O!ceO~O]fO_XO`POaTOfXOtXOuXOvXOwgOyhO!gVO!mWOziX!jiX!viX!liXoiX~OP!eXQ!eXR!eXS!eXT!eXU!eXV!eXW!eXX!eXY!eXZ!eX[!eX~P!VOPlOQlORmOSmO~OPlOQlORmOSmO!j!aX!v!aXo!aX~O]nOnmP~O]QO_XO`POaTOfXOtXOuXOvXO!gVO!mWO~OqxO!m{O!ovO!pwO~OP!eXQ!eXR!eXS!eX!j!aX!v!aXo!aX~Oz|O!j!aX!v!aXo!aX~O]fO_XO`POfXOtXOuXOvXO!gVO!mWO~OV!QO~O!j!RO!v!RO~O]!TOf!TO~OaTOw!UO~P&|OaTOwgOyhOzda!jda!vda!ldaoda~P&|O]nOnmX~On!ZO~OT!]OU!]OV![OW![OX![OY![OZ![O[![O~OPlOQlORmOSmO~P)VOPlOQlORmOSmO!l!^O~O!l!^OP!eXQ!eXR!eXS!eXT!eXU!eXV!eXW!eXX!eXY!eXZ!eX[!eX~Oz|O!l!^O~O]!_O!gVO~O!m!`O!o!`O!p!`O!q!`O!r!`O!s!`O~OqxO!m!bO!ovO!pwO~O]!cO`PO~Oz|O!jca!vca!lcaoca~On!fO~P)VOn!fO~O^_O|]O~P%VOPlOQlORjiSji!jji!vji!ljioji~O^_O|]O!j!iO~P%VO^_O|]O!j!nO~P%VOo!oO~O^_O|]Oo!kP~P%VO!lkinki~P)VO^_O|]Oo!kP!Q!kP!S!kP~P%VO!j!uO~O^_O|]Oo!kX!Q!kX!S!kX~P%VOo!wO~Oo!|O!Q!xO!S!{O~Oo#RO!Q!xO!S!{O~On#TO~Oo#RO~On#UO~P)VOn#UO~Oo#VO~O!j#WO~O!j#XO~Ofu~",
goto: "+t!vPPPPPPPPPPPPPPPPPPP!w#W#fP$S$X#W$s%Y%f%}PP&QP&i&iPPPP$SPP&mP&y&|'VP'ZP&m'a'g'n't'}(T([PPP(b(f(zP)^)c*^P*y*yP+[PP+dPPPPP+h+hd`Od!Q!Z!f!i!n!q#W#XRtViZOVd|!Q!Z!f!i!n!q#W#XfQOVd!Q!Z!f!i!n!q#W#XdfQ]hklm![!]!c!xR!c|ViQk!czXOQV]dhklm!Q!Z![!]!c!f!i!n!q!x#W#XR!_vdSOd!Q!Z!f!i!n!q#W#XQrVQ!WlR!XmQtVQ!P]Q!j!]R#P!xd`Od!Q!Z!f!i!n!q#W#XUgQk!cQtVR!UhRpT{XOQV]dhklm!Q!Z![!]!c!f!i!n!q!x#W#XTxWze`Od!Q!Z!f!i!n!q#W#XR!m!fQ!t!nQ#Y#WR#Z#XT!y!t!zQ!}!tR#S!zQdOR!SdSkQ!cR!VkQoTR!YoW!q!i!n#W#XR!v!qQzWR!azS}[uR!e}Q!z!tR#Q!zTcOdSaOdQ!g!QQ!h!ZQ!l!fZ!p!i!n!q#W#Xd[Od!Q!Z!f!i!n!q#W#XQuVR!d|VjQk!cdROd!Q!Z!f!i!n!q#W#XUgQk!cQqVQ!O]Q!UhQ!WlQ!XmQ!j![Q!k!]R#O!xdYOd!Q!Z!f!i!n!q#W#XdfQ]hklm![!]!c!xRsVoUOQVdhk!Q!Z!c!f!i!n!q#W#XQ!r!iV!s!n#W#XTyWze^Od!Q!Z!f!i!n!q#W#X",
nodeNames: "⚠ Star Slash Plus Minus And Or Eq Neq Lt Lte Gt Gte Identifier AssignableIdentifier Word IdentifierBeforeDot Do Program PipeExpr FunctionCall DotGet Number PositionalArg ParenExpr FunctionCallOrIdentifier BinOp ConditionalOp FunctionDef Params colon keyword String StringFragment Interpolation EscapeSeq Boolean Regex Null Underscore NamedArg NamedArgPrefix operator IfExpr keyword ThenBlock ThenBlock ElseIfExpr keyword ElseExpr keyword Assign",
maxTerm: 84,
states: ".vQYQbOOO!TOpO'#CqO#aQcO'#CuO$ZQRO'#CvO$iQcO'#DnO%QQbO'#DuOOQ`'#Cx'#CxO%YQbO'#CtO%zOSO'#C|OOQa'#Ds'#DsO&YQcO'#DrOOQ`'#Do'#DoO&qQbO'#DnO'PQbO'#EROOQ`'#DX'#DXO'nQRO'#DaOOQ`'#Dn'#DnO'sQQO'#DmOOQ`'#Dm'#DmOOQ`'#Db'#DbQYQbOOO'{ObO,59]OOQa'#Dr'#DrOOQ`'#Cs'#CsO(TQbO'#DUOOQ`'#Dq'#DqOOQ`'#Dc'#DcO(_QbO,59[O'PQbO,59bO'PQbO,59bOOQ`'#Dd'#DdO({QbO'#CyO)TQQO,5:aO)tQRO'#CvO*UQRO,59`O*gQRO,59`O*bQQO,59`O+bQQO,59`O+jQbO'#DOO+rQWO'#DPOOOO'#D{'#D{OOOO'#Df'#DfO,WOSO,59hOOQa,59h,59hO,fQbO'#DgO,nQbO,59ZO-PQRO,5:mO-WQQO,5:mO-]QbO,59{OOQ`,5:X,5:XOOQ`-E7`-E7`OOQa1G.w1G.wOOQ`,59p,59pOOQ`-E7a-E7aOOQa1G.|1G.|O-gQcO1G.|OOQ`-E7b-E7bO.RQbO1G/{O'PQbO,59cO'PQbO,59cOOQa1G.z1G.zOOOO,59j,59jOOOO,59k,59kOOOO-E7d-E7dOOQa1G/S1G/SO!YQbO'#CuOOQ`,5:R,5:ROOQ`-E7e-E7eO.`QbO1G0XOOQ`1G/g1G/gO.mQbO7+%gO.rQbO7+%hOOQO1G.}1G.}O/SQRO1G.}OOQ`'#DZ'#DZO/^QbO7+%sO/cQbO7+%tOOQ`<<IR<<IROOQ`'#De'#DeO/yQQO'#DeO0OQbO'#DxO0fQbO<<ISOOQ`<<I_<<I_OOQ`'#D['#D[O0kQbO<<I`OOQ`,5:P,5:POOQ`-E7c-E7cOOQ`AN>nAN>nO'PQbO'#D]OOQ`'#Dh'#DhO0vQbOAN>zO1RQQO'#D_OOQ`AN>zAN>zO1WQbOAN>zO1]QRO,59wO1dQQO,59wOOQ`-E7f-E7fOOQ`G24fG24fO1iQbOG24fO1nQQO,59yO1sQQO1G/cOOQ`LD*QLD*QO.rQbO1G/eO/cQbO7+$}OOQ`7+%P7+%POOQ`<<Hi<<Hi",
stateData: "1{~O!_OS!`OS~O]QO^_O_XO`POaTOfXOtXOuXOvXO|]O!hVO!kbO!nWO~O!deO~O]fO_XO`POaTOfXOtXOuXOvXOwgOyhO!hVO!nWOziX!kiX!wiX!miXoiX~OP!fXQ!fXR!fXS!fXT!fXU!fXV!fXW!fXX!fXY!fXZ!fX[!fX~P!YOPlOQlORmOSmO~OPlOQlORmOSmO!k!bX!w!bXo!bX~O]nOnmP~O]QO_XO`POaTOfXOtXOuXOvXO!hVO!nWO~OqxO!n{O!pvO!qwO~OP!fXQ!fXR!fXS!fX!k!bX!w!bXo!bX~Oz|O!k!bX!w!bXo!bX~O]fO_XO`POfXOtXOuXOvXO!hVO!nWO~OV!QO~O!k!RO!w!RO~O]!TOf!TO~OaTOw!UO~P'POaTOwgOyhOzda!kda!wda!mdaoda~P'PO]nOnmX~On!ZO~OT!]OU!]OV![OW![OX![OY![OZ![O[![O~OPlOQlORmOSmO~P)YOPlOQlORmOSmO!m!^O~O!m!^OP!fXQ!fXR!fXS!fXT!fXU!fXV!fXW!fXX!fXY!fXZ!fX[!fX~Oz|O!m!^O~O]!_O!hVO~O!n!`O!p!`O!q!`O!r!`O!s!`O!t!`O~OqxO!n!bO!pvO!qwO~O]!cO`PO~Oz|O!kca!wca!mcaoca~On!fO~P)YOn!fO~O^_O|]O~P%YOPlOQlORjiSji!kji!wji!mjioji~O^_O|]O!k!iO~P%YO^_O|]O!k!nO~P%YOo!oO~O^_O|]O!k!pOo!lP~P%YO!mkinki~P)YOo!tO~O^_O|]O!k!pOo!lP!Q!lP!S!lP~P%YO!k!wO~O^_O|]O!k!pOo!lX!Q!lX!S!lX~P%YOo!yO~Oo#OO!Q!zO!S!}O~Oo#TO!Q!zO!S!}O~On#VO~Oo#TO~On#WO~P)YOn#WO~Oo#XO~O!k#YO~O!k#ZO~Ofu~",
goto: "+u!wPPPPPPPPPPPPPPPPPPP!x#X#gP$T$Y#X$t%Z%g&OPP&RP&j&jPPPP$TPP&nP&z&}'WP'[P&n'b'h'o'u(O(U(]PPPP(c(g({P)_)d*_P*z*zP+]PP+ePPPPP+i+id`Od!Q!Z!f!i!n!r#Y#ZRtViZOVd|!Q!Z!f!i!n!r#Y#ZfQOVd!Q!Z!f!i!n!r#Y#ZdfQ]hklm![!]!c!zR!c|ViQk!czXOQV]dhklm!Q!Z![!]!c!f!i!n!r!z#Y#ZR!_vdSOd!Q!Z!f!i!n!r#Y#ZQrVQ!WlR!XmQtVQ!P]Q!j!]R#R!zd`Od!Q!Z!f!i!n!r#Y#ZUgQk!cQtVR!UhRpT{XOQV]dhklm!Q!Z![!]!c!f!i!n!r!z#Y#ZTxWze`Od!Q!Z!f!i!n!r#Y#ZR!m!fQ!v!nQ#[#YR#]#ZT!{!v!|Q#P!vR#U!|QdOR!SdSkQ!cR!VkQoTR!YoW!r!i!n#Y#ZR!x!rQzWR!azS}[uR!e}Q!|!vR#S!|TcOdSaOdQ!g!QQ!h!ZQ!l!fZ!q!i!n!r#Y#Zd[Od!Q!Z!f!i!n!r#Y#ZQuVR!d|VjQk!cdROd!Q!Z!f!i!n!r#Y#ZUgQk!cQqVQ!O]Q!UhQ!WlQ!XmQ!j![Q!k!]R#Q!zdYOd!Q!Z!f!i!n!r#Y#ZdfQ]hklm![!]!c!zRsVoUOQVdhk!Q!Z!c!f!i!n!r#Y#ZQ!s!iV!u!n#Y#ZTyWze^Od!Q!Z!f!i!n!r#Y#Z",
nodeNames: "⚠ Star Slash Plus Minus And Or Eq Neq Lt Lte Gt Gte Identifier AssignableIdentifier Word IdentifierBeforeDot Do Program PipeExpr FunctionCall DotGet Number PositionalArg ParenExpr FunctionCallOrIdentifier BinOp ConditionalOp FunctionDef Params colon keyword String StringFragment Interpolation EscapeSeq Boolean Regex Null Underscore NamedArg NamedArgPrefix operator IfExpr keyword SingleLineThenBlock ThenBlock ElseIfExpr keyword ElseExpr keyword Assign",
maxTerm: 85,
context: trackScope,
nodeProps: [
["closedBy", 30,"end"]
@ -19,9 +19,9 @@ export const parser = LRParser.deserialize({
propSources: [highlighting],
skippedNodes: [0],
repeatNodeCount: 7,
tokenData: "<}~RyOX#rXY$aYZ$zZp#rpq$aqt#rtu%euw#rwx%jxy%oyz&Yz{#r{|&s|}#r}!O&s!O!P#r!P!Q)g!Q!['b![!]2S!]!^$z!^#O#r#O#P2m#P#R#r#R#S2r#S#T#r#T#Y3]#Y#Z4k#Z#b3]#b#c8y#c#f3]#f#g9p#g#h3]#h#i:g#i#o3]#o#p#r#p#q<_#q;'S#r;'S;=`$Z<%l~#r~O#r~~<xS#wUqSOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#rS$^P;=`<%l#r^$hUqS!_YOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#rU%RUqS!jQOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#r~%jO!o~~%oO!m~U%vUqS!gQOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#rU&aUqS!lQOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#rU&xWqSOt#ruw#rx!Q#r!Q!['b![#O#r#P;'S#r;'S;=`$Z<%lO#rU'iYqSfQOt#ruw#rx!O#r!O!P(X!P!Q#r!Q!['b![#O#r#P;'S#r;'S;=`$Z<%lO#rU(^WqSOt#ruw#rx!Q#r!Q![(v![#O#r#P;'S#r;'S;=`$Z<%lO#rU(}WqSfQOt#ruw#rx!Q#r!Q![(v![#O#r#P;'S#r;'S;=`$Z<%lO#rU)lWqSOt#ruw#rx!P#r!P!Q*U!Q#O#r#P;'S#r;'S;=`$Z<%lO#rU*Z^qSOY+VYZ#rZt+Vtu,Yuw+Vwx,Yx!P+V!P!Q#r!Q!}+V!}#O0{#O#P.h#P;'S+V;'S;=`1|<%lO+VU+^^qSuQOY+VYZ#rZt+Vtu,Yuw+Vwx,Yx!P+V!P!Q.}!Q!}+V!}#O0{#O#P.h#P;'S+V;'S;=`1|<%lO+VQ,_XuQOY,YZ!P,Y!P!Q,z!Q!},Y!}#O-i#O#P.h#P;'S,Y;'S;=`.w<%lO,YQ,}P!P!Q-QQ-VUuQ#Z#[-Q#]#^-Q#a#b-Q#g#h-Q#i#j-Q#m#n-QQ-lVOY-iZ#O-i#O#P.R#P#Q,Y#Q;'S-i;'S;=`.b<%lO-iQ.USOY-iZ;'S-i;'S;=`.b<%lO-iQ.eP;=`<%l-iQ.kSOY,YZ;'S,Y;'S;=`.w<%lO,YQ.zP;=`<%l,YU/SWqSOt#ruw#rx!P#r!P!Q/l!Q#O#r#P;'S#r;'S;=`$Z<%lO#rU/sbqSuQOt#ruw#rx#O#r#P#Z#r#Z#[/l#[#]#r#]#^/l#^#a#r#a#b/l#b#g#r#g#h/l#h#i#r#i#j/l#j#m#r#m#n/l#n;'S#r;'S;=`$Z<%lO#rU1Q[qSOY0{YZ#rZt0{tu-iuw0{wx-ix#O0{#O#P.R#P#Q+V#Q;'S0{;'S;=`1v<%lO0{U1yP;=`<%l0{U2PP;=`<%l+VU2ZUqSnQOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#r~2rO!p~U2yUqSwQOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#rU3bYqSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#o3]#o;'S#r;'S;=`$Z<%lO#rU4XUyQqSOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#rU4pZqSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#U5c#U#o3]#o;'S#r;'S;=`$Z<%lO#rU5h[qSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#`3]#`#a6^#a#o3]#o;'S#r;'S;=`$Z<%lO#rU6c[qSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#g3]#g#h7X#h#o3]#o;'S#r;'S;=`$Z<%lO#rU7^[qSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#X3]#X#Y8S#Y#o3]#o;'S#r;'S;=`$Z<%lO#rU8ZYtQqSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#o3]#o;'S#r;'S;=`$Z<%lO#r^9QY!qWqSOt#ruw#rx!_#r!_!`4Q!`#O#r
#P#T#r#T#o3]#o;'S#r;'S;=`$Z<%lO#r^9wY!sWqSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#o3]#o;'S#r;'S;=`$Z<%lO#r^:n[!rWqSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#f3]#f#g;d#g#o3]#o;'S#r;'S;=`$Z<%lO#rU;i[qSOt#ruw#rx!_#r!_!`4Q!`#O#r#P#T#r#T#i3]#i#j7X#j#o3]#o;'S#r;'S;=`$Z<%lO#rU<fUzQqSOt#ruw#rx#O#r#P;'S#r;'S;=`$Z<%lO#r~<}O!v~",
tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!c~~", 11)],
tokenData: ">i~RzOX#uXY$dYZ$}Zp#upq$dqs#ust%htu'Puw#uwx'Uxy'Zyz'tz{#u{|(_|}#u}!O(_!O!P#u!P!Q+R!Q![(|![!]3n!]!^$}!^#O#u#O#P4X#P#R#u#R#S4^#S#T#u#T#Y4w#Y#Z6V#Z#b4w#b#c:e#c#f4w#f#g;[#g#h4w#h#i<R#i#o4w#o#p#u#p#q=y#q;'S#u;'S;=`$^<%l~#u~O#u~~>dS#zUqSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uS$aP;=`<%l#u^$kUqS!_YOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU%UUqS!kQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u^%oZqS!`YOY%hYZ#uZt%htu&buw%hwx&bx#O%h#O#P&b#P;'S%h;'S;=`&y<%lO%hY&gS!`YOY&bZ;'S&b;'S;=`&s<%lO&bY&vP;=`<%l&b^&|P;=`<%l%h~'UO!p~~'ZO!n~U'bUqS!hQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU'{UqS!mQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU(dWqSOt#uuw#ux!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)TYqSfQOt#uuw#ux!O#u!O!P)s!P!Q#u!Q![(|![#O#u#P;'S#u;'S;=`$^<%lO#uU)xWqSOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU*iWqSfQOt#uuw#ux!Q#u!Q![*b![#O#u#P;'S#u;'S;=`$^<%lO#uU+WWqSOt#uuw#ux!P#u!P!Q+p!Q#O#u#P;'S#u;'S;=`$^<%lO#uU+u^qSOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q#u!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qU,x^qSuQOY,qYZ#uZt,qtu-tuw,qwx-tx!P,q!P!Q0i!Q!},q!}#O2g#O#P0S#P;'S,q;'S;=`3h<%lO,qQ-yXuQOY-tZ!P-t!P!Q.f!Q!}-t!}#O/T#O#P0S#P;'S-t;'S;=`0c<%lO-tQ.iP!P!Q.lQ.qUuQ#Z#[.l#]#^.l#a#b.l#g#h.l#i#j.l#m#n.lQ/WVOY/TZ#O/T#O#P/m#P#Q-t#Q;'S/T;'S;=`/|<%lO/TQ/pSOY/TZ;'S/T;'S;=`/|<%lO/TQ0PP;=`<%l/TQ0VSOY-tZ;'S-t;'S;=`0c<%lO-tQ0fP;=`<%l-tU0nWqSOt#uuw#ux!P#u!P!Q1W!Q#O#u#P;'S#u;'S;=`$^<%lO#uU1_bqSuQOt#uuw#ux#O#u#P#Z#u#Z#[1W#[#]#u#]#^1W#^#a#u#a#b1W#b#g#u#g#h1W#h#i#u#i#j1W#j#m#u#m#n1W#n;'S#u;'S;=`$^<%lO#uU2l[qSOY2gYZ#uZt2gtu/Tuw2gwx/Tx#O2g#O#P/m#P#Q,q#Q;'S2g;'S;=`3b<%lO2gU3eP;=`<%l2gU3kP;=`<%l,qU3uUqSnQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~4^O!q~U4eUqSwQOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU4|YqSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#uU5sUyQqSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#uU6[ZqSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#U6}#U#o4w#o;'S#u;'S;=`$^<%lO#uU7S[qSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#`4w#`#a7x#a#o4w#o;'S#u;'S;=`$^<%lO#uU7}[qSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#g4w#g#h8s#h#o4w#o;'S#u;'S;=`$^<%lO#uU8x[qSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#X4w#X#Y9n#Y#
o4w#o;'S#u;'S;=`$^<%lO#uU9uYtQqSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^:lY!rWqSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^;cY!tWqSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#o4w#o;'S#u;'S;=`$^<%lO#u^<Y[!sWqSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#f4w#f#g=O#g#o4w#o;'S#u;'S;=`$^<%lO#uU=T[qSOt#uuw#ux!_#u!_!`5l!`#O#u#P#T#u#T#i4w#i#j8s#j#o4w#o;'S#u;'S;=`$^<%lO#uU>QUzQqSOt#uuw#ux#O#u#P;'S#u;'S;=`$^<%lO#u~>iO!w~",
tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO!d~~", 11)],
topRules: {"Program":[0,18]},
specialized: [{term: 13, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 13, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}],
tokenPrec: 759
tokenPrec: 776
})

View File

@ -30,6 +30,204 @@ describe('Identifier', () => {
FunctionCallOrIdentifier
Identifier moo-😊-34`)
})
// The mathematical pi symbol on its own is expected to parse as an Identifier.
test('parses mathematical unicode symbols like 𝜋 as identifiers', () => {
  const source = '𝜋'
  expect(source).toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝜋`)
})
})
describe('Unicode Symbol Support', () => {
// Emoji ranges: each test feeds a bare symbol (and usually a symbol-prefixed
// dashed name) to the parser and expects it back as a single Identifier.
// The expected tree must repeat the input text exactly, symbol included.
describe('Emoji (currently supported)', () => {
test('Basic Emoticons (U+1F600-U+1F64F)', () => {
expect('😀').toMatchTree(`
FunctionCallOrIdentifier
Identifier 😀`)
expect('😊-counter').toMatchTree(`
FunctionCallOrIdentifier
Identifier 😊-counter`)
})
test('Miscellaneous Symbols and Pictographs (U+1F300-U+1F5FF)', () => {
expect('🌍').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🌍`)
expect('🔥-handler').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🔥-handler`)
})
test('Transport and Map Symbols (U+1F680-U+1F6FF)', () => {
expect('🚀').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🚀`)
expect('🚀-launch').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🚀-launch`)
})
test('Regional Indicator Symbols / Flags (U+1F1E6-U+1F1FF)', () => {
// Note: Flags are typically two regional indicators combined
expect('🇺').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🇺`)
})
test('Supplemental Symbols and Pictographs (U+1F900-U+1F9FF)', () => {
expect('🤖').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🤖`)
expect('🦀-lang').toMatchTree(`
FunctionCallOrIdentifier
Identifier 🦀-lang`)
})
test('Dingbats (U+2700-U+27BF)', () => {
// Expected identifier text restored: it has to echo the dingbat from the input.
expect('✂').toMatchTree(`
FunctionCallOrIdentifier
Identifier ✂`)
expect('✨-magic').toMatchTree(`
FunctionCallOrIdentifier
Identifier ✨-magic`)
})
test('Miscellaneous Symbols (U+2600-U+26FF)', () => {
// Expected identifier text restored: it has to echo the symbol from the input.
expect('⚡').toMatchTree(`
FunctionCallOrIdentifier
Identifier ⚡`)
expect('☀-bright').toMatchTree(`
FunctionCallOrIdentifier
Identifier ☀-bright`)
})
})
// Greek letters as identifiers: every case expects the lone letter to come
// back as an Identifier under FunctionCallOrIdentifier.
// NOTE(review): the suite title says "not currently supported", yet each test
// asserts a successful parse — the title looks stale; confirm before renaming.
describe('Greek Letters (not currently supported)', () => {
test('Greek lowercase alpha α (U+03B1)', () => {
expect('α').toMatchTree(`
FunctionCallOrIdentifier
Identifier α`)
})
test('Greek lowercase beta β (U+03B2)', () => {
expect('β').toMatchTree(`
FunctionCallOrIdentifier
Identifier β`)
})
test('Greek lowercase lambda λ (U+03BB)', () => {
expect('λ').toMatchTree(`
FunctionCallOrIdentifier
Identifier λ`)
})
test('Greek lowercase pi π (U+03C0)', () => {
// Note: This is different from mathematical pi 𝜋
expect('π').toMatchTree(`
FunctionCallOrIdentifier
Identifier π`)
})
})
// Mathematical Alphanumeric Symbols as identifiers: each single styled letter
// is expected to parse as an Identifier under FunctionCallOrIdentifier.
// NOTE(review): the suite title says "not currently supported", yet each test
// asserts a successful parse — the title looks stale; confirm before renaming.
describe('Mathematical Alphanumeric Symbols (not currently supported)', () => {
test('Mathematical italic small pi 𝜋 (U+1D70B)', () => {
expect('𝜋').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝜋`)
})
test('Mathematical bold small x 𝐱 (U+1D431)', () => {
expect('𝐱').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝐱`)
})
test('Mathematical script capital F 𝓕 (U+1D4D5)', () => {
expect('𝓕').toMatchTree(`
FunctionCallOrIdentifier
Identifier 𝓕`)
})
})
// Mathematical operator symbols used as identifiers: each lone symbol is
// expected to parse as an Identifier whose text is the symbol itself.
// The expected trees previously had an empty identifier text; the symbol from
// the corresponding input is restored on each `Identifier` line.
// NOTE(review): the suite title says "not currently supported", yet each test
// asserts a successful parse — the title looks stale; confirm before renaming.
describe('Mathematical Operators (not currently supported)', () => {
test('Infinity symbol ∞ (U+221E)', () => {
expect('∞').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∞`)
})
test('Sum symbol ∑ (U+2211)', () => {
expect('∑').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∑`)
})
test('Integral symbol ∫ (U+222B)', () => {
expect('∫').toMatchTree(`
FunctionCallOrIdentifier
Identifier ∫`)
})
})
// Superscript/subscript digits inside a name: the whole word, including the
// raised or lowered digit, is expected to come back as one Identifier.
// NOTE(review): the suite title says "not currently supported", yet each test
// asserts a successful parse — the title looks stale; confirm before renaming.
describe('Superscripts and Subscripts (not currently supported)', () => {
test('Superscript two ² (U+00B2)', () => {
expect('x²').toMatchTree(`
FunctionCallOrIdentifier
Identifier x²`)
})
test('Subscript two ₂ (U+2082)', () => {
// Expected text restored to include the subscript digit from the input.
expect('h₂o').toMatchTree(`
FunctionCallOrIdentifier
Identifier h₂o`)
})
})
// Arrow symbols used as identifiers: each lone arrow is expected to parse as
// an Identifier whose text is the arrow itself.
// The expected trees previously had an empty identifier text; the arrow from
// the corresponding input is restored on each `Identifier` line.
// NOTE(review): the suite title says "not currently supported", yet each test
// asserts a successful parse — the title looks stale; confirm before renaming.
describe('Arrows (not currently supported)', () => {
test('Rightward arrow → (U+2192)', () => {
expect('→').toMatchTree(`
FunctionCallOrIdentifier
Identifier →`)
})
test('Leftward arrow ← (U+2190)', () => {
expect('←').toMatchTree(`
FunctionCallOrIdentifier
Identifier ←`)
})
test('Double rightward arrow ⇒ (U+21D2)', () => {
expect('⇒').toMatchTree(`
FunctionCallOrIdentifier
Identifier ⇒`)
})
})
// Hiragana, Katakana and CJK ideographs used as identifiers: each lone
// character is expected to parse as an Identifier whose text is the character.
// The expected trees previously had an empty identifier text; the character
// from the corresponding input is restored on each `Identifier` line.
// NOTE(review): the suite title says "not currently supported", yet each test
// asserts a successful parse — the title looks stale; confirm before renaming.
describe('CJK Symbols (not currently supported)', () => {
test('Hiragana あ (U+3042)', () => {
expect('あ').toMatchTree(`
FunctionCallOrIdentifier
Identifier あ`)
})
test('Katakana カ (U+30AB)', () => {
expect('カ').toMatchTree(`
FunctionCallOrIdentifier
Identifier カ`)
})
test('CJK Unified Ideograph 中 (U+4E2D)', () => {
expect('中').toMatchTree(`
FunctionCallOrIdentifier
Identifier 中`)
})
})
})
describe('Parentheses', () => {
@ -349,3 +547,27 @@ describe('DotGet whitespace sensitivity', () => {
expect('readme.txt').toMatchTree(`Word readme.txt`)
})
})
// Comment handling: text from `#` to the end of the line is expected to leave
// no trace in the parse tree — only the surrounding code produces nodes.
describe('Comments', () => {
test('are barely there', () => {
// Trailing comments after two assignments: only the two Assign nodes remain.
expect(`x = 5 # one banana\ny = 2 # two bananas`).toMatchTree(`
Assign
AssignableIdentifier x
Eq =
Number 5
Assign
AssignableIdentifier y
Eq =
Number 2`)
// Whole-line comments before and after the code, plus a trailing comment on
// the assignment: the tree still contains only the Assign and the BinOp.
expect('# some comment\nbasename = 5 # very astute\n basename / prop\n# good info').toMatchTree(`
Assign
AssignableIdentifier basename
Eq =
Number 5
BinOp
Identifier basename
Slash /
Identifier prop`)
})
})

View File

@ -4,7 +4,7 @@ import '../shrimp.grammar' // Importing this so changes cause it to retest!
describe('if/elseif/else', () => {
test('parses single line if', () => {
expect(`if y = 1: 'cool'`).toMatchTree(`
expect(`if y = 1: 'cool' end`).toMatchTree(`
IfExpr
keyword if
ConditionalOp
@ -12,12 +12,13 @@ describe('if/elseif/else', () => {
Eq =
Number 1
colon :
ThenBlock
SingleLineThenBlock
String
StringFragment cool
keyword end
`)
expect('a = if x: 2').toMatchTree(`
expect('a = if x: 2 end').toMatchTree(`
Assign
AssignableIdentifier a
Eq =
@ -25,8 +26,9 @@ describe('if/elseif/else', () => {
keyword if
Identifier x
colon :
ThenBlock
SingleLineThenBlock
Number 2
keyword end
`)
})
@ -138,7 +140,7 @@ describe('if/elseif/else', () => {
})
test('does not parse identifiers that start with if', () => {
expect('iffy = if true: 2').toMatchTree(`
expect('iffy = if true: 2 end').toMatchTree(`
Assign
AssignableIdentifier iffy
Eq =
@ -146,8 +148,9 @@ describe('if/elseif/else', () => {
keyword if
Boolean true
colon :
ThenBlock
SingleLineThenBlock
Number 2
keyword end
`)
})
})

View File

@ -71,4 +71,20 @@ end
keyword end
`)
})
// Parses a `do: … end` function whose body sits on its own line(s) and expects
// a FunctionDef with empty Params and the body expression as a direct child.
// NOTE(review): the title promises empty lines inside the function, but no
// blank lines are visible in the input as shown here — confirm they are
// actually present in the template literal.
test('multiline with empty lines', () => {
expect(`
do:
2
end
`).toMatchTree(`
FunctionDef
keyword do
Params
colon :
Number 2
keyword end
`)
})
})

View File

@ -19,7 +19,7 @@ export const tokenizer = new ExternalTokenizer(
// Don't consume things that start with - or + followed by a digit (negative/positive numbers)
if ((ch === 45 /* - */ || ch === 43) /* + */ && isDigit(input.peek(1))) return
const isValidStart = isLowercaseLetter(ch) || isEmoji(ch)
const isValidStart = isLowercaseLetter(ch) || isEmojiOrUnicode(ch)
const canBeWord = stack.canShift(Word)
// Consume all word characters, tracking if it remains a valid identifier
@ -111,8 +111,8 @@ const consumeWordToken = (
if (!isWordChar(nextCh)) break
}
// Track identifier validity: must be lowercase, digit, dash, or emoji
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmoji(ch)) {
// Track identifier validity: must be lowercase, digit, dash, or emoji/unicode
if (!isLowercaseLetter(ch) && !isDigit(ch) && ch !== 45 /* - */ && !isEmojiOrUnicode(ch)) {
if (!canBeWord) break
isValidIdentifier = false
}
@ -222,7 +222,7 @@ const getFullCodePoint = (input: InputStream, pos: number): number => {
return ch
}
const isEmoji = (ch: number): boolean => {
const isEmojiOrUnicode = (ch: number): boolean => {
return (
// Basic Emoticons
(ch >= 0x1f600 && ch <= 0x1f64f) ||
@ -247,7 +247,25 @@ const isEmoji = (ch: number): boolean => {
// Additional miscellaneous items
(ch >= 0x238c && ch <= 0x2454) ||
// Combining Diacritical Marks for Symbols
(ch >= 0x20d0 && ch <= 0x20ff)
(ch >= 0x20d0 && ch <= 0x20ff) ||
// Latin-1 Supplement (includes ², ³, ¹ and other special chars)
(ch >= 0x00a0 && ch <= 0x00ff) ||
// Greek and Coptic (U+0370-U+03FF)
(ch >= 0x0370 && ch <= 0x03ff) ||
// Mathematical Alphanumeric Symbols (U+1D400-U+1D7FF)
(ch >= 0x1d400 && ch <= 0x1d7ff) ||
// Mathematical Operators (U+2200-U+22FF)
(ch >= 0x2200 && ch <= 0x22ff) ||
// Superscripts and Subscripts (U+2070-U+209F)
(ch >= 0x2070 && ch <= 0x209f) ||
// Arrows (U+2190-U+21FF)
(ch >= 0x2190 && ch <= 0x21ff) ||
// Hiragana (U+3040-U+309F)
(ch >= 0x3040 && ch <= 0x309f) ||
// Katakana (U+30A0-U+30FF)
(ch >= 0x30a0 && ch <= 0x30ff) ||
// CJK Unified Ideographs (U+4E00-U+9FFF)
(ch >= 0x4e00 && ch <= 0x9fff)
)
}