From 6dd4bed399303c52adc65e17a927c56cf9d768b7 Mon Sep 17 00:00:00 2001
From: Chris Wanstrath
Date: Sun, 9 Nov 2025 23:58:47 -0800
Subject: [PATCH] allow lines to start with | (pipes)

---
 src/parser/shrimp.grammar        |   7 +-
 src/parser/shrimp.terms.ts       |  90 +++++++++++-----------
 src/parser/shrimp.ts             |  26 +++---
 src/parser/tests/pipes.test.ts   | 124 +++++++++++++++++++++++++++++++
 src/parser/tests/strings.test.ts |   9 +--
 src/parser/tokenizer.ts          |  33 +++++++-
 src/utils/tree.ts                |   3 +-
 7 files changed, 224 insertions(+), 68 deletions(-)

diff --git a/src/parser/shrimp.grammar b/src/parser/shrimp.grammar
index 80adcdb..4a8ee5a 100644
--- a/src/parser/shrimp.grammar
+++ b/src/parser/shrimp.grammar
@@ -21,7 +21,7 @@
     ("-" | "+")? $[0-9]+ ("_"? $[0-9]+)* ('.' $[0-9]+ ("_"? $[0-9]+)*)?
   }
   Boolean { "true" | "false" }
-  newlineOrSemicolon { "\n" | ";" }
+  semicolon { ";" }
   eof { @eof }
   space { " " | "\t" }
   Comment { "#" ![\n]* }
@@ -33,6 +33,8 @@
   "|"[@name=operator]
 }
 
+newlineOrSemicolon { newline | semicolon }
+
 end { @specialize[@name=keyword] }
 while { @specialize[@name=keyword] }
 if { @specialize[@name=keyword] }
@@ -45,6 +47,7 @@
 import { @specialize[@name=keyword] }
 null { @specialize[@name=Null] }
 @external tokens tokenizer from "./tokenizer" { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, CurlyString }
+@external tokens pipeStartsLineTokenizer from "./tokenizer" { newline, PipeStartsLine }
 @external specialize {Identifier} specializeKeyword from "./tokenizer" { Do }
 
 @precedence {
@@ -84,7 +87,7 @@ consumeToTerminator {
 }
 
 PipeExpr {
-  pipeOperand (!pipe "|" pipeOperand)+
+  pipeOperand (!pipe (PipeStartsLine? "|") newlineOrSemicolon* pipeOperand)+
 }
 
 pipeOperand {
diff --git a/src/parser/shrimp.terms.ts b/src/parser/shrimp.terms.ts
index 0f3ba5b..d9799da 100644
--- a/src/parser/shrimp.terms.ts
+++ b/src/parser/shrimp.terms.ts
@@ -32,47 +32,49 @@ export const
   Word = 30,
   IdentifierBeforeDot = 31,
   CurlyString = 32,
-  Do = 33,
-  Comment = 34,
-  Program = 35,
-  PipeExpr = 36,
-  WhileExpr = 38,
-  keyword = 83,
-  ConditionalOp = 40,
-  ParenExpr = 41,
-  FunctionCallWithNewlines = 42,
-  DotGet = 43,
-  Number = 44,
-  PositionalArg = 45,
-  FunctionDef = 46,
-  Params = 47,
-  NamedParam = 48,
-  NamedArgPrefix = 49,
-  String = 50,
-  StringFragment = 51,
-  Interpolation = 52,
-  EscapeSeq = 53,
-  DoubleQuote = 54,
-  Boolean = 55,
-  Null = 56,
-  colon = 57,
-  CatchExpr = 58,
-  Block = 60,
-  FinallyExpr = 61,
-  Underscore = 64,
-  NamedArg = 65,
-  IfExpr = 66,
-  FunctionCall = 68,
-  ElseIfExpr = 69,
-  ElseExpr = 71,
-  FunctionCallOrIdentifier = 72,
-  BinOp = 73,
-  Regex = 74,
-  Dict = 75,
-  Array = 76,
-  FunctionCallWithBlock = 77,
-  TryExpr = 78,
-  Throw = 80,
-  Import = 82,
-  CompoundAssign = 84,
-  Assign = 85
+  newline = 101,
+  PipeStartsLine = 33,
+  Do = 34,
+  Comment = 35,
+  Program = 36,
+  PipeExpr = 37,
+  WhileExpr = 39,
+  keyword = 84,
+  ConditionalOp = 41,
+  ParenExpr = 42,
+  FunctionCallWithNewlines = 43,
+  DotGet = 44,
+  Number = 45,
+  PositionalArg = 46,
+  FunctionDef = 47,
+  Params = 48,
+  NamedParam = 49,
+  NamedArgPrefix = 50,
+  String = 51,
+  StringFragment = 52,
+  Interpolation = 53,
+  EscapeSeq = 54,
+  DoubleQuote = 55,
+  Boolean = 56,
+  Null = 57,
+  colon = 58,
+  CatchExpr = 59,
+  Block = 61,
+  FinallyExpr = 62,
+  Underscore = 65,
+  NamedArg = 66,
+  IfExpr = 67,
+  FunctionCall = 69,
+  ElseIfExpr = 70,
+  ElseExpr = 72,
+  FunctionCallOrIdentifier = 73,
+  BinOp = 74,
+  Regex = 75,
+  Dict = 76,
+  Array = 77,
+  FunctionCallWithBlock = 78,
+  TryExpr = 79,
+  Throw = 81,
+  Import = 83,
CompoundAssign = 85, + Assign = 86 diff --git a/src/parser/shrimp.ts b/src/parser/shrimp.ts index 051f00e..783d0a3 100644 --- a/src/parser/shrimp.ts +++ b/src/parser/shrimp.ts @@ -1,27 +1,27 @@ // This file was generated by lezer-generator. You probably shouldn't edit it. import {LRParser, LocalTokenGroup} from "@lezer/lr" import {operatorTokenizer} from "./operatorTokenizer" -import {tokenizer, specializeKeyword} from "./tokenizer" +import {tokenizer, pipeStartsLineTokenizer, specializeKeyword} from "./tokenizer" import {trackScope} from "./parserScopeContext" import {highlighting} from "./highlight" -const spec_Identifier = {__proto__:null,while:78, null:112, catch:118, finally:124, end:126, if:134, else:140, try:158, throw:162, import:166} +const spec_Identifier = {__proto__:null,while:80, null:114, catch:120, finally:126, end:128, if:136, else:142, try:160, throw:164, import:168} export const parser = LRParser.deserialize({ version: 14, - states: "=|QYQbOOO!mOpO'#DXO!rOSO'#D`OOQa'#D`'#D`O%mQcO'#DvO(mQcO'#EiOOQ`'#Ew'#EwO)WQRO'#DwO+]QcO'#EgO+vQbO'#DVOOQa'#Dy'#DyO.[QbO'#DzOOQa'#Ei'#EiO.cQcO'#EiO0aQcO'#EhO1fQcO'#EgO1sQRO'#ESOOQ`'#Eg'#EgO2[QbO'#EgO2cQQO'#EfOOQ`'#Ef'#EfOOQ`'#EU'#EUQYQbOOO2nQbO'#D[O2yQbO'#DpO3tQbO'#DSO4oQQO'#D|O3tQbO'#EOO4tQbO'#EQO4|ObO,59sO5XQbO'#DbO5aQWO'#DcOOOO'#Eo'#EoOOOO'#EZ'#EZO5uOSO,59zOOQa,59z,59zOOQ`'#DZ'#DZO6TQbO'#DoOOQ`'#Em'#EmOOQ`'#E^'#E^O6_QbO,5:^OOQa'#Eh'#EhO3tQbO,5:cO3tQbO,5:cO3tQbO,5:cO3tQbO,5:cO3tQbO,59pO3tQbO,59pO3tQbO,59pO3tQbO,59pOOQ`'#EW'#EWO+vQbO,59qO7XQcO'#DvO7`QcO'#EiO7gQRO,59qO7qQQO,59qO7vQQO,59qO8OQQO,59qO8ZQRO,59qO8sQRO,59qO8zQQO'#DQO9PQbO,5:fO9WQQO,5:eOOQa,5:f,5:fO9cQbO,5:fO9mQbO,5:oO9mQbO,5:nO:}QbO,5:gO;UQbO,59lOOQ`,5;Q,5;QO9mQbO'#EVOOQ`-E8S-E8SOOQ`'#EX'#EXO;pQbO'#D]O;{QbO'#D^OOQO'#EY'#EYO;sQQO'#D]O`QRO'#EvOOQO'#Ev'#EvO>gQQO,5:[O>lQRO,59nO>sQRO,59nO:}QbO,5:hO?RQcO,5:jO@aQcO,5:jO@}QcO,5:jOArQbO,5:lOOQ`'#Eb'#EbO4tQbO,5:lOOQa1G/_1G/_OOOO,59|,59|OOOO,59},59}OOOO-E8X-E8XOOQa1G/f1G/fOOQ`,5:Z,5:ZOOQ`-E8[-E8[OOQa1G/}1G/}OCkQcO1G/}OCuQcO1G/}OETQcO1G/}OE_QcO1G/}OElQcO1G/}OOQa1G/[1G/[OF}QcO1G/[OGUQcO1G/[OG]QcO1G/[OH[QcO1G/[OGdQcO1G/[OOQ`-E8U-E8UOHrQRO1G/]OH|QQO1G/]OIRQQO1G/]OIZQQO1G/]OIfQRO1G/]OImQRO1G/]OItQbO,59rOJOQQO1G/]OOQa1G/]1G/]OJWQQO1G0POOQa1G0Q1G0QOJcQbO1G0QOOQO'#E`'#E`OJWQQO1G0POOQa1G0P1G0POOQ`'#Ea'#EaOJcQbO1G0QOJmQbO1G0ZOKXQbO1G0YOKsQbO'#DjOLUQbO'#DjOLiQbO1G0ROOQ`-E8T-E8TOOQ`,5:q,5:qOOQ`-E8V-E8VOLtQQO,59wOOQO,59x,59xOOQO-E8W-E8WOL|QbO1G/bO:}QbO1G/vO:}QbO1G/YOMTQbO1G0SOM`QbO1G0WOM}QbO1G0WOOQ`-E8`-E8`ONUQQO7+$wOOQa7+$w7+$wON^QQO1G/^ONfQQO7+%kOOQa7+%k7+%kONqQbO7+%lOOQa7+%l7+%lOOQO-E8^-E8^OOQ`-E8_-E8_OOQ`'#E['#E[ON{QQO'#E[O! TQbO'#EuOOQ`,5:U,5:UO! hQbO'#DhO! mQQO'#DkOOQ`7+%m7+%mO! rQbO7+%mO! 
wQbO7+%mO!!PQbO7+$|O!!_QbO7+$|O!!oQbO7+%bO!!wQbO7+$tOOQ`7+%n7+%nO!!|QbO7+%nO!#RQbO7+%nO!#ZQbO7+%rOOQa<sAN>sOOQ`AN>SAN>SO!%zQbOAN>SO!&PQbOAN>SOOQ`-E8]-E8]OOQ`AN>hAN>hO!&XQbOAN>hO2yQbO,5:_O:}QbO,5:aOOQ`AN>tAN>tPItQbO'#EWOOQ`7+%Y7+%YOOQ`G23nG23nO!&^QbOG23nP!%^QbO'#DsOOQ`G24SG24SO!&cQQO1G/yOOQ`1G/{1G/{OOQ`LD)YLD)YO:}QbO7+%eOOQ`<xQYQ!SOOOOQ!Q'#Ej'#EjO!pO!bO'#DYO!uOSO'#DaOOQ!R'#Da'#DaO%sQ!TO'#DwO(yQ!TO'#EmOOQ!Q'#Ez'#EzO)gQRO'#DxO+oQ!TO'#EiO,]Q!SO'#DWOOQ!R'#Dz'#DzO.wQ!SO'#D{OOQ!R'#Em'#EmO/OQ!TO'#EmO1SQ!TO'#ElO2bQ!TO'#EiO2oQRO'#ETOOQ!Q'#Ei'#EiO3WQ!SO'#EiO3_QrO'#EhOOQ!Q'#Eh'#EhOOQ!Q'#EV'#EVQYQ!SOOO3pQbO'#D]O3{QbO'#DqO4vQbO'#DTO5qQQO'#D}O4vQbO'#EPO5vQbO'#ERO6OObO,59tO6ZQbO'#DcO6cQWO'#DdOOOO'#Er'#ErOOOO'#E['#E[O6wOSO,59{OOQ!R,59{,59{OOQ!Q'#D['#D[O7VQbO'#DpOOQ!Q'#Ep'#EpOOQ!Q'#E_'#E_O7aQ!SO,5:_OOQ!R'#El'#ElO4vQbO,5:dO4vQbO,5:dO4vQbO,5:dO4vQbO,5:dO4vQbO,59qO4vQbO,59qO4vQbO,59qO4vQbO,59qOOQ!Q'#EX'#EXO,]Q!SO,59rO8aQ!TO'#DwO8kQ!TO'#EmO8uQsO,59rO9SQQO,59rO9XQrO,59rO9dQrO,59rO9rQsO,59rO:bQsO,59rO:iQrO'#DRO:qQ!SO,5:gO:xQrO,5:fOOQ!R,5:g,5:gO;WQ!SO,5:gO;eQbO,5:pO;eQbO,5:oOYQ!SO,5:hO]QQO,59wO>bQcO'#ElO?_QRO'#EyO@[QRO'#EyOOQO'#Ey'#EyO@cQQO,5:]O@hQRO,59oO@oQRO,59oOYQ!SO,5:iO@}Q!TO,5:kOBcQ!TO,5:kOCVQ!TO,5:kOCdQ!SO,5:mOOQ!Q'#Ec'#EcO5vQbO,5:mOOQ!R1G/`1G/`OOOO,59},59}OOOO,5:O,5:OOOOO-E8Y-E8YOOQ!R1G/g1G/gOOQ!Q,5:[,5:[OOQ!Q-E8]-E8]OOQ!R1G0O1G0OOEiQ!TO1G0OOEsQ!TO1G0OOGXQ!TO1G0OOGcQ!TO1G0OOGpQ!TO1G0OOOQ!R1G/]1G/]OIXQ!TO1G/]OI`Q!TO1G/]OIgQ!TO1G/]OJlQ!TO1G/]OInQ!TO1G/]OOQ!Q-E8V-E8VOKSQsO1G/^OKaQQO1G/^OKfQrO1G/^OKqQrO1G/^OLPQsO1G/^OLWQsO1G/^OL_Q!SO,59sOLiQrO1G/^OOQ!R1G/^1G/^OLtQrO1G0QOOQ!R1G0R1G0ROMSQ!SO1G0ROOQp'#Ea'#EaOLtQrO1G0QOOQ!R1G0Q1G0QOOQ!Q'#Eb'#EbOMSQ!SO1G0ROMaQ!SO1G0[ONRQ!SO1G0ZONsQ!SO'#DkO! XQ!SO'#DkO! iQbO1G0SOOQ!Q-E8U-E8UOYQ!SO,5:rOOQ!Q,5:r,5:rOYQ!SO,5:rOOQ!Q-E8W-E8WO! tQQO,59xOOQO,59y,59yOOQO-E8X-E8XOYQ!SO1G/cOYQ!SO1G/wOYQ!SO1G/ZO! 
|QbO1G0TO!!XQ!SO1G0XO!!|Q!SO1G0XOOQ!Q-E8a-E8aO!#TQrO7+$xOOQ!R7+$x7+$xO!#`QrO1G/_O!#kQrO7+%lOOQ!R7+%l7+%lO!#yQ!SO7+%mOOQ!R7+%m7+%mOOQp-E8_-E8_OOQ!Q-E8`-E8`OOQ!Q'#E]'#E]O!$WQrO'#E]O!$fQ!SO'#ExOOQ`,5:V,5:VO!$vQbO'#DiO!${QQO'#DlOOQ!Q7+%n7+%nO!%QQbO7+%nO!%VQbO7+%nOOQ!Q1G0^1G0^OYQ!SO1G0^O!%_Q!SO7+$}O!%pQ!SO7+$}O!%}QbO7+%cO!&VQbO7+$uOOQ!Q7+%o7+%oO!&[QbO7+%oO!&aQbO7+%oO!&iQ!SO7+%sOOQ!R<tAN>tOOQ!QAN>TAN>TO!)cQbOAN>TO!)hQbOAN>TOOQ`-E8^-E8^OOQ!QAN>iAN>iO!)pQbOAN>iO3{QbO,5:`OYQ!SO,5:bOOQ!QAN>uAN>uPL_Q!SO'#EXOOQ`7+%Z7+%ZOOQ!QG23oG23oO!)uQbOG23oP!(uQbO'#DtOOQ!QG24TG24TO!)zQQO1G/zOOQ`1G/|1G/|OOQ!QLD)ZLD)ZOYQ!SO7+%fOOQ`<T!`#O$R#P;'S$R;'S;=`$j<%lO$RU>YV!TSOt$Ruw$Rx#O$R#P#Q>o#Q;'S$R;'S;=`$j<%lO$RU>vU#mQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$R~?_O#e~U?fU#oQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU@PU!TS!bQOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RU@h^!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#o@c#o;'S$R;'S;=`$j<%lO$RUAkU!RQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$RUBS_!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#UCR#U#o@c#o;'S$R;'S;=`$j<%lO$RUCW`!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#`@c#`#aDY#a#o@c#o;'S$R;'S;=`$j<%lO$RUD_`!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#g@c#g#hEa#h#o@c#o;'S$R;'S;=`$j<%lO$RUEf`!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#X@c#X#YFh#Y#o@c#o;'S$R;'S;=`$j<%lO$RUFo^!XQ!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#o@c#o;'S$R;'S;=`$j<%lO$R^Gr^#fW!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#o@c#o;'S$R;'S;=`$j<%lO$R^Hu^#hW!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#o@c#o;'S$R;'S;=`$j<%lO$R^Ix`#gW!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#f@c#f#gJz#g#o@c#o;'S$R;'S;=`$j<%lO$RUKP`!TSOt$Ruw$Rx}$R}!O@c!O!Q$R!Q![@c![!_$R!_!`Ad!`#O$R#P#T$R#T#i@c#i#jEa#j#o@c#o;'S$R;'S;=`$j<%lO$RULYUuQ!TSOt$Ruw$Rx#O$R#P;'S$R;'S;=`$j<%lO$R~LqO#p~", - tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, new LocalTokenGroup("[~RP!O!PU~ZO#`~~", 11)], - topRules: {"Program":[0,35]}, + tokenData: "Ln~R}OX$OXY$mYp$Opq$mqr$Ors%Wst'^tu(uuw$Owx(zxy)Pyz)jz{$O{|*T|}$O}!O*T!O!P$O!P!Q3p!Q!R*u!R![-j![!]<]!]!^Q!`#O$O#P;'S$O;'S;=`$g<%lO$OU>VV!USOt$Ouw$Ox#O$O#P#Q>l#Q;'S$O;'S;=`$g<%lO$OU>sU#pQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$O~?[O#h~U?cU#rQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU?|U!US!cQOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OU@e^!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#o@`#o;'S$O;'S;=`$g<%lO$OUAhU!SQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$OUBP_!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#UCO#U#o@`#o;'S$O;'S;=`$g<%lO$OUCT`!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#`@`#`#aDV#a#o@`#o;'S$O;'S;=`$g<%lO$OUD[`!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#g@`#g#hE^#h#o@`#o;'S$O;'S;=`$g<%lO$OUEc`!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#X@`#X#YFe#Y#o@`#o;'S$O;'S;=`$g<%lO$OUFl^!YQ!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#o@`#o;'S$O;'S;=`$g<%lO$O^Go^#iW!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#o@`#o;'S$O;'S;=`$g<%lO$O^Hr^#kW!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#o@`#o;'S$O;'S;=`$g<%lO$O^Iu`#jW!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#f@`#f#gJw#g#o@`#o;'S$O;'S;=`$g<%lO$OUJ|`!USOt$Ouw$Ox}$O}!O@`!O!Q$O!Q![@`![!_$O!_!`Aa!`#O$O#P#T$O#T#i@`#i#jE^#j#o@`#o;'S$O;'S;=`$g<%lO$OULVUvQ!USOt$Ouw$Ox#O$O#P;'S$O;'S;=`$g<%lO$O~LnO#s~", + tokenizers: [operatorTokenizer, 1, 2, 3, tokenizer, pipeStartsLineTokenizer, new LocalTokenGroup("[~RP!O!PU~ZO#c~~", 11)], + topRules: {"Program":[0,36]}, specialized: 
[{term: 28, get: (value: any, stack: any) => (specializeKeyword(value, stack) << 1), external: specializeKeyword},{term: 28, get: (value: keyof typeof spec_Identifier) => spec_Identifier[value] || -1}],
-  tokenPrec: 2370
+  tokenPrec: 2531
 })

diff --git a/src/parser/tests/pipes.test.ts b/src/parser/tests/pipes.test.ts
index 0d5da6a..e67d0a9 100644
--- a/src/parser/tests/pipes.test.ts
+++ b/src/parser/tests/pipes.test.ts
@@ -1,4 +1,5 @@
 import { expect, describe, test } from 'bun:test'
+import { parser } from '../shrimp'
 
 import '../shrimp.grammar' // Importing this so changes cause it to retest!
 
@@ -176,3 +177,126 @@ describe('pipe expressions', () => {
     `)
   })
 })
+
+describe('pipe continuation', () => {
+  test('pipe on next line', () => {
+    expect(`hello
+| echo`).toMatchTree(`
+      PipeExpr
+        FunctionCallOrIdentifier
+          Identifier hello
+        PipeStartsLine \\n
+        operator |
+        FunctionCallOrIdentifier
+          Identifier echo
+    `)
+
+    expect(`echo hello
+| grep h`).toMatchTree(`
+      PipeExpr
+        FunctionCall
+          Identifier echo
+          PositionalArg
+            Identifier hello
+        PipeStartsLine \\n
+        operator |
+        FunctionCall
+          Identifier grep
+          PositionalArg
+            Identifier h
+    `)
+  })
+
+  test('pipe on next non-empty line', () => {
+    expect(`hello
+
+
+| echo`).toMatchTree(`
+      PipeExpr
+        FunctionCallOrIdentifier
+          Identifier hello
+        PipeStartsLine \\n\\n\\n
+        operator |
+        FunctionCallOrIdentifier
+          Identifier echo
+    `)
+  })
+
+  test('multi-line pipe chain', () => {
+    expect(`echo hello
+| grep h
+| sort`).toMatchTree(`
+      PipeExpr
+        FunctionCall
+          Identifier echo
+          PositionalArg
+            Identifier hello
+        PipeStartsLine \\n
+        operator |
+        FunctionCall
+          Identifier grep
+          PositionalArg
+            Identifier h
+        PipeStartsLine \\n
+        operator |
+        FunctionCallOrIdentifier
+          Identifier sort
+    `)
+  })
+
+  test('pipe with indentation', () => {
+    expect(`echo hello
+  | grep h
+  | sort`).toMatchTree(`
+      PipeExpr
+        FunctionCall
+          Identifier echo
+          PositionalArg
+            Identifier hello
+        PipeStartsLine \\n
+        operator |
+        FunctionCall
+          Identifier grep
+          PositionalArg
+            Identifier h
+        PipeStartsLine \\n
+        operator |
+        FunctionCallOrIdentifier
+          Identifier sort
+    `)
+  })
+
+  test('pipe after operand on next line (trailing pipe style)', () => {
+    expect(`echo hello |
+grep h`).toMatchTree(`
+      PipeExpr
+        FunctionCall
+          Identifier echo
+          PositionalArg
+            Identifier hello
+        operator |
+        FunctionCall
+          Identifier grep
+          PositionalArg
+            Identifier h
+    `)
+  })
+
+  test('same-line pipes still work', () => {
+    expect('echo hello | grep h | sort').toMatchTree(`
+      PipeExpr
+        FunctionCall
+          Identifier echo
+          PositionalArg
+            Identifier hello
+        operator |
+        FunctionCall
+          Identifier grep
+          PositionalArg
+            Identifier h
+        operator |
+        FunctionCallOrIdentifier
+          Identifier sort
+    `)
+  })
+})
diff --git a/src/parser/tests/strings.test.ts b/src/parser/tests/strings.test.ts
index 7b4a672..9544a8e 100644
--- a/src/parser/tests/strings.test.ts
+++ b/src/parser/tests/strings.test.ts
@@ -142,10 +142,7 @@ describe('curly strings', () => {
       two
       three }`).toMatchTree(`
       String
-        CurlyString {
-      one
-      two
-      three }`)
+        CurlyString {\\n      one\\n      two\\n      three }`)
   })
 
   test('can contain other curlies', () => {
@@ -153,8 +150,6 @@ describe('curly strings', () => {
       two
       { three } }`).toMatchTree(`
       String
-        CurlyString { { one }
-      two
-      { three } }`)
+        CurlyString { { one }\\n      two\\n      { three } }`)
   })
 })
diff --git a/src/parser/tokenizer.ts b/src/parser/tokenizer.ts
index 8ad55c2..5d804fb 100644
--- a/src/parser/tokenizer.ts
+++ b/src/parser/tokenizer.ts
@@ -1,5 +1,5 @@
 import { ExternalTokenizer, InputStream, Stack } from '@lezer/lr'
-import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString } from './shrimp.terms'
+import { Identifier, AssignableIdentifier, Word, IdentifierBeforeDot, Do, CurlyString, newline, PipeStartsLine } from './shrimp.terms'
 
 // doobie doobie do (we need the `do` keyword to know when we're defining params)
 export function specializeKeyword(ident: string) {
@@ -346,3 +346,34 @@ const isEmojiOrUnicode = (ch: number): boolean => {
 }
 
 const getCharSize = (ch: number) => (ch > 0xffff ? 2 : 1) // emoji takes 2 UTF-16 code units
+
+export const pipeStartsLineTokenizer = new ExternalTokenizer((input: InputStream, stack: Stack) => {
+  const ch = input.peek(0)
+
+  if (ch !== 10 /* \n */) return
+
+  // skip blank lines and horizontal whitespace, remembering the last newline we saw
+  let offset = 1
+  let lastNewlineOffset = 0
+
+  while (true) {
+    const next = input.peek(offset)
+    if (next === 10 /* \n */) {
+      lastNewlineOffset = offset
+      offset++
+    } else if (next === 32 /* space */ || next === 9 /* tab */) {
+      offset++
+    } else {
+      break
+    }
+  }
+
+  // if the next non-blank line starts with a pipe, continue the pipe expression
+  if (input.peek(offset) === 124 /* | */) {
+    input.advance(lastNewlineOffset + 1)
+    input.acceptToken(PipeStartsLine)
+  } else {
+    input.advance(1)
+    input.acceptToken(newline)
+  }
+})
diff --git a/src/utils/tree.ts b/src/utils/tree.ts
index 45a9318..14d63d5 100644
--- a/src/utils/tree.ts
+++ b/src/utils/tree.ts
@@ -18,7 +18,8 @@ export const treeToString = (tree: Tree, input: string): string => {
       } while (cursor.nextSibling())
       cursor.parent()
     } else {
-      const cleanText = nodeName === 'String' ? text.slice(1, -1) : text
+      let cleanText = nodeName === 'String' ? text.slice(1, -1) : text
+      cleanText = cleanText.replaceAll('\n', '\\n')
       lines.push(`${indent}${nodeName} ${cleanText}`)
     }
   }
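
Taken together, the grammar change, the new pipeStartsLineTokenizer, and the
tests above mean a pipe chain no longer has to fit on one line. All of the
following now parse as a single PipeExpr (lifted from the new tests):

    echo hello | grep h | sort

    echo hello
    | grep h
    | sort

    echo hello |
    grep h

To poke at the result outside the test suite, something like this should
work; a minimal sketch, assuming the generated parser is imported the same
way the new test imports it:

    import { parser } from './src/parser/shrimp'

    // parse() comes from @lezer/lr's LRParser; toString() prints the node names
    const tree = parser.parse('echo hello\n| grep h\n| sort')
    console.log(tree.toString()) // expect a Program wrapping one PipeExpr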