// Tokenizer test suite.
//
// Each case feeds a source string to `expect(...)` and asserts on the tokens
// it produces using three matchers, as they are used throughout this file:
//   - toBeToken(type)            a single token of the given type
//   - toMatchToken(type, value)  a single token with the given type and text
//   - toMatchTokens(...tokens)   the full token sequence, in order
//
// NOTE(review): these matchers are not part of bun:test; presumably they are
// registered by the project's test setup/preload file - confirm.
import { expect, describe, test } from 'bun:test'

describe('constant types', () => {
  test('null', () => {
    expect(`null`).toBeToken('Null')
  })

  test('boolean', () => {
    expect(`true`).toMatchToken('Boolean', 'true')
    expect(`false`).toMatchToken('Boolean', 'false')
  })
})

describe('numbers', () => {
  test('non-numbers', () => {
    expect(`1st`).toMatchToken('Word', '1st')
    expect(`1_`).toMatchToken('Word', '1_')
    // A trailing dot is not part of the number; it is its own operator token.
    expect(`100.`).toMatchTokens(
      { type: 'Number', value: '100' },
      { type: 'Operator', value: '.' },
    )
  })

  test('simple numbers', () => {
    expect(`1`).toMatchToken('Number', '1')
    expect(`200`).toMatchToken('Number', '200')
    expect(`5.20`).toMatchToken('Number', '5.20')
    expect(`0.20`).toMatchToken('Number', '0.20')
    expect(`-20`).toMatchToken('Number', '-20')
    expect(`+20`).toMatchToken('Number', '+20')
    expect(`-2134.34`).toMatchToken('Number', '-2134.34')
    expect(`+20.5325`).toMatchToken('Number', '+20.5325')
    expect(`1_000`).toMatchToken('Number', '1_000')
    expect(`53_232_220`).toMatchToken('Number', '53_232_220')
  })

  test('binary numbers', () => {
    expect('0b110').toMatchToken('Number', '0b110')
  })

  test('hex numbers', () => {
    expect('0xdeadbeef').toMatchToken('Number', '0xdeadbeef')
    expect('0x02d3f4').toMatchToken('Number', '0x02d3f4')
  })

  test('hex numbers uppercase', () => {
    expect('0xFF').toMatchToken('Number', '0xFF')
  })

  test('octal numbers', () => {
    expect('0o644').toMatchToken('Number', '0o644')
    expect('0o055').toMatchToken('Number', '0o055')
  })

  test('negative binary', () => {
    expect('-0b110').toMatchToken('Number', '-0b110')
  })

  test('negative hex', () => {
    expect('-0xFF').toMatchToken('Number', '-0xFF')
  })

  test('negative octal', () => {
    expect('-0o755').toMatchToken('Number', '-0o755')
  })

  test('positive prefix binary', () => {
    expect('+0b110').toMatchToken('Number', '+0b110')
  })

  test('positive prefix hex', () => {
    expect('+0xFF').toMatchToken('Number', '+0xFF')
  })

  test('positive prefix octal', () => {
    expect('+0o644').toMatchToken('Number', '+0o644')
  })

  test('underscores in number', () => {
    expect(`1_000`).toMatchToken('Number', '1_000')
    expect(`1_0`).toMatchToken('Number', '1_0')
    expect('0b11_0').toMatchToken('Number', '0b11_0')
    expect('0xdead_beef').toMatchToken('Number', '0xdead_beef')
    expect('0o64_4').toMatchToken('Number', '0o64_4')
  })
})

describe('identifiers', () => {
  test('regular', () => {
    expect('name').toBeToken('Identifier')
    expect('bobby-mcgee').toBeToken('Identifier')
    expect('starts-with?').toBeToken('Identifier')
    expect('📢').toMatchToken('Identifier', '📢')
    // Surrounding whitespace is not part of the token value.
    expect(' 📢 ').toMatchToken('Identifier', '📢')
    expect(' oink-🐷-oink').toMatchToken('Identifier', 'oink-🐷-oink')
    expect('$').toMatchToken('Identifier', '$')
    expect('$cool').toMatchToken('Identifier', '$cool')
  })

  test('one character identifiers', () => {
    expect('a').toMatchToken('Identifier', 'a')
    expect('z').toMatchToken('Identifier', 'z')
    expect('$').toMatchToken('Identifier', '$')
    expect('📢').toMatchToken('Identifier', '📢')
    expect('?').toBeToken('Word') // ? alone is not valid identifier start
  })

  test('two character identifiers', () => {
    expect('ab').toMatchToken('Identifier', 'ab')
    expect('a1').toMatchToken('Identifier', 'a1')
    expect('a-').toMatchToken('Identifier', 'a-')
    expect('a?').toMatchToken('Identifier', 'a?') // ? valid at end
    expect('ab?').toMatchToken('Identifier', 'ab?')
  })

  test('three+ character identifiers', () => {
    expect('abc').toMatchToken('Identifier', 'abc')
    expect('a-b').toMatchToken('Identifier', 'a-b')
    expect('a1b').toMatchToken('Identifier', 'a1b')
    expect('abc?').toMatchToken('Identifier', 'abc?') // ? valid at end
    expect('a-b-c?').toMatchToken('Identifier', 'a-b-c?')
  })

  test('edge cases', () => {
    expect('-bobby-mcgee').toBeToken('Word')
    expect('starts-with??').toMatchToken('Identifier', 'starts-with??')
    expect('starts?with?').toMatchToken('Identifier', 'starts?with?')
    expect('a??b').toMatchToken('Identifier', 'a??b')
    expect('oink-oink!').toBeToken('Word')
    expect('dog#pound').toMatchToken('Word', 'dog#pound')
    expect('http://website.com').toMatchToken('Word', 'http://website.com')
    expect('school$cool').toMatchToken('Identifier', 'school$cool')
    // A trailing colon is split off as its own token.
    expect('EXIT:').toMatchTokens(
      { type: 'Word', value: 'EXIT' },
      { type: 'Colon' },
    )
    expect(`if y == 1: 'cool' end`).toMatchTokens(
      { type: 'Keyword', value: 'if' },
      { type: 'Identifier', value: 'y' },
      { type: 'Operator', value: '==' },
      { type: 'Number', value: '1' },
      { type: 'Colon' },
      { type: 'String', value: `'cool'` },
      { type: 'Keyword', value: 'end' },
    )
  })
})

describe('paths', () => {
  test('starting with ./', () => {
    expect('./tmp').toMatchToken('Word', './tmp')
  })

  test('starting with /', () => {
    expect('/home/chris/dev').toMatchToken('Word', '/home/chris/dev')
  })

  test('identifiers with dots tokenize separately', () => {
    expect('readme.txt').toMatchTokens(
      { type: 'Identifier', value: 'readme' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'txt' },
    )
  })

  test('words (non-identifiers) consume dots', () => {
    expect('README.md').toMatchToken('Word', 'README.md')
  })

  test('all sorts of weird stuff', () => {
    expect('dog#pound').toMatchToken('Word', 'dog#pound')
    // Type is stated explicitly, like every other path assertion in this file.
    expect('my/kinda/place').toMatchToken('Word', 'my/kinda/place')
    expect('file://%/$##/@40!/index.php').toMatchToken('Word', 'file://%/$##/@40!/index.php')
  })
})

describe('strings', () => {
  test('single quoted', () => {
    expect(`'hello world'`).toMatchToken('String', `'hello world'`)
    // Type is stated explicitly, matching the other escaped-quote assertions below.
    expect(`'it\\'s a beautiful world'`).toMatchToken('String', "'it\\'s a beautiful world'")
  })

  test('double quoted', () => {
    expect(`"hello world"`).toMatchToken('String', `"hello world"`)
    expect(`"it's a beautiful world"`).toMatchToken('String', `"it's a beautiful world"`)
  })

  test('empty strings', () => {
    expect(`''`).toMatchToken('String', `''`)
    expect(`""`).toMatchToken('String', `""`)
  })

  test('escape sequences', () => {
    // The token value keeps the escape sequences exactly as written in the source.
    expect(`'hello\\nworld'`).toMatchToken('String', `'hello\\nworld'`)
    expect(`'tab\\there'`).toMatchToken('String', `'tab\\there'`)
    expect(`'quote\\''`).toMatchToken('String', `'quote\\''`)
    expect(`'backslash\\\\'`).toMatchToken('String', `'backslash\\\\'`)
    expect(`'dollar\\$sign'`).toMatchToken('String', `'dollar\\$sign'`)
  })

  test('unclosed strings - error case', () => {
    // Unterminated input. These assertions pin the current behaviour: the
    // remaining input is emitted as a single String token rather than failing.
    expect(`'hello`).toMatchToken('String', `'hello`)
    expect(`"world`).toMatchToken('String', `"world`)
  })
})

describe('curly strings', () => {
  test('curly quoted', () => {
    expect('{ one two three }').toMatchToken('String', `{ one two three }`)
  })

  test('work on multiple lines', () => {
    // The same literal is used as input and expected value, so the line
    // breaks must come back verbatim in the token.
    const multiline = `{
      one
      two
      three }`
    expect(multiline).toMatchToken('String', multiline)
  })

  test('can contain other curlies', () => {
    expect(`{ { one } two { three } }`).toMatchToken('String', `{ { one } two { three } }`)
  })

  test('empty curly string', () => {
    expect('{}').toMatchToken('String', '{}')
  })

  test('unclosed curly string - error case', () => {
    // Unterminated input. These assertions pin the current behaviour: the
    // remaining input is emitted as a single String token rather than failing.
    expect('{ hello').toMatchToken('String', '{ hello')
    expect('{ nested { unclosed }').toMatchToken('String', '{ nested { unclosed }')
  })
})

describe('operators', () => {
  test('math operators', () => {
    // assignment
    expect('=').toMatchToken('Operator', '=')

    // logic
    expect('or').toMatchToken('Operator', 'or')
    expect('and').toMatchToken('Operator', 'and')

    // bitwise
    expect('band').toMatchToken('Operator', 'band')
    expect('bor').toMatchToken('Operator', 'bor')
    expect('bxor').toMatchToken('Operator', 'bxor')
    expect('>>>').toMatchToken('Operator', '>>>')
    expect('>>').toMatchToken('Operator', '>>')
    expect('<<').toMatchToken('Operator', '<<')

    // compound assignment
    expect('??=').toMatchToken('Operator', '??=')
    expect('+=').toMatchToken('Operator', '+=')
    expect('-=').toMatchToken('Operator', '-=')
    expect('*=').toMatchToken('Operator', '*=')
    expect('/=').toMatchToken('Operator', '/=')
    expect('%=').toMatchToken('Operator', '%=')

    // nullish
    expect('??').toMatchToken('Operator', '??')

    // math
    expect('**').toMatchToken('Operator', '**')
    expect('*').toMatchToken('Operator', '*')
    expect('/').toMatchToken('Operator', '/')
    expect('+').toMatchToken('Operator', '+')
    expect('-').toMatchToken('Operator', '-')
    expect('%').toMatchToken('Operator', '%')

    // comparison
    expect('>=').toMatchToken('Operator', '>=')
    expect('<=').toMatchToken('Operator', '<=')
    expect('!=').toMatchToken('Operator', '!=')
    expect('==').toMatchToken('Operator', '==')
    expect('>').toMatchToken('Operator', '>')
    expect('<').toMatchToken('Operator', '<')

    // property access
    expect('.').toMatchToken('Operator', '.')
  })
})

describe('keywords', () => {
  test('keywords', () => {
    expect(`import`).toMatchToken('Keyword', 'import')
    expect(`end`).toMatchToken('Keyword', 'end')
    expect(`do`).toMatchToken('Keyword', 'do')
    expect(`while`).toMatchToken('Keyword', 'while')
    expect(`if`).toMatchToken('Keyword', 'if')
    expect(`else`).toMatchToken('Keyword', 'else')
    expect(`try`).toMatchToken('Keyword', 'try')
    expect(`catch`).toMatchToken('Keyword', 'catch')
    expect(`finally`).toMatchToken('Keyword', 'finally')
    expect(`throw`).toMatchToken('Keyword', 'throw')
  })
})

describe('regex', () => {
  test('use double slash', () => {
    expect(`//[0-9]+//`).toMatchToken('Regex', '//[0-9]+//')
  })
})

describe('punctuation', () => {
  test('underscore', () => {
    expect(`_`).toBeToken('Underscore')
    // Only a lone underscore is punctuation; a run of them is a Word.
    expect(`__`).toMatchToken('Word', '__')
  })

  test('semicolon', () => {
    expect(`;`).toBeToken('Semicolon')
  })

  test('newline', () => {
    expect('\n').toBeToken('Newline')
  })

  test('colon', () => {
    expect(':').toBeToken('Colon')
  })
})

describe('comments', () => {
  test('comments', () => {
    expect(`# hey friends`).toMatchToken('Comment', '# hey friends')
    expect(`#hey-friends`).toMatchToken('Comment', '#hey-friends')
  })
})

describe('brackets', () => {
  test('parens', () => {
    expect(`(`).toBeToken('OpenParen')
    expect(`)`).toBeToken('CloseParen')
  })

  test('staples', () => {
    expect(`[`).toBeToken('OpenBracket')
    expect(`]`).toBeToken('CloseBracket')
  })
})

describe('multiple tokens', () => {
  test('constants work fine', () => {
    expect(`null true false`).toMatchTokens(
      { type: 'Null' },
      { type: 'Boolean', value: 'true' },
      { type: 'Boolean', value: 'false' },
    )
  })

  test('numbers', () => {
    expect(`100 -400.42 null`).toMatchTokens(
      { type: 'Number', value: '100' },
      { type: 'Number', value: '-400.42' },
      { type: 'Null' },
    )
  })

  test('whitespace', () => {
    // The line breaks inside the template literal are significant: each one
    // is expected to show up as a Newline token (including the blank line).
    expect(`
      'hello world'

      'goodbye world'
    `).toMatchTokens(
      { type: 'Newline' },
      { type: 'String', value: "'hello world'" },
      { type: 'Newline' },
      { type: 'Newline' },
      { type: 'String', value: "'goodbye world'" },
      { type: 'Newline' },
    )
  })

  test('newline in parens is ignored', () => {
    // Line breaks between the parens must not produce Newline tokens.
    expect(`(
      'hello world'

      'goodbye world'
    )`).toMatchTokens(
      { type: 'OpenParen' },
      { type: 'String', value: "'hello world'" },
      { type: 'String', value: "'goodbye world'" },
      { type: 'CloseParen' },
    )
  })

  test('newline in brackets is ignored', () => {
    // Line breaks between the brackets must not produce Newline tokens.
    expect(`[
      a b
      c d
      e f
    ]`).toMatchTokens(
      { type: 'OpenBracket' },
      { type: 'Identifier', value: 'a' },
      { type: 'Identifier', value: 'b' },
      { type: 'Identifier', value: 'c' },
      { type: 'Identifier', value: 'd' },
      { type: 'Identifier', value: 'e' },
      { type: 'Identifier', value: 'f' },
      { type: 'CloseBracket' },
    )
  })

  test('function call', () => {
    expect('echo hello world').toMatchTokens(
      { type: 'Identifier', value: 'echo' },
      { type: 'Identifier', value: 'hello' },
      { type: 'Identifier', value: 'world' },
    )
  })

  test('assignment', () => {
    expect('x = 5').toMatchTokens(
      { type: 'Identifier', value: 'x' },
      { type: 'Operator', value: '=' },
      { type: 'Number', value: '5' },
    )
  })

  test('math expression', () => {
    expect('1 + 2 * 3').toMatchTokens(
      { type: 'Number', value: '1' },
      { type: 'Operator', value: '+' },
      { type: 'Number', value: '2' },
      { type: 'Operator', value: '*' },
      { type: 'Number', value: '3' },
    )
  })

  test('inline comment', () => {
    expect('x = 5 # set x').toMatchTokens(
      { type: 'Identifier', value: 'x' },
      { type: 'Operator', value: '=' },
      { type: 'Number', value: '5' },
      { type: 'Comment', value: '# set x' },
    )
  })

  test('line comment', () => {
    // The comment ends at the line break; the following line is tokenized normally.
    expect('x = 5 \n# hello\n set x').toMatchTokens(
      { type: 'Identifier', value: 'x' },
      { type: 'Operator', value: '=' },
      { type: 'Number', value: '5' },
      { type: 'Newline' },
      { type: 'Comment', value: '# hello' },
      { type: 'Newline' },
      { type: 'Identifier', value: 'set' },
      { type: 'Identifier', value: 'x' },
    )
  })

  test('colons separate tokens', () => {
    expect('x do: y').toMatchTokens(
      { type: 'Identifier', value: 'x' },
      { type: 'Keyword', value: 'do' },
      { type: 'Colon' },
      { type: 'Identifier', value: 'y' },
    )

    expect('x: y').toMatchTokens(
      { type: 'Identifier', value: 'x' },
      { type: 'Colon' },
      { type: 'Identifier', value: 'y' },
    )

    expect('5: y').toMatchTokens(
      { type: 'Number', value: '5' },
      { type: 'Colon' },
      { type: 'Identifier', value: 'y' },
    )

    expect(`if (var? 'abc'): y`).toMatchTokens(
      { type: 'Keyword', value: 'if' },
      { type: 'OpenParen' },
      { type: 'Identifier', value: 'var?' },
      { type: 'String', value: `'abc'` },
      { type: 'CloseParen' },
      { type: 'Colon' },
      { type: 'Identifier', value: 'y' },
    )

    // Multi-line block: the three line breaks map to the three Newline tokens.
    expect(`
      do x:
        y
      end`).toMatchTokens(
      { type: 'Newline' },
      { type: 'Keyword', value: 'do' },
      { type: 'Identifier', value: 'x' },
      { type: 'Colon' },
      { type: 'Newline' },
      { type: 'Identifier', value: 'y' },
      { type: 'Newline' },
      { type: 'Keyword', value: 'end' },
    )
  })

  test('semicolons separate statements', () => {
    expect('x; y').toMatchTokens(
      { type: 'Identifier', value: 'x' },
      { type: 'Semicolon' },
      { type: 'Identifier', value: 'y' },
    )
  })

  test('semicolons in parens', () => {
    expect('(x; y)').toMatchTokens(
      { type: 'OpenParen' },
      { type: 'Identifier', value: 'x' },
      { type: 'Semicolon' },
      { type: 'Identifier', value: 'y' },
      { type: 'CloseParen' },
    )
  })

  test('dot operator beginning word with slash', () => {
    expect(`(basename ./cool)`).toMatchTokens(
      { type: 'OpenParen' },
      { type: 'Identifier', value: 'basename' },
      { type: 'Word', value: './cool' },
      { type: 'CloseParen' },
    )
  })

  test('dot word after identifier with space', () => {
    expect(`expand-path .git`).toMatchTokens(
      { type: 'Identifier', value: 'expand-path' },
      { type: 'Word', value: '.git' },
    )
  })

  test('dot operator after identifier without space', () => {
    expect(`config.path`).toMatchTokens(
      { type: 'Identifier', value: 'config' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'path' },
    )
  })
})

describe('nesting edge cases', () => {
  test('deeply nested parens', () => {
    expect('((nested))').toMatchTokens(
      { type: 'OpenParen' },
      { type: 'OpenParen' },
      { type: 'Identifier', value: 'nested' },
      { type: 'CloseParen' },
      { type: 'CloseParen' },
    )
  })

  test('mixed nesting', () => {
    expect('([combo])').toMatchTokens(
      { type: 'OpenParen' },
      { type: 'OpenBracket' },
      { type: 'Identifier', value: 'combo' },
      { type: 'CloseBracket' },
      { type: 'CloseParen' },
    )
  })
})

describe('invalid numbers that should be words', () => {
  test('invalid binary', () => {
    expect('0b2').toMatchToken('Word', '0b2')
    expect('0b123').toMatchToken('Word', '0b123')
  })

  test('invalid octal', () => {
    expect('0o8').toMatchToken('Word', '0o8')
    expect('0o999').toMatchToken('Word', '0o999')
  })

  test('invalid hex', () => {
    expect('0xGGG').toMatchToken('Word', '0xGGG')
    expect('0xZZZ').toMatchToken('Word', '0xZZZ')
  })

  test('multiple decimal points', () => {
    expect('1.2.3').toMatchToken('Word', '1.2.3')
  })
})

describe('unicode and emoji', () => {
  test('greek letters', () => {
    expect('αβγ').toMatchToken('Identifier', 'αβγ')
    expect('delta-δ').toMatchToken('Identifier', 'delta-δ')
  })

  test('math symbols', () => {
    expect('∑').toMatchToken('Identifier', '∑')
    expect('∏').toMatchToken('Identifier', '∏')
  })

  test('CJK characters', () => {
    expect('你好').toMatchToken('Identifier', '你好')
    expect('こんにちは').toMatchToken('Identifier', 'こんにちは')
  })
})

describe('empty and whitespace input', () => {
  // Calling toMatchTokens() with no arguments asserts an empty token list.
  test('empty string', () => {
    expect('').toMatchTokens()
  })

  test('only whitespace', () => {
    expect(' ').toMatchTokens()
  })

  test('only tabs', () => {
    expect('\t\t\t').toMatchTokens()
  })

  test('only newlines', () => {
    expect('\n\n\n').toMatchTokens(
      { type: 'Newline' },
      { type: 'Newline' },
      { type: 'Newline' },
    )
  })
})

describe('named args', () => {
  // The prefix token's value includes the trailing `=`.
  test("don't need spaces", () => {
    expect(`named=arg`).toMatchTokens(
      { type: 'NamedArgPrefix', value: 'named=' },
      { type: 'Identifier', value: 'arg' },
    )
  })

  test("can have spaces", () => {
    expect(`named= arg`).toMatchTokens(
      { type: 'NamedArgPrefix', value: 'named=' },
      { type: 'Identifier', value: 'arg' },
    )
  })

  test("can include numbers", () => {
    expect(`named123= arg`).toMatchTokens(
      { type: 'NamedArgPrefix', value: 'named123=' },
      { type: 'Identifier', value: 'arg' },
    )
  })
})

describe('dot operator', () => {
  test('standalone dot', () => {
    expect('.').toMatchToken('Operator', '.')
  })

  test('dot between identifiers tokenizes as separate tokens', () => {
    expect('config.path').toMatchTokens(
      { type: 'Identifier', value: 'config' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'path' },
    )
  })

  test('dot with number', () => {
    expect('array.0').toMatchTokens(
      { type: 'Identifier', value: 'array' },
      { type: 'Operator', value: '.' },
      { type: 'Number', value: '0' },
    )
  })

  test('chained dots', () => {
    expect('a.b.c').toMatchTokens(
      { type: 'Identifier', value: 'a' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'b' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'c' },
    )
  })

  test('identifier-like paths tokenize separately', () => {
    expect('readme.txt').toMatchTokens(
      { type: 'Identifier', value: 'readme' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'txt' },
    )
  })

  test('word-like paths remain as single token', () => {
    expect('./file.txt').toMatchToken('Word', './file.txt')
    expect('README.TXT').toMatchToken('Word', 'README.TXT')
  })

  test('dot with paren expression', () => {
    expect('obj.(1 + 2)').toMatchTokens(
      { type: 'Identifier', value: 'obj' },
      { type: 'Operator', value: '.' },
      { type: 'OpenParen' },
      { type: 'Number', value: '1' },
      { type: 'Operator', value: '+' },
      { type: 'Number', value: '2' },
      { type: 'CloseParen' },
    )
  })

  test('chained dot with paren expression', () => {
    expect('obj.items.(i)').toMatchTokens(
      { type: 'Identifier', value: 'obj' },
      { type: 'Operator', value: '.' },
      { type: 'Identifier', value: 'items' },
      { type: 'Operator', value: '.' },
      { type: 'OpenParen' },
      { type: 'Identifier', value: 'i' },
      { type: 'CloseParen' },
    )
  })
})