diff --git a/tests/basic.test.ts b/tests/basic.test.ts
index 288d3c1..9e50308 100644
--- a/tests/basic.test.ts
+++ b/tests/basic.test.ts
@@ -1062,3 +1062,452 @@ test("mixed emoji and regular names", async () => {
   expect(result).toEqual({ type: 'number', value: 60 })
 })
 
+// ========================================
+// RegExp literal tests: parsing, storage, comparison, containers, natives
+// ========================================
+
+test("RegExp - basic pattern parsing", async () => {
+  const program = `
+    PUSH /hello/
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') { // repeats the check above, presumably to narrow the value type
+    expect(outcome.value.source).toBe('hello')
+    expect(outcome.value.flags).toBe('')
+  }
+})
+
+test("RegExp - pattern with flags", async () => {
+  const program = `
+    PUSH /test/gi
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('test')
+    expect(outcome.value.global).toBe(true)
+    expect(outcome.value.ignoreCase).toBe(true)
+  }
+})
+
+test("RegExp - multiple flag combinations", async () => {
+  // i alone -> ignoreCase
+  const iProgram = `
+    PUSH /pattern/i
+  `
+  const iOutcome = await run(toBytecode(iProgram))
+  expect(iOutcome.type).toBe('regex')
+  if (iOutcome.type === 'regex') {
+    expect(iOutcome.value.ignoreCase).toBe(true)
+  }
+
+  // g alone -> global
+  const gProgram = `
+    PUSH /pattern/g
+  `
+  const gOutcome = await run(toBytecode(gProgram))
+  expect(gOutcome.type).toBe('regex')
+  if (gOutcome.type === 'regex') {
+    expect(gOutcome.value.global).toBe(true)
+  }
+
+  // m alone -> multiline
+  const mProgram = `
+    PUSH /pattern/m
+  `
+  const mOutcome = await run(toBytecode(mProgram))
+  expect(mOutcome.type).toBe('regex')
+  if (mOutcome.type === 'regex') {
+    expect(mOutcome.value.multiline).toBe(true)
+  }
+
+  // all three flags together
+  const gimProgram = `
+    PUSH /pattern/gim
+  `
+  const gimOutcome = await run(toBytecode(gimProgram))
+  expect(gimOutcome.type).toBe('regex')
+  if (gimOutcome.type === 'regex') {
+    expect(gimOutcome.value.global).toBe(true)
+    expect(gimOutcome.value.ignoreCase).toBe(true)
+    expect(gimOutcome.value.multiline).toBe(true)
+  }
+})
+
+test("RegExp - complex patterns", async () => {
+  // Bracketed character class with a quantifier
+  const classProgram = `
+    PUSH /[a-z0-9]+/
+  `
+  const classOutcome = await run(toBytecode(classProgram))
+  expect(classOutcome.type).toBe('regex')
+  if (classOutcome.type === 'regex') {
+    expect(classOutcome.value.source).toBe('[a-z0-9]+')
+  }
+
+  // Bounded repetition
+  const repeatProgram = `
+    PUSH /a{2,4}/
+  `
+  const repeatOutcome = await run(toBytecode(repeatProgram))
+  expect(repeatOutcome.type).toBe('regex')
+  if (repeatOutcome.type === 'regex') {
+    expect(repeatOutcome.value.source).toBe('a{2,4}')
+  }
+
+  // Capture group containing an alternation
+  const groupProgram = `
+    PUSH /(foo|bar)/
+  `
+  const groupOutcome = await run(toBytecode(groupProgram))
+  expect(groupOutcome.type).toBe('regex')
+  if (groupOutcome.type === 'regex') {
+    expect(groupOutcome.value.source).toBe('(foo|bar)')
+  }
+
+  // Start/end anchors around a character class
+  const anchorProgram = `
+    PUSH /^[a-z]+$/
+  `
+  const anchorOutcome = await run(toBytecode(anchorProgram))
+  expect(anchorOutcome.type).toBe('regex')
+  if (anchorOutcome.type === 'regex') {
+    expect(anchorOutcome.value.source).toBe('^[a-z]+$')
+  }
+})
+
+test("RegExp - escaping special characters", async () => {
+  const program = `
+    PUSH /\\d+\\.\\d+/
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('\\d+\\.\\d+')
+  }
+})
+
+test("RegExp - store and load", async () => {
+  const program = `
+    PUSH /test/i
+    STORE pattern
+    LOAD pattern
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('test')
+    expect(outcome.value.ignoreCase).toBe(true)
+  }
+})
+
+test("RegExp - TRY_LOAD with regex", async () => {
+  const program = `
+    PUSH /hello/g
+    STORE regex
+    TRY_LOAD regex
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('hello')
+    expect(outcome.value.global).toBe(true)
+  }
+})
+
+test("RegExp - NEQ comparison", async () => {
+  const distinctPair = `
+    PUSH /foo/
+    PUSH /bar/
+    NEQ
+  `
+  expect(await run(toBytecode(distinctPair))).toEqual({ type: 'boolean', value: true })
+
+  const identicalPair = `
+    PUSH /test/i
+    PUSH /test/i
+    NEQ
+  `
+  expect(await run(toBytecode(identicalPair))).toEqual({ type: 'boolean', value: false })
+})
+
+test("RegExp - is truthy", async () => {
+  // A regex must not take the JUMP_IF_FALSE branch, so PUSH 42 still runs
+  const program = `
+    PUSH /test/
+    JUMP_IF_FALSE .end
+    PUSH 42
+    .end:
+  `
+  expect(await run(toBytecode(program))).toEqual({ type: 'number', value: 42 })
+})
+
+test("RegExp - NOT returns false (regex is truthy)", async () => {
+  const program = `
+    PUSH /pattern/
+    NOT
+  `
+  expect(await run(toBytecode(program))).toEqual({ type: 'boolean', value: false })
+})
+
+test("RegExp - in arrays", async () => {
+  const program = `
+    PUSH /first/
+    PUSH /second/i
+    PUSH /third/g
+    MAKE_ARRAY #3
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('array')
+  if (outcome.type === 'array') {
+    expect(outcome.value).toHaveLength(3)
+
+    expect(outcome.value[0]!.type).toBe('regex')
+    if (outcome.value[0]!.type === 'regex') {
+      expect(outcome.value[0]!.value.source).toBe('first')
+    }
+
+    expect(outcome.value[1]!.type).toBe('regex')
+    if (outcome.value[1]!.type === 'regex') {
+      expect(outcome.value[1]!.value.source).toBe('second')
+      expect(outcome.value[1]!.value.ignoreCase).toBe(true)
+    }
+
+    expect(outcome.value[2]!.type).toBe('regex')
+    if (outcome.value[2]!.type === 'regex') {
+      expect(outcome.value[2]!.value.source).toBe('third')
+      expect(outcome.value[2]!.value.global).toBe(true)
+    }
+  }
+})
+
+test("RegExp - retrieve from array", async () => {
+  const program = `
+    PUSH /pattern/i
+    PUSH /test/g
+    MAKE_ARRAY #2
+    PUSH 1
+    ARRAY_GET
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('test')
+    expect(outcome.value.global).toBe(true)
+  }
+})
+
+test("RegExp - in dicts", async () => {
+  const program = `
+    PUSH 'email'
+    PUSH /^[a-z@.]+$/i
+    PUSH 'phone'
+    PUSH /\\d{3}-\\d{4}/
+    MAKE_DICT #2
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('dict')
+  if (outcome.type === 'dict') {
+    expect(outcome.value.size).toBe(2)
+
+    const emailEntry = outcome.value.get('email')
+    expect(emailEntry?.type).toBe('regex')
+    if (emailEntry?.type === 'regex') {
+      expect(emailEntry.value.source).toBe('^[a-z@.]+$')
+      expect(emailEntry.value.ignoreCase).toBe(true)
+    }
+
+    const phoneEntry = outcome.value.get('phone')
+    expect(phoneEntry?.type).toBe('regex')
+    if (phoneEntry?.type === 'regex') {
+      expect(phoneEntry.value.source).toBe('\\d{3}-\\d{4}')
+    }
+  }
+})
+
+test("RegExp - retrieve from dict", async () => {
+  const program = `
+    PUSH 'pattern'
+    PUSH /test/gim
+    MAKE_DICT #1
+    PUSH 'pattern'
+    DICT_GET
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('test')
+    expect(outcome.value.global).toBe(true)
+    expect(outcome.value.ignoreCase).toBe(true)
+    expect(outcome.value.multiline).toBe(true)
+  }
+})
+
+test("RegExp - with STR_CONCAT converts to string", async () => {
+  const program = `
+    PUSH "Pattern: "
+    PUSH /test/gi
+    STR_CONCAT #2
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('string')
+  if (outcome.type === 'string') {
+    expect(outcome.value).toBe('Pattern: /test/gi')
+  }
+})
+
+test("RegExp - multiple regex in STR_CONCAT", async () => {
+  const program = `
+    PUSH /foo/
+    PUSH " and "
+    PUSH /bar/i
+    STR_CONCAT #3
+  `
+  expect(await run(toBytecode(program))).toEqual({ type: 'string', value: '/foo/ and /bar/i' })
+})
+
+test("RegExp - DUP with regex", async () => {
+  const program = `
+    PUSH /pattern/i
+    DUP
+    EQ
+  `
+  // A regex compared with its own duplicate must report equal
+  expect(await run(toBytecode(program))).toEqual({ type: 'boolean', value: true })
+})
+
+test("RegExp - empty pattern", async () => {
+  const program = `
+    PUSH //
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('(?:)') // JS reports an empty RegExp's source as '(?:)'
+  }
+})
+
+test("RegExp - pattern with forward slashes escaped", async () => {
+  const program = `
+    PUSH /https:\\/\\//
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('https:\\/\\/')
+  }
+})
+
+test("RegExp - unicode patterns", async () => {
+  const program = `
+    PUSH /こんにちは/
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('こんにちは')
+  }
+})
+
+test("RegExp - emoji in pattern", async () => {
+  const program = `
+    PUSH /🎉+/
+  `
+  const outcome = await run(toBytecode(program))
+  expect(outcome.type).toBe('regex')
+  if (outcome.type === 'regex') {
+    expect(outcome.value.source).toBe('🎉+')
+  }
+})
+
+test("RegExp - comparing different regex types", async () => {
+  // Patterns differ -> not equal
+  const differentPatterns = `
+    PUSH /abc/
+    PUSH /xyz/
+    EQ
+  `
+  expect(await run(toBytecode(differentPatterns))).toEqual({ type: 'boolean', value: false })
+
+  // Patterns match but flags differ -> not equal
+  const differentFlags = `
+    PUSH /test/
+    PUSH /test/i
+    EQ
+  `
+  expect(await run(toBytecode(differentFlags))).toEqual({ type: 'boolean', value: false })
+
+  // Same flags written in another order -> equal
+  const reorderedFlags = `
+    PUSH /test/ig
+    PUSH /test/gi
+    EQ
+  `
+  expect(await run(toBytecode(reorderedFlags))).toEqual({ type: 'boolean', value: true })
+})
+
+test("RegExp - with native functions", async () => {
+  const { VM } = await import("#vm")
+  const compiled = toBytecode(`
+    PUSH "hello world"
+    PUSH /world/
+    CALL_NATIVE match
+    HALT
+  `)
+
+  const machine = new VM(compiled)
+
+  // Native receives the pushed string and regex as plain JS arguments
+  machine.registerFunction('match', (str: string, pattern: RegExp) => {
+    return pattern.test(str)
+  })
+
+  const outcome = await machine.run()
+  expect(outcome).toEqual({ type: 'boolean', value: true })
+})
+
+test("RegExp - native function with regex replacement", async () => {
+  const { VM } = await import("#vm")
+  const compiled = toBytecode(`
+    PUSH "hello world"
+    PUSH /o/g
+    PUSH "0"
+    CALL_NATIVE replace
+    HALT
+  `)
+
+  const machine = new VM(compiled)
+
+  machine.registerFunction('replace', (str: string, pattern: RegExp, replacement: string) => {
+    return str.replace(pattern, replacement)
+  })
+
+  const outcome = await machine.run()
+  expect(outcome).toEqual({ type: 'string', value: 'hell0 w0rld' })
+})
+
+test("RegExp - native function extracting matches", async () => {
+  const { VM } = await import("#vm")
+  const compiled = toBytecode(`
+    PUSH "test123abc456"
+    PUSH /\\d+/g
+    CALL_NATIVE extractNumbers
+    HALT
+  `)
+
+  const machine = new VM(compiled)
+
+  machine.registerFunction('extractNumbers', (str: string, pattern: RegExp) => {
+    return str.match(pattern) || []
+  })
+
+  const outcome = await machine.run()
+  expect(outcome.type).toBe('array')
+  if (outcome.type === 'array') {
+    expect(outcome.value).toHaveLength(2)
+    expect(outcome.value[0]).toEqual({ type: 'string', value: '123' })
+    expect(outcome.value[1]).toEqual({ type: 'string', value: '456' })
+  }
+})
+