import { test, expect, describe } from "bun:test"
import { run } from "#index"
import { toBytecode } from "#bytecode"

/**
 * Regex value tests.
 *
 * Each test assembles a small bytecode program with `toBytecode`, executes it
 * (via `run`, or via a `VM` instance when native functions are registered),
 * and inspects the tagged value that comes back. The areas covered are:
 * literal parsing and flags, variable storage, equality, truthiness,
 * membership in arrays and dicts, string concatenation, and handing regex
 * values to native functions.
 *
 * The `if (x.type === 'regex')` guards that follow each `expect(...)` exist so
 * TypeScript narrows the result before `.value` is accessed.
 */
describe("RegExp", () => {
  // --- Literal parsing -----------------------------------------------------

  test("basic pattern parsing", async () => {
    const str = `
      PUSH /hello/
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('hello')
      expect(result.value.flags).toBe('')
    }
  })

  test("pattern with flags", async () => {
    const str = `
      PUSH /test/gi
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('test')
      expect(result.value.global).toBe(true)
      expect(result.value.ignoreCase).toBe(true)
    }
  })

  test("multiple flag combinations", async () => {
    // Test i flag
    const str1 = `
      PUSH /pattern/i
    `
    const result1 = await run(toBytecode(str1))
    expect(result1.type).toBe('regex')
    if (result1.type === 'regex') {
      expect(result1.value.ignoreCase).toBe(true)
    }

    // Test g flag
    const str2 = `
      PUSH /pattern/g
    `
    const result2 = await run(toBytecode(str2))
    expect(result2.type).toBe('regex')
    if (result2.type === 'regex') {
      expect(result2.value.global).toBe(true)
    }

    // Test m flag
    const str3 = `
      PUSH /pattern/m
    `
    const result3 = await run(toBytecode(str3))
    expect(result3.type).toBe('regex')
    if (result3.type === 'regex') {
      expect(result3.value.multiline).toBe(true)
    }

    // Test combined flags
    const str4 = `
      PUSH /pattern/gim
    `
    const result4 = await run(toBytecode(str4))
    expect(result4.type).toBe('regex')
    if (result4.type === 'regex') {
      expect(result4.value.global).toBe(true)
      expect(result4.value.ignoreCase).toBe(true)
      expect(result4.value.multiline).toBe(true)
    }
  })

  test("complex patterns", async () => {
    // Character class
    const str1 = `
      PUSH /[a-z0-9]+/
    `
    const result1 = await run(toBytecode(str1))
    expect(result1.type).toBe('regex')
    if (result1.type === 'regex') {
      expect(result1.value.source).toBe('[a-z0-9]+')
    }

    // Quantifiers
    const str2 = `
      PUSH /a{2,4}/
    `
    const result2 = await run(toBytecode(str2))
    expect(result2.type).toBe('regex')
    if (result2.type === 'regex') {
      expect(result2.value.source).toBe('a{2,4}')
    }

    // Groups and alternation
    const str3 = `
      PUSH /(foo|bar)/
    `
    const result3 = await run(toBytecode(str3))
    expect(result3.type).toBe('regex')
    if (result3.type === 'regex') {
      expect(result3.value.source).toBe('(foo|bar)')
    }

    // Anchors and special chars
    const str4 = `
      PUSH /^[a-z]+$/
    `
    const result4 = await run(toBytecode(str4))
    expect(result4.type).toBe('regex')
    if (result4.type === 'regex') {
      expect(result4.value.source).toBe('^[a-z]+$')
    }
  })

  test("escaping special characters", async () => {
    // Backslashes are doubled because this is a JS template literal; the
    // assembler receives single backslashes.
    const str = `
      PUSH /\\d+\\.\\d+/
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('\\d+\\.\\d+')
    }
  })

  // --- Variables -----------------------------------------------------------

  test("store and load", async () => {
    const str = `
      PUSH /test/i
      STORE pattern
      LOAD pattern
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('test')
      expect(result.value.ignoreCase).toBe(true)
    }
  })

  test("TRY_LOAD with regex", async () => {
    const str = `
      PUSH /hello/g
      STORE regex
      TRY_LOAD regex
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('hello')
      expect(result.value.global).toBe(true)
    }
  })

  // --- Comparison and truthiness -------------------------------------------

  test("NEQ comparison", async () => {
    const str = `
      PUSH /foo/
      PUSH /bar/
      NEQ
    `
    expect(await run(toBytecode(str))).toEqual({ type: 'boolean', value: true })

    const str2 = `
      PUSH /test/i
      PUSH /test/i
      NEQ
    `
    expect(await run(toBytecode(str2))).toEqual({ type: 'boolean', value: false })
  })

  test("is truthy", async () => {
    // Regex values should be truthy (not null or false)
    const str = `
      PUSH /test/
      JUMP_IF_FALSE .end
      PUSH 42
      .end:
    `
    expect(await run(toBytecode(str))).toEqual({ type: 'number', value: 42 })
  })

  test("NOT returns false (regex is truthy)", async () => {
    const str = `
      PUSH /pattern/
      NOT
    `
    expect(await run(toBytecode(str))).toEqual({ type: 'boolean', value: false })
  })

  // --- Containers ----------------------------------------------------------

  test("in arrays", async () => {
    const str = `
      PUSH /first/
      PUSH /second/i
      PUSH /third/g
      MAKE_ARRAY #3
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('array')
    if (result.type === 'array') {
      expect(result.value).toHaveLength(3)

      // Pull the elements out once and narrow via `?.type`, mirroring the
      // pattern the dict test uses, instead of repeating `!` assertions.
      const [first, second, third] = result.value

      expect(first?.type).toBe('regex')
      if (first?.type === 'regex') {
        expect(first.value.source).toBe('first')
      }

      expect(second?.type).toBe('regex')
      if (second?.type === 'regex') {
        expect(second.value.source).toBe('second')
        expect(second.value.ignoreCase).toBe(true)
      }

      expect(third?.type).toBe('regex')
      if (third?.type === 'regex') {
        expect(third.value.source).toBe('third')
        expect(third.value.global).toBe(true)
      }
    }
  })

  test("retrieve from array", async () => {
    const str = `
      PUSH /pattern/i
      PUSH /test/g
      MAKE_ARRAY #2
      PUSH 1
      ARRAY_GET
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('test')
      expect(result.value.global).toBe(true)
    }
  })

  test("in dicts", async () => {
    const str = `
      PUSH 'email'
      PUSH /^[a-z@.]+$/i
      PUSH 'phone'
      PUSH /\\d{3}-\\d{4}/
      MAKE_DICT #2
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('dict')
    if (result.type === 'dict') {
      expect(result.value.size).toBe(2)

      const email = result.value.get('email')
      expect(email?.type).toBe('regex')
      if (email?.type === 'regex') {
        expect(email.value.source).toBe('^[a-z@.]+$')
        expect(email.value.ignoreCase).toBe(true)
      }

      const phone = result.value.get('phone')
      expect(phone?.type).toBe('regex')
      if (phone?.type === 'regex') {
        expect(phone.value.source).toBe('\\d{3}-\\d{4}')
      }
    }
  })

  test("retrieve from dict", async () => {
    const str = `
      PUSH 'pattern'
      PUSH /test/gim
      MAKE_DICT #1
      PUSH 'pattern'
      DICT_GET
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('test')
      expect(result.value.global).toBe(true)
      expect(result.value.ignoreCase).toBe(true)
      expect(result.value.multiline).toBe(true)
    }
  })

  // --- String conversion ---------------------------------------------------

  test("with STR_CONCAT converts to string", async () => {
    const str = `
      PUSH "Pattern: "
      PUSH /test/gi
      STR_CONCAT #2
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('string')
    if (result.type === 'string') {
      expect(result.value).toBe('Pattern: /test/gi')
    }
  })

  test("multiple regex in STR_CONCAT", async () => {
    const str = `
      PUSH /foo/
      PUSH " and "
      PUSH /bar/i
      STR_CONCAT #3
    `
    expect(await run(toBytecode(str))).toEqual({ type: 'string', value: '/foo/ and /bar/i' })
  })

  test("DUP with regex", async () => {
    const str = `
      PUSH /pattern/i
      DUP
      EQ
    `
    // Same regex duplicated should be equal
    expect(await run(toBytecode(str))).toEqual({ type: 'boolean', value: true })
  })

  // --- Edge-case patterns --------------------------------------------------

  test("empty pattern", async () => {
    const str = `
      PUSH //
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      // The test expects the empty pattern to surface as '(?:)'.
      expect(result.value.source).toBe('(?:)')
    }
  })

  test("pattern with forward slashes escaped", async () => {
    const str = `
      PUSH /https:\\/\\//
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('https:\\/\\/')
    }
  })

  test("unicode patterns", async () => {
    const str = `
      PUSH /こんにちは/
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('こんにちは')
    }
  })

  test("emoji in pattern", async () => {
    const str = `
      PUSH /🎉+/
    `
    const result = await run(toBytecode(str))

    expect(result.type).toBe('regex')
    if (result.type === 'regex') {
      expect(result.value.source).toBe('🎉+')
    }
  })

  test("comparing different regex types", async () => {
    // Different patterns
    const str1 = `
      PUSH /abc/
      PUSH /xyz/
      EQ
    `
    expect(await run(toBytecode(str1))).toEqual({ type: 'boolean', value: false })

    // Same pattern, different flags
    const str2 = `
      PUSH /test/
      PUSH /test/i
      EQ
    `
    expect(await run(toBytecode(str2))).toEqual({ type: 'boolean', value: false })

    // Different order of flags (should be equal)
    const str3 = `
      PUSH /test/ig
      PUSH /test/gi
      EQ
    `
    expect(await run(toBytecode(str3))).toEqual({ type: 'boolean', value: true })
  })

  // --- Native function interop ---------------------------------------------

  test("with native functions", async () => {
    const { VM } = await import("#vm")

    // NOTE(review): the two numbers pushed before CALL look like the
    // positional-argument count followed by a named-argument count — confirm
    // against the VM's CALL contract.
    const bytecode = toBytecode(`
      LOAD match
      PUSH "hello world"
      PUSH /world/
      PUSH 2
      PUSH 0
      CALL
      HALT
    `)

    const vm = new VM(bytecode)

    // Register a native function that takes a string and regex
    vm.registerFunction('match', (str: string, pattern: RegExp) => {
      return pattern.test(str)
    })

    const result = await vm.run()
    expect(result).toEqual({ type: 'boolean', value: true })
  })

  test("native function with regex replacement", async () => {
    const { VM } = await import("#vm")

    const bytecode = toBytecode(`
      LOAD replace
      PUSH "hello world"
      PUSH /o/g
      PUSH "0"
      PUSH 3
      PUSH 0
      CALL
      HALT
    `)

    const vm = new VM(bytecode)

    vm.registerFunction('replace', (str: string, pattern: RegExp, replacement: string) => {
      return str.replace(pattern, replacement)
    })

    const result = await vm.run()
    expect(result).toEqual({ type: 'string', value: 'hell0 w0rld' })
  })

  test("native function extracting matches", async () => {
    const { VM } = await import("#vm")

    const bytecode = toBytecode(`
      LOAD extractNumbers
      PUSH "test123abc456"
      PUSH /\\d+/g
      PUSH 2
      PUSH 0
      CALL
      HALT
    `)

    const vm = new VM(bytecode)

    vm.registerFunction('extractNumbers', (str: string, pattern: RegExp) => {
      // `match` yields null when nothing matches; fall back to an empty array.
      return str.match(pattern) || []
    })

    const result = await vm.run()

    expect(result.type).toBe('array')
    if (result.type === 'array') {
      expect(result.value).toHaveLength(2)
      expect(result.value[0]).toEqual({ type: 'string', value: '123' })
      expect(result.value[1]).toEqual({ type: 'string', value: '456' })
    }
  })
})