// ReefVM/tests/regex.test.ts
// (file-viewer metadata: 460 lines · 11 KiB · TypeScript)

import { test, expect, describe } from "bun:test"
import { run } from "#index"
import { toBytecode } from "#bytecode"
describe("RegExp", () => {
// Pushing a bare regex literal must produce a 'regex' value whose source is the
// pattern text and whose flag string is empty.
test("basic pattern parsing", async () => {
  const program = `
PUSH /hello/
`
  const outcome = await run(toBytecode(program))

  expect(outcome.type).toBe('regex')
  if (outcome.type !== 'regex') return // type-narrowing guard for the compiler

  const { source, flags } = outcome.value
  expect(source).toBe('hello')
  expect(flags).toBe('')
})
// Flags written after the closing slash must show up on the resulting RegExp.
test("pattern with flags", async () => {
  const bytecode = toBytecode(`
PUSH /test/gi
`)
  const outcome = await run(bytecode)

  expect(outcome.type).toBe('regex')
  if (outcome.type !== 'regex') return // type-narrowing guard for the compiler

  const re = outcome.value
  expect(re.source).toBe('test')
  expect(re.global).toBe(true)
  expect(re.ignoreCase).toBe(true)
})
// Runs the same pattern with i, g, m and gim in turn and checks that exactly the
// corresponding RegExp boolean properties are asserted as switched on.
test("multiple flag combinations", async () => {
  // Each entry pairs a flag suffix with the RegExp properties it must enable.
  const cases = [
    { flags: 'i', enabled: ['ignoreCase'] },
    { flags: 'g', enabled: ['global'] },
    { flags: 'm', enabled: ['multiline'] },
    { flags: 'gim', enabled: ['global', 'ignoreCase', 'multiline'] },
  ] as const

  for (const { flags, enabled } of cases) {
    // Same text as a three-line template literal: newline, instruction, newline.
    const outcome = await run(toBytecode(`\nPUSH /pattern/${flags}\n`))

    expect(outcome.type).toBe('regex')
    if (outcome.type !== 'regex') continue // type-narrowing guard for the compiler

    for (const property of enabled) {
      expect(outcome.value[property]).toBe(true)
    }
  }
})
// Non-trivial pattern syntax must survive the round trip unchanged: the RegExp's
// source has to equal the text written between the slashes.
test("complex patterns", async () => {
  const patterns = [
    '[a-z0-9]+', // character class
    'a{2,4}',    // quantifier
    '(foo|bar)', // group and alternation
    '^[a-z]+$',  // anchors and special chars
  ]

  for (const pattern of patterns) {
    // Same text as a three-line template literal: newline, instruction, newline.
    const outcome = await run(toBytecode(`\nPUSH /${pattern}/\n`))

    expect(outcome.type).toBe('regex')
    if (outcome.type !== 'regex') continue // type-narrowing guard for the compiler

    expect(outcome.value.source).toBe(pattern)
  }
})
// Backslash escapes inside a pattern must be preserved in the RegExp source.
test("escaping special characters", async () => {
  // The doubled backslashes are template-literal escapes; toBytecode receives /\d+\.\d+/.
  const program = `
PUSH /\\d+\\.\\d+/
`
  const outcome = await run(toBytecode(program))

  expect(outcome.type).toBe('regex')
  if (outcome.type !== 'regex') return // type-narrowing guard for the compiler

  expect(outcome.value.source).toBe('\\d+\\.\\d+')
})
// A regex stored in a variable must come back from LOAD with pattern and flag intact.
test("store and load", async () => {
  const bytecode = toBytecode(`
PUSH /test/i
STORE pattern
LOAD pattern
`)
  const loaded = await run(bytecode)

  expect(loaded.type).toBe('regex')
  if (loaded.type !== 'regex') return // type-narrowing guard for the compiler

  const { source, ignoreCase } = loaded.value
  expect(source).toBe('test')
  expect(ignoreCase).toBe(true)
})
// TRY_LOAD of a variable holding a regex must yield that regex.
test("TRY_LOAD with regex", async () => {
  const program = `
PUSH /hello/g
STORE regex
TRY_LOAD regex
`
  const loaded = await run(toBytecode(program))

  expect(loaded.type).toBe('regex')
  if (loaded.type !== 'regex') return // type-narrowing guard for the compiler

  const re = loaded.value
  expect(re.source).toBe('hello')
  expect(re.global).toBe(true)
})
// NEQ must be true for regexes with different patterns and false for two
// literals with the same pattern and flags.
test("NEQ comparison", async () => {
  const differentPatterns = `
PUSH /foo/
PUSH /bar/
NEQ
`
  const identicalPatterns = `
PUSH /test/i
PUSH /test/i
NEQ
`

  await expect(differentPatterns).toBeBoolean(true)
  await expect(identicalPatterns).toBeBoolean(false)
})
// A regex is neither null nor false, so it counts as truthy: the program is
// expected to finish with 42 rather than taking the JUMP_IF_FALSE branch.
test("is truthy", async () => {
  await expect(`
PUSH /test/
JUMP_IF_FALSE .end
PUSH 42
.end:
`).toBeNumber(42)
})
// Negating a regex must give boolean false, since a regex is truthy.
test("NOT returns false (regex is truthy)", async () => {
  await expect(`
PUSH /pattern/
NOT
`).toBeBoolean(false)
})
// Regex values collected by MAKE_ARRAY must keep their order, pattern and flags.
test("in arrays", async () => {
  const program = `
PUSH /first/
PUSH /second/i
PUSH /third/g
MAKE_ARRAY #3
`
  const outcome = await run(toBytecode(program))

  expect(outcome.type).toBe('array')
  if (outcome.type !== 'array') return // type-narrowing guard for the compiler

  const items = outcome.value
  expect(items).toHaveLength(3)

  // Expected source per position, plus the one flag property (if any) that must be set.
  const expected = [
    { source: 'first', flag: undefined },
    { source: 'second', flag: 'ignoreCase' },
    { source: 'third', flag: 'global' },
  ] as const

  for (const [index, want] of expected.entries()) {
    const item = items[index]!
    expect(item.type).toBe('regex')
    if (item.type !== 'regex') continue // type-narrowing guard for the compiler

    expect(item.value.source).toBe(want.source)
    if (want.flag) {
      expect(item.value[want.flag]).toBe(true)
    }
  }
})
// ARRAY_GET with index 1 must return the second regex pushed into the array.
test("retrieve from array", async () => {
  const bytecode = toBytecode(`
PUSH /pattern/i
PUSH /test/g
MAKE_ARRAY #2
PUSH 1
ARRAY_GET
`)
  const element = await run(bytecode)

  expect(element.type).toBe('regex')
  if (element.type !== 'regex') return // type-narrowing guard for the compiler

  const { source, global } = element.value
  expect(source).toBe('test')
  expect(global).toBe(true)
})
// Regex values stored under string keys by MAKE_DICT must be retrievable with
// pattern and flags intact.
test("in dicts", async () => {
  const program = `
PUSH 'email'
PUSH /^[a-z@.]+$/i
PUSH 'phone'
PUSH /\\d{3}-\\d{4}/
MAKE_DICT #2
`
  const outcome = await run(toBytecode(program))

  expect(outcome.type).toBe('dict')
  if (outcome.type !== 'dict') return // type-narrowing guard for the compiler

  const entries = outcome.value
  expect(entries.size).toBe(2)

  const emailEntry = entries.get('email')
  expect(emailEntry?.type).toBe('regex')
  if (emailEntry?.type === 'regex') {
    const emailRe = emailEntry.value
    expect(emailRe.source).toBe('^[a-z@.]+$')
    expect(emailRe.ignoreCase).toBe(true)
  }

  const phoneEntry = entries.get('phone')
  expect(phoneEntry?.type).toBe('regex')
  if (phoneEntry?.type === 'regex') {
    expect(phoneEntry.value.source).toBe('\\d{3}-\\d{4}')
  }
})
// DICT_GET must hand back the stored regex with all three flags still set.
test("retrieve from dict", async () => {
  const bytecode = toBytecode(`
PUSH 'pattern'
PUSH /test/gim
MAKE_DICT #1
PUSH 'pattern'
DICT_GET
`)
  const fetched = await run(bytecode)

  expect(fetched.type).toBe('regex')
  if (fetched.type !== 'regex') return // type-narrowing guard for the compiler

  const re = fetched.value
  expect(re.source).toBe('test')
  for (const flag of ['global', 'ignoreCase', 'multiline'] as const) {
    expect(re[flag]).toBe(true)
  }
})
// STR_CONCAT must render a regex operand in /pattern/flags form.
test("with STR_CONCAT converts to string", async () => {
  const program = `
PUSH "Pattern: "
PUSH /test/gi
STR_CONCAT #2
`
  const joined = await run(toBytecode(program))

  expect(joined.type).toBe('string')
  if (joined.type !== 'string') return // type-narrowing guard for the compiler

  expect(joined.value).toBe('Pattern: /test/gi')
})
// Several regex operands in one STR_CONCAT are each rendered as /pattern/flags.
test("multiple regex in STR_CONCAT", async () => {
  await expect(`
PUSH /foo/
PUSH " and "
PUSH /bar/i
STR_CONCAT #3
`).toBeString('/foo/ and /bar/i')
})
// A regex and its DUP copy must compare equal under EQ.
test("DUP with regex", async () => {
  await expect(`
PUSH /pattern/i
DUP
EQ
`).toBeBoolean(true)
})
// An empty literal (//) must still yield a regex; its source is asserted to be
// '(?:)', the form JavaScript reports for an empty RegExp.
test("empty pattern", async () => {
  const bytecode = toBytecode(`
PUSH //
`)
  const outcome = await run(bytecode)

  expect(outcome.type).toBe('regex')
  if (outcome.type !== 'regex') return // type-narrowing guard for the compiler

  expect(outcome.value.source).toBe('(?:)')
})
// Escaped forward slashes inside the pattern must not terminate the literal and
// must stay escaped in the RegExp source.
test("pattern with forward slashes escaped", async () => {
  // The doubled backslashes are template-literal escapes; toBytecode receives /https:\/\//.
  const program = `
PUSH /https:\\/\\//
`
  const outcome = await run(toBytecode(program))

  expect(outcome.type).toBe('regex')
  if (outcome.type !== 'regex') return // type-narrowing guard for the compiler

  expect(outcome.value.source).toBe('https:\\/\\/')
})
// Non-ASCII text in a pattern must be carried through to the RegExp source unchanged.
test("unicode patterns", async () => {
  const bytecode = toBytecode(`
PUSH /こんにちは/
`)
  const outcome = await run(bytecode)

  expect(outcome.type).toBe('regex')
  if (outcome.type !== 'regex') return // type-narrowing guard for the compiler

  expect(outcome.value.source).toBe('こんにちは')
})
// An emoji followed by a quantifier must be carried through to the RegExp source unchanged.
test("emoji in pattern", async () => {
  const program = `
PUSH /🎉+/
`
  const outcome = await run(toBytecode(program))

  expect(outcome.type).toBe('regex')
  if (outcome.type !== 'regex') return // type-narrowing guard for the compiler

  expect(outcome.value.source).toBe('🎉+')
})
// EQ on regexes: pattern and flag set both matter, flag order does not.
test("comparing different regex types", async () => {
  const differentPatterns = `
PUSH /abc/
PUSH /xyz/
EQ
`
  const samePatternDifferentFlags = `
PUSH /test/
PUSH /test/i
EQ
`
  const sameFlagsDifferentOrder = `
PUSH /test/ig
PUSH /test/gi
EQ
`

  await expect(differentPatterns).toBeBoolean(false)
  await expect(samePatternDifferentFlags).toBeBoolean(false)
  // 'ig' and 'gi' name the same flag set, so these two must be equal.
  await expect(sameFlagsDifferentOrder).toBeBoolean(true)
})
// A regex passed to a registered native function must arrive usable as a
// JavaScript RegExp; here the function tests the string against it.
test("with native functions", async () => {
  const vmModule = await import("#vm")
  // NOTE(review): the two numbers pushed before CALL look like argument counts —
  // confirm against the VM's CALL convention.
  const program = toBytecode(`
LOAD match
PUSH "hello world"
PUSH /world/
PUSH 2
PUSH 0
CALL
HALT
`)
  const machine = new vmModule.VM(program)

  // Native function receiving a string and a regex.
  machine.set('match', (text: string, re: RegExp) => re.test(text))

  const outcome = await machine.run()
  expect(outcome).toEqual({ type: 'boolean', value: true })
})
// A global-flag regex handed to a native function must replace every match,
// which shows the flag survives the trip into native code.
test("native function with regex replacement", async () => {
  const vmModule = await import("#vm")
  const program = toBytecode(`
LOAD replace
PUSH "hello world"
PUSH /o/g
PUSH "0"
PUSH 3
PUSH 0
CALL
HALT
`)
  const machine = new vmModule.VM(program)

  machine.set('replace', (text: string, re: RegExp, substitute: string) =>
    text.replace(re, substitute),
  )

  const outcome = await machine.run()
  expect(outcome).toEqual({ type: 'string', value: 'hell0 w0rld' })
})
// A native function returning the matches of a global regex must produce an
// array of string values, one per match.
test("native function extracting matches", async () => {
  const vmModule = await import("#vm")
  const program = toBytecode(`
LOAD extractNumbers
PUSH "test123abc456"
PUSH /\\d+/g
PUSH 2
PUSH 0
CALL
HALT
`)
  const machine = new vmModule.VM(program)

  // String.prototype.match yields null when nothing matches; fall back to an empty list.
  machine.set('extractNumbers', (text: string, re: RegExp) => text.match(re) ?? [])

  const outcome = await machine.run()

  expect(outcome.type).toBe('array')
  if (outcome.type !== 'array') return // type-narrowing guard for the compiler

  const matches = outcome.value
  expect(matches).toHaveLength(2)
  expect(matches[0]).toEqual({ type: 'string', value: '123' })
  expect(matches[1]).toEqual({ type: 'string', value: '456' })
})
})