Remove regex dependency, replace with manual parsing

This commit is contained in:
Chris Wanstrath 2026-04-02 18:32:34 -07:00
parent a6b3de8883
commit 5ae67ba391
4 changed files with 119 additions and 104 deletions

45
Cargo.lock generated
View File

@ -2,60 +2,15 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 4 version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.184" version = "0.2.184"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
[[package]] [[package]]
name = "shout" name = "shout"
version = "0.0.18" version = "0.0.18"
dependencies = [ dependencies = [
"libc", "libc",
"regex",
] ]

View File

@ -5,4 +5,3 @@ edition = "2024"
[dependencies] [dependencies]
libc = "0.2" libc = "0.2"
regex = "1"

View File

@ -1,5 +1,3 @@
use regex::Regex;
/// Check if a single line matches a pattern that may contain inline `...` wildcards. /// Check if a single line matches a pattern that may contain inline `...` wildcards.
pub fn match_line(pattern: &str, actual: &str) -> bool { pub fn match_line(pattern: &str, actual: &str) -> bool {
if !pattern.contains("...") { if !pattern.contains("...") {
@ -7,12 +5,27 @@ pub fn match_line(pattern: &str, actual: &str) -> bool {
} }
let parts: Vec<&str> = pattern.split("...").collect(); let parts: Vec<&str> = pattern.split("...").collect();
let escaped: Vec<String> = parts.iter().map(|p| regex::escape(p)).collect(); let mut pos = 0;
let re_str = format!("^{}$", escaped.join(".*"));
match Regex::new(&re_str) { for (i, part) in parts.iter().enumerate() {
Ok(re) => re.is_match(actual), if i == 0 {
Err(_) => false, if !actual.starts_with(part) {
return false;
}
pos = part.len();
} else if i == parts.len() - 1 {
if !actual[pos..].ends_with(part) {
return false;
}
} else {
match actual[pos..].find(part) {
Some(idx) => pos += idx + part.len(),
None => return false,
}
}
} }
true
} }
/// Match expected output against actual output, supporting multi-line `...` wildcards. /// Match expected output against actual output, supporting multi-line `...` wildcards.

View File

@ -3,8 +3,6 @@ use std::process::{Command, Stdio};
use std::sync::mpsc; use std::sync::mpsc;
use std::time::Duration; use std::time::Duration;
use regex::Regex;
use crate::parse::{self, ShoutFile}; use crate::parse::{self, ShoutFile};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -64,26 +62,80 @@ fn build_script(commands: &[parse::Command], verbose: bool) -> String {
lines.join("\n") + "\n" lines.join("\n") + "\n"
} }
fn strip_ansi(line: &str) -> String { fn strip_ansi(s: &str) -> String {
// Same regex as the TS version let mut result = String::with_capacity(s.len());
let re = Regex::new(r"[\x1b\x9b][\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]").unwrap(); let mut chars = s.chars().peekable();
re.replace_all(line, "").to_string() while let Some(c) = chars.next() {
if c == '\x1b' {
if chars.peek() == Some(&'[') {
chars.next();
for fc in chars.by_ref() {
if ('@'..='~').contains(&fc) {
break;
}
}
}
} else if c == '\u{009b}' {
for fc in chars.by_ref() {
if ('@'..='~').contains(&fc) {
break;
}
}
} else {
result.push(c);
}
}
result
}
/// Decode the tail of a sentinel marker, i.e. the text that immediately
/// follows `SENTINEL_PREFIX`.
///
/// The accepted shape is `{exit_code}_{index}__`, where both fields are
/// non-empty runs of ASCII digits. Anything after the closing `__` is ignored.
///
/// Returns `Some((exit_code, index, bytes_consumed))`, where `bytes_consumed`
/// counts every byte up to and including the closing `__`. Returns `None` when
/// the text does not have that shape, or when a field does not fit its integer
/// type (`i32` for the exit code, `usize` for the index).
fn parse_sentinel_suffix(s: &str) -> Option<(i32, usize, usize)> {
    // Byte length of the leading run of ASCII digits in `text`.
    let digit_run = |text: &str| text.bytes().take_while(u8::is_ascii_digit).count();

    // First field: the exit code, which must be followed by a single `_`.
    let code_len = digit_run(s);
    if code_len == 0 {
        return None;
    }
    // Splitting is safe: everything before `code_len` is single-byte ASCII.
    let (code_text, rest) = s.split_at(code_len);
    let rest = rest.strip_prefix('_')?;
    let exit_code = code_text.parse::<i32>().ok()?;

    // Second field: the command index, which must be followed by `__`.
    let index_len = digit_run(rest);
    if index_len == 0 {
        return None;
    }
    let (index_text, tail) = rest.split_at(index_len);
    if !tail.starts_with("__") {
        return None;
    }
    let index = index_text.parse::<usize>().ok()?;

    // digits + '_' + digits + "__"
    Some((exit_code, index, code_len + 1 + index_len + 2))
}
/// Find the next sentinel in `s` at or after byte offset `from`.
/// Returns (start, exit_code, index, end) or None.
fn find_sentinel(s: &str, from: usize) -> Option<(usize, i32, usize, usize)> {
let mut search = from;
while let Some(rel) = s[search..].find(SENTINEL_PREFIX) {
let abs = search + rel;
let after = abs + SENTINEL_PREFIX.len();
if let Some((exit_code, index, len)) = parse_sentinel_suffix(&s[after..]) {
return Some((abs, exit_code, index, after + len));
}
search = abs + 1;
}
None
} }
fn parse_sentinel_output(raw: &str, command_count: usize) -> (Vec<Vec<String>>, Vec<i32>) { fn parse_sentinel_output(raw: &str, command_count: usize) -> (Vec<Vec<String>>, Vec<i32>) {
let mut outputs = Vec::new(); let mut outputs = Vec::new();
let mut exit_codes = Vec::new(); let mut exit_codes = Vec::new();
let mut pos = 0;
let sentinel_re = Regex::new(&format!(r"{}(\d+)_(\d+)__", regex::escape(SENTINEL_PREFIX))).unwrap();
let mut remaining = raw;
for _i in 0..command_count { for _i in 0..command_count {
if let Some(m) = sentinel_re.find(remaining) { if let Some((start, exit_code, _idx, end)) = find_sentinel(raw, pos) {
let caps = sentinel_re.captures(&remaining[m.start()..]).unwrap(); let before = &raw[pos..start];
let exit_code: i32 = caps[1].parse().unwrap_or(1);
let before = &remaining[..m.start()];
let mut lines: Vec<String> = before.split('\n').map(|s| s.to_string()).collect(); let mut lines: Vec<String> = before.split('\n').map(|s| s.to_string()).collect();
// Remove leading empty line (from printf \n prefix) // Remove leading empty line (from printf \n prefix)
@ -99,16 +151,13 @@ fn parse_sentinel_output(raw: &str, command_count: usize) -> (Vec<Vec<String>>,
outputs.push(lines); outputs.push(lines);
exit_codes.push(exit_code); exit_codes.push(exit_code);
// Skip past sentinel pos = end;
let after = &remaining[m.end()..]; if raw.as_bytes().get(pos) == Some(&b'\n') {
remaining = if after.starts_with('\n') { pos += 1;
&after[1..] }
} else {
after
};
} else { } else {
// No sentinel found — rest is output for this command // No sentinel found — rest is output for this command
let mut lines: Vec<String> = remaining.split('\n').map(|s| s.to_string()).collect(); let mut lines: Vec<String> = raw[pos..].split('\n').map(|s| s.to_string()).collect();
if !lines.is_empty() && lines[0].is_empty() { if !lines.is_empty() && lines[0].is_empty() {
lines.remove(0); lines.remove(0);
} }
@ -316,8 +365,6 @@ pub fn run_file(
let mut sentinels_reported: usize = 0; let mut sentinels_reported: usize = 0;
let mut last_sentinel_end: usize = 0; let mut last_sentinel_end: usize = 0;
let sentinel_re = Regex::new(&format!(r"{}(\d+)_(\d+)__", regex::escape(SENTINEL_PREFIX))).unwrap();
loop { loop {
let remaining = deadline.saturating_duration_since(std::time::Instant::now()); let remaining = deadline.saturating_duration_since(std::time::Instant::now());
if remaining.is_zero() { if remaining.is_zero() {
@ -331,33 +378,34 @@ pub fn run_file(
// Stream command results as they come in // Stream command results as they come in
if let Some(on_result) = on_command_result { if let Some(on_result) = on_command_result {
for caps in sentinel_re.captures_iter(&accumulated[last_sentinel_end..]) { loop {
let idx: usize = caps[2].parse().unwrap_or(0); if let Some((start, exit_code, idx, end)) = find_sentinel(&accumulated, last_sentinel_end) {
if idx >= sentinels_reported { if idx >= sentinels_reported {
let exit_code: i32 = caps[1].parse().unwrap_or(1); let output_slice = &accumulated[last_sentinel_end..start];
let sentinel_match = caps.get(0).unwrap(); let mut lines: Vec<String> = output_slice.split('\n').map(|s| s.to_string()).collect();
let abs_start = last_sentinel_end + sentinel_match.start(); if !lines.is_empty() && lines[0].is_empty() {
let abs_end = last_sentinel_end + sentinel_match.end(); lines.remove(0);
let output_slice = &accumulated[last_sentinel_end..abs_start]; }
let mut lines: Vec<String> = output_slice.split('\n').map(|s| s.to_string()).collect(); lines = parse::trim_trailing_empty(&lines);
if !lines.is_empty() && lines[0].is_empty() { if lines.len() == 1 && lines[0].is_empty() {
lines.remove(0); lines.clear();
} }
lines = parse::trim_trailing_empty(&lines); let result = CommandResult {
if lines.len() == 1 && lines[0].is_empty() { command: file.commands[idx].clone(),
lines.clear(); actual: lines.iter().map(|l| strip_ansi(l)).collect(),
} exit_code,
let result = CommandResult { };
command: file.commands[idx].clone(), on_result(idx, &result);
actual: lines.iter().map(|l| strip_ansi(l)).collect(), sentinels_reported = idx + 1;
exit_code, last_sentinel_end = end;
}; if accumulated.as_bytes().get(last_sentinel_end) == Some(&b'\n') {
on_result(idx, &result); last_sentinel_end += 1;
sentinels_reported = idx + 1; }
last_sentinel_end = abs_end; } else {
if accumulated.as_bytes().get(last_sentinel_end) == Some(&b'\n') { last_sentinel_end = end + 1;
last_sentinel_end += 1;
} }
} else {
break;
} }
} }
} }