Skip to content

expr: Fix parsing regex anchors '^' and '$' #7953

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
38 changes: 32 additions & 6 deletions src/uu/expr/src/syntax_tree.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -156,7 +156,7 @@ impl StringOp {
re_string.push('^');

// Handle first character from the input pattern
let mut pattern_chars = right.chars();
let mut pattern_chars = right.chars().peekable();
let first = pattern_chars.next();
match first {
Some('^') => {} // Start of string anchor is already added
Expand All @@ -169,13 +169,39 @@ impl StringOp {
// Escaped previous character should not affect the current.
let mut prev = first.unwrap_or_default();
let mut prev_is_escaped = false;
for curr in pattern_chars {
while let Some(curr) = pattern_chars.next() {
match curr {
// Carets are interpreted literally, unless used as character class negation "[^a]"
'^' if prev_is_escaped || !matches!(prev, '\\' | '[') => {
re_string.push_str(r"\^");
'^' => match (prev, prev_is_escaped) {
// Start of a capturing group
('(', true)
// Start of an alternative pattern
| ('|', true)
// Character class negation "[^a]"
| ('[', false)
// Explicitly escaped caret
| ('\\', false) => re_string.push(curr),
_ => re_string.push_str(r"\^"),
},
'$' => {
if let Some('\\') = pattern_chars.peek() {
let backslash = pattern_chars.next().unwrap_or_default();
match pattern_chars.peek() {
// End of a capturing group
Some(')') => re_string.push('$'),
// End of an alternative pattern
Some('|') => re_string.push('$'),
_ => re_string.push_str(r"\$"),
}
re_string.push(backslash);
} else if (prev_is_escaped || prev != '\\')
&& pattern_chars.peek().is_some()
{
re_string.push_str(r"\$");
} else {
re_string.push('$');
}
}
char => re_string.push(char),
_ => re_string.push(curr),
}

prev_is_escaped = prev == '\\' && !prev_is_escaped;
Expand Down
17 changes: 16 additions & 1 deletion tests/by-util/test_expr.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -282,10 +282,26 @@ fn test_regex() {
.args(&["a^b", ":", "a\\^b"])
.succeeds()
.stdout_only("3\n");
new_ucmd!()
.args(&["b", ":", "a\\|^b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["ab", ":", "\\(^a\\)b"])
.succeeds()
.stdout_only("a\n");
new_ucmd!()
.args(&["a$b", ":", "a\\$b"])
.succeeds()
.stdout_only("3\n");
new_ucmd!()
.args(&["a", ":", "a$\\|b"])
.succeeds()
.stdout_only("1\n");
new_ucmd!()
.args(&["ab", ":", "a\\(b$\\)"])
.succeeds()
.stdout_only("b\n");
new_ucmd!()
.args(&["abc", ":", "^abc"])
.succeeds()
Expand Down Expand Up @@ -766,7 +782,6 @@ mod gnu_expr {
.stdout_only("3\n");
}

#[ignore]
#[test]
fn test_bre11() {
new_ucmd!()
Expand Down
Loading