From f0d6a1e1e7aa2fe8936bcdae5976678c68f37322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 14:13:05 +0300 Subject: [PATCH 01/11] expr: Handle caret '^' at the beginning of a capturing group --- src/uu/expr/src/syntax_tree.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 3026d5d41b4..e7d80276c80 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -171,11 +171,16 @@ impl StringOp { let mut prev_is_escaped = false; for curr in pattern_chars { match curr { - // Carets are interpreted literally, unless used as character class negation "[^a]" - '^' if prev_is_escaped || !matches!(prev, '\\' | '[') => { - re_string.push_str(r"\^"); - } - char => re_string.push(char), + '^' => match (prev, prev_is_escaped) { + // Start of a capturing group + ('(', true) => re_string.push(curr), + // Character class negation "[^a]" + ('[', false) => re_string.push(curr), + // Explicitly escaped caret + ('\\', false) => re_string.push(curr), + _ => re_string.push_str(r"\^"), + }, + _ => re_string.push(curr), } prev_is_escaped = prev == '\\' && !prev_is_escaped; From 84faf9be8d2a2e8e820048154a58b319ced9bda9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 14:18:44 +0300 Subject: [PATCH 02/11] expr: Handle caret '^' at the beginning of an alternative pattern --- src/uu/expr/src/syntax_tree.rs | 2 ++ tests/by-util/test_expr.rs | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index e7d80276c80..3e5bdf7a767 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -174,6 +174,8 @@ impl StringOp { '^' => match (prev, prev_is_escaped) { // Start of a capturing group ('(', true) => re_string.push(curr), + // Start of an alternative pattern + ('|', true) => re_string.push(curr), // Character class negation "[^a]" ('[', false) => re_string.push(curr), // Explicitly escaped caret diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 193737d1025..b3c673dc523 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -282,6 +282,10 @@ fn test_regex() { .args(&["a^b", ":", "a\\^b"]) .succeeds() .stdout_only("3\n"); + new_ucmd!() + .args(&["b", ":", "a\\|^b"]) + .succeeds() + .stdout_only("1\n"); new_ucmd!() .args(&["a$b", ":", "a\\$b"]) .succeeds() From ea67ff6fc2802c3dc9423f307a274f4e36c23d06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 15:07:07 +0300 Subject: [PATCH 03/11] expr: Escape '$' characters --- src/uu/expr/src/syntax_tree.rs | 7 +++++++ tests/by-util/test_expr.rs | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 3e5bdf7a767..1b4e559f47d 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -182,6 +182,13 @@ impl StringOp { ('\\', false) => re_string.push(curr), _ => re_string.push_str(r"\^"), }, + '$' => { + if prev_is_escaped || prev != '\\' { + re_string.push_str(r"\$"); + } else { + re_string.push('$'); + } + } _ => re_string.push(curr), } diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index b3c673dc523..0886f6f28f3 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -770,7 +770,6 @@ mod gnu_expr { .stdout_only("3\n"); } - #[ignore] #[test] fn test_bre11() { new_ucmd!() From b73b9304ea10d36b996e6ddfdf4a00f7e815e96d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 15:08:09 +0300 Subject: [PATCH 04/11] expr: Handle '$' at the end of a capturing group --- src/uu/expr/src/syntax_tree.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 1b4e559f47d..0a7b1b7c38b 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -156,7 +156,7 @@ impl StringOp { re_string.push('^'); // Handle first character from the input pattern - let mut pattern_chars = right.chars(); + let mut pattern_chars = right.chars().peekable(); let first = pattern_chars.next(); match first { Some('^') => {} // Start of string anchor is already added @@ -169,7 +169,7 @@ impl StringOp { // Escaped previous character should not affect the current. let mut prev = first.unwrap_or_default(); let mut prev_is_escaped = false; - for curr in pattern_chars { + while let Some(curr) = pattern_chars.next() { match curr { '^' => match (prev, prev_is_escaped) { // Start of a capturing group @@ -183,7 +183,15 @@ impl StringOp { _ => re_string.push_str(r"\^"), }, '$' => { - if prev_is_escaped || prev != '\\' { + if let Some('\\') = pattern_chars.peek() { + let backslash = pattern_chars.next().unwrap_or_default(); + match pattern_chars.peek() { + // End of a capturing group + Some(')') => re_string.push('$'), + _ => re_string.push_str(r"\$"), + } + re_string.push(backslash); + } else if prev_is_escaped || prev != '\\' { re_string.push_str(r"\$"); } else { re_string.push('$'); From 36a37fa2a174767e0b7b7521bf0bf13bc5bb213e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 15:08:27 +0300 Subject: [PATCH 05/11] expr: Handle '$' at the end of an alternative pattern --- src/uu/expr/src/syntax_tree.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 0a7b1b7c38b..66281e69376 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -188,6 +188,8 @@ impl StringOp { match pattern_chars.peek() { // End of a capturing group Some(')') => re_string.push('$'), + // End of an alternative pattern + Some('|') => re_string.push('$'), _ => re_string.push_str(r"\$"), } re_string.push(backslash); From 1f622bc9d6f6e612401c1b7b0d21e5ae73e016ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 15:08:59 +0300 Subject: [PATCH 06/11] expr: Handle '$' at the end of the pattern --- src/uu/expr/src/syntax_tree.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 66281e69376..62634c50e1b 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -193,6 +193,8 @@ impl StringOp { _ => re_string.push_str(r"\$"), } re_string.push(backslash); + } else if pattern_chars.peek().is_none() { + re_string.push('$'); } else if prev_is_escaped || prev != '\\' { re_string.push_str(r"\$"); } else { From f61cd74ef6bf8c16fab93e2fc213195e8ab653f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 15:11:54 +0300 Subject: [PATCH 07/11] expr: Test handling '$' at the end of an alternative pattern --- tests/by-util/test_expr.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 0886f6f28f3..d608260c0d5 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -290,6 +290,10 @@ fn test_regex() { .args(&["a$b", ":", "a\\$b"]) .succeeds() .stdout_only("3\n"); + new_ucmd!() + .args(&["a", ":", "a$\\|b"]) + .succeeds() + .stdout_only("1\n"); new_ucmd!() .args(&["abc", ":", "^abc"]) .succeeds() From b7541aeac0b7c7d2f0cdebe11fb99734ec8b56c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 15:21:25 +0300 Subject: [PATCH 08/11] expr: Refactor checking if '$' is the last character --- src/uu/expr/src/syntax_tree.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 62634c50e1b..1030d8cd934 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -193,9 +193,9 @@ impl StringOp { _ => re_string.push_str(r"\$"), } re_string.push(backslash); - } else if pattern_chars.peek().is_none() { - re_string.push('$'); - } else if prev_is_escaped || prev != '\\' { + } else if (prev_is_escaped || prev != '\\') + && pattern_chars.peek().is_some() + { re_string.push_str(r"\$"); } else { re_string.push('$'); From 1d38a45e0b06e6de3c89cccda30f25c2574140f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 15:31:31 +0300 Subject: [PATCH 09/11] expr: Refactor caret options into one pattern --- src/uu/expr/src/syntax_tree.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs index 1030d8cd934..cf2a3eb2038 100644 --- a/src/uu/expr/src/syntax_tree.rs +++ b/src/uu/expr/src/syntax_tree.rs @@ -173,13 +173,13 @@ impl StringOp { match curr { '^' => match (prev, prev_is_escaped) { // Start of a capturing group - ('(', true) => re_string.push(curr), + ('(', true) // Start of an alternative pattern - ('|', true) => re_string.push(curr), + | ('|', true) // Character class negation "[^a]" - ('[', false) => re_string.push(curr), + | ('[', false) // Explicitly escaped caret - ('\\', false) => re_string.push(curr), + | ('\\', false) => re_string.push(curr), _ => re_string.push_str(r"\^"), }, '$' => { From 247f854a3ce4ec2515b0d2d7908d4bb5f923acd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 18:00:38 +0300 Subject: [PATCH 10/11] expr: Test ^ at the start of a subpattern --- tests/by-util/test_expr.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index d608260c0d5..9aecc26d744 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -286,6 +286,10 @@ fn test_regex() { .args(&["b", ":", "a\\|^b"]) .succeeds() .stdout_only("1\n"); + new_ucmd!() + .args(&["ab", ":", "\\(^a\\)b"]) + .succeeds() + .stdout_only("a\n"); new_ucmd!() .args(&["a$b", ":", "a\\$b"]) .succeeds() From 3f9a5f36723816a835013e2adc0524bf1a3e474a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20P=C3=A4tsi?= Date: Sun, 18 May 2025 18:00:50 +0300 Subject: [PATCH 11/11] expr: Test $ at the end of a subpattern --- tests/by-util/test_expr.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs index 9aecc26d744..2b1ebd33245 100644 --- a/tests/by-util/test_expr.rs +++ b/tests/by-util/test_expr.rs @@ -298,6 +298,10 @@ fn test_regex() { .args(&["a", ":", "a$\\|b"]) .succeeds() .stdout_only("1\n"); + new_ucmd!() + .args(&["ab", ":", "a\\(b$\\)"]) + .succeeds() + .stdout_only("b\n"); new_ucmd!() .args(&["abc", ":", "^abc"]) .succeeds()