Skip to content

Commit a85f36d

Browse files
hanickadot and Hana Dusíková authored
Empty regex in cycles (#132)
* temporary attempt to disallow empty regex in cycles, it will be replaced soon * add support for flags * snapshot * allow alternate starting with empty (|a) * case insensitive flag for future :) * currently hitting infinite loops with loops which match empty input * this stops the infinite loop * debug print :) * getting closer :) * propagating flag as a value, possessive and lazy propagate not read flag correctly * cleaning * flags are now values * add failing tests * cleaning * update to solve nested cycles * force inline match_re/search_re/starts_with_re * flags are referenced now to avoid copies * better string matching without recursion * cleaning, remove warning in old gcc Co-authored-by: Hana Dusíková <[email protected]>
1 parent 4363f5b commit a85f36d

File tree

11 files changed

+840
-476
lines changed

11 files changed

+840
-476
lines changed

include/ctll/list.hpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,38 @@ template <typename Head, typename... As, typename T = _nothing> constexpr auto f
4848
// Overload of front() for an empty list: there is no head element to hand
// back, so the caller-selected fallback type T (by default _nothing) is
// returned value-initialized. The second argument only serves to deduce T;
// its value is not used.
template <typename T = _nothing>
constexpr auto front(empty_list, T = T()) noexcept -> T {
	return T{};
}
4949

5050

51+
// set operations
52+
// Helper used by the set operations on type lists (exists_in / remove_item).
// For a given element type T it provides overload sets that tell an argument
// accepted by the T overload apart from any other argument:
//   - check(x)  -> std::true_type when the T overload is chosen, otherwise
//                  std::false_type (C-style variadic fallback).
//   - select(x) -> not_selected when the non-template T overload is chosen,
//                  otherwise wrapper<Y> carrying the argument's type Y.
template <typename T> struct item_matcher {
53+
// Tag meaning "drop this element": adding it to a list leaves the list type unchanged.
struct not_selected {
54+
// Declaration only (no definition is visible here); remove_item uses it inside decltype.
template <typename... Ts> friend constexpr auto operator+(list<Ts...>, not_selected) -> list<Ts...>;
55+
};
56+
// Tag meaning "keep this element": adding it to a list appends Y to the list type.
template <typename Y> struct wrapper {
57+
// Declaration only (no definition is visible here); remove_item uses it inside decltype.
template <typename... Ts> friend constexpr auto operator+(list<Ts...>, wrapper<Y>) -> list<Ts...,Y>;
58+
};
59+
60+
// Chosen when the argument can be passed as T.
static constexpr auto check(T) { return std::true_type{}; }
61+
// Fallback for every other argument.
static constexpr auto check(...) { return std::false_type{}; }
62+
// Non-template overload: wins over the template below for an argument of type T.
static constexpr auto select(T) { return not_selected{}; }
63+
// Generic overload: remembers the argument's type so it can be re-appended.
template <typename Y> static constexpr auto select(Y) { return wrapper<Y>{}; }
64+
};
65+
66+
// Returns true when at least one element type Ts of the list is accepted by
// item_matcher<T>::check's T overload. Each element is value-initialized and
// passed to check(); the results are OR-ed together with a fold expression
// whose initial value is false, so an empty list yields false.
template <typename T, typename... Ts> constexpr bool exists_in(T, list<Ts...>) noexcept {
67+
return (item_matcher<T>::check(Ts{}) || ... || false);
68+
}
69+
70+
// Set-style insertion into a type list: if exists_in(item, l) reports that the
// item is already present, the list is returned unchanged; otherwise a list
// with T appended at the end is returned. The decision is made at compile time
// (if constexpr), so the two branches may have different return types.
template <typename T, typename... Ts> constexpr auto add_item(T item, list<Ts...> l) noexcept {
71+
if constexpr (exists_in(item, l)) {
72+
return l;
73+
} else {
74+
return list<Ts..., T>{};
75+
}
76+
}
77+
78+
// Set-style removal from a type list. Starting from an empty list<>, every
// element Ts is passed through item_matcher<T>::select and "added" with
// operator+: elements mapped to not_selected leave the accumulated list as is,
// elements mapped to wrapper<Y> append Y. Only the resulting type matters: the
// fold is evaluated inside decltype and a value-initialized object of that
// list type is returned.
template <typename T, typename... Ts> constexpr auto remove_item(T, list<Ts...>) noexcept {
79+
item_matcher<T> matcher;
80+
return decltype((list<>{} + ... + matcher.select(Ts{}))){};
81+
}
82+
5183
}
5284

5385
#endif

include/ctre/evaluation.hpp

Lines changed: 199 additions & 156 deletions
Large diffs are not rendered by default.

include/ctre/pcre.gram

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@ alphanum_characters={a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,A,B,C,D
3232
set_control_chars={:,],-}
3333
capture_control_chars={<}
3434

35-
S-><content> | epsilon,[push_empty]
35+
S-><content> | epsilon,[push_empty] | pipe,[push_empty],<content>,[make_alternate]
3636

3737
content-><string>,<content2>
38-
content_in_capture-><string_in_capture>,<content2> | epsilon,[push_empty]
38+
content_in_capture-><string_in_capture>,<content2> | epsilon,[push_empty]
39+
content_in_capture->pipe,[push_empty],<content>,[make_alternate]
40+
3941
content2->pipe,<content>,[make_alternate] |pipe,[push_empty],[make_alternate] | epsilon
4042

4143
string-><atom_repeat>,<string2>

include/ctre/pcre.hpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ struct pcre {
2020
struct c {};
2121
struct class_named_name {};
2222
struct content2 {};
23+
struct content {};
2324
struct content_in_capture {};
2425
struct d {};
2526
struct e {};
@@ -137,8 +138,9 @@ struct pcre {
137138
static constexpr auto rule(s, ctll::set<'!',',','-',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character, repeat, string2, content2>;
138139
static constexpr auto rule(s, _others) -> ctll::push<ctll::anything, push_character, repeat, string2, content2>;
139140
static constexpr auto rule(s, ctll::term<'.'>) -> ctll::push<ctll::anything, push_character_anything, repeat, string2, content2>;
141+
static constexpr auto rule(s, ctll::term<'|'>) -> ctll::push<ctll::anything, push_empty, content, make_alternate>;
140142
static constexpr auto rule(s, ctll::epsilon) -> ctll::push<push_empty>;
141-
static constexpr auto rule(s, ctll::set<'\x29','*','+','?','\x7B','|','\x7D'>) -> ctll::reject;
143+
static constexpr auto rule(s, ctll::set<'\x29','*','+','?','\x7B','\x7D'>) -> ctll::reject;
142144

143145
static constexpr auto rule(a, ctll::term<'\\'>) -> ctll::push<ctll::anything, backslash, repeat, string2, content2, make_alternate>;
144146
static constexpr auto rule(a, ctll::term<'['>) -> ctll::push<ctll::anything, c, repeat, string2, content2, make_alternate>;
@@ -201,8 +203,9 @@ struct pcre {
201203
static constexpr auto rule(block, ctll::set<'!',',','-',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character, repeat, string2, content2, make_capture, ctll::term<'\x29'>>;
202204
static constexpr auto rule(block, _others) -> ctll::push<ctll::anything, push_character, repeat, string2, content2, make_capture, ctll::term<'\x29'>>;
203205
static constexpr auto rule(block, ctll::term<'.'>) -> ctll::push<ctll::anything, push_character_anything, repeat, string2, content2, make_capture, ctll::term<'\x29'>>;
206+
static constexpr auto rule(block, ctll::term<'|'>) -> ctll::push<ctll::anything, push_empty, content, make_alternate, make_capture, ctll::term<'\x29'>>;
204207
static constexpr auto rule(block, ctll::term<'\x29'>) -> ctll::push<push_empty, make_capture, ctll::anything>;
205-
static constexpr auto rule(block, ctll::set<'*','+','\x7B','|','\x7D'>) -> ctll::reject;
208+
static constexpr auto rule(block, ctll::set<'*','+','\x7B','\x7D'>) -> ctll::reject;
206209

207210
static constexpr auto rule(block_name2, ctll::set<'>','\x7D'>) -> ctll::epsilon;
208211
static constexpr auto rule(block_name2, ctll::set<'0','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z','_','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_name, block_name2>;
@@ -232,6 +235,16 @@ struct pcre {
232235
static constexpr auto rule(content2, ctll::epsilon) -> ctll::epsilon;
233236
static constexpr auto rule(content2, ctll::term<'|'>) -> ctll::push<ctll::anything, a>;
234237

238+
static constexpr auto rule(content, ctll::term<'\\'>) -> ctll::push<ctll::anything, backslash, repeat, string2, content2>;
239+
static constexpr auto rule(content, ctll::term<'['>) -> ctll::push<ctll::anything, c, repeat, string2, content2>;
240+
static constexpr auto rule(content, ctll::term<'\x28'>) -> ctll::push<ctll::anything, prepare_capture, block, repeat, string2, content2>;
241+
static constexpr auto rule(content, ctll::term<'^'>) -> ctll::push<ctll::anything, push_assert_begin, repeat, string2, content2>;
242+
static constexpr auto rule(content, ctll::term<'$'>) -> ctll::push<ctll::anything, push_assert_end, repeat, string2, content2>;
243+
static constexpr auto rule(content, ctll::set<'!',',','-',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character, repeat, string2, content2>;
244+
static constexpr auto rule(content, _others) -> ctll::push<ctll::anything, push_character, repeat, string2, content2>;
245+
static constexpr auto rule(content, ctll::term<'.'>) -> ctll::push<ctll::anything, push_character_anything, repeat, string2, content2>;
246+
static constexpr auto rule(content, ctll::set<'\x29','*','+','?','\x7B','|','\x7D'>) -> ctll::reject;
247+
235248
static constexpr auto rule(content_in_capture, ctll::term<'\\'>) -> ctll::push<ctll::anything, backslash, repeat, string2, content2>;
236249
static constexpr auto rule(content_in_capture, ctll::term<'['>) -> ctll::push<ctll::anything, c, repeat, string2, content2>;
237250
static constexpr auto rule(content_in_capture, ctll::term<'\x28'>) -> ctll::push<ctll::anything, prepare_capture, block, repeat, string2, content2>;
@@ -240,8 +253,9 @@ struct pcre {
240253
static constexpr auto rule(content_in_capture, ctll::set<'!',',','-',':','<','=','>','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T',']','_','0','U','V','W','X','Y','Z','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character, repeat, string2, content2>;
241254
static constexpr auto rule(content_in_capture, _others) -> ctll::push<ctll::anything, push_character, repeat, string2, content2>;
242255
static constexpr auto rule(content_in_capture, ctll::term<'.'>) -> ctll::push<ctll::anything, push_character_anything, repeat, string2, content2>;
256+
static constexpr auto rule(content_in_capture, ctll::term<'|'>) -> ctll::push<ctll::anything, push_empty, content, make_alternate>;
243257
static constexpr auto rule(content_in_capture, ctll::term<'\x29'>) -> ctll::push<push_empty>;
244-
static constexpr auto rule(content_in_capture, ctll::set<'*','+','?','\x7B','|','\x7D'>) -> ctll::reject;
258+
static constexpr auto rule(content_in_capture, ctll::set<'*','+','?','\x7B','\x7D'>) -> ctll::reject;
245259

246260
static constexpr auto rule(d, ctll::term<'<'>) -> ctll::push<ctll::anything, block_name, ctll::term<'>'>, content_in_capture, make_capture_with_name, ctll::term<'\x29'>>;
247261
static constexpr auto rule(d, ctll::term<':'>) -> ctll::push<reset_capture, ctll::anything, content_in_capture, ctll::term<'\x29'>>;

include/ctre/wrapper.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ template <typename RE> struct regular_expression {
7979
}
8080
#if __cpp_char8_t >= 201811
8181
static constexpr CTRE_FORCE_INLINE auto match(std::u8string_view sv) noexcept {
82-
return match_re<utf8_iterator, utf8_iterator::sentinel, RE, const char8_t *>(utf8_range(sv).begin(), utf8_range(sv).end(), {});
82+
return match_re<utf8_iterator, utf8_iterator::sentinel, RE, const char8_t *>(utf8_range(sv).begin(), utf8_range(sv).end(), {}, {});
8383
}
8484
#endif
8585
static constexpr CTRE_FORCE_INLINE auto match(std::u16string_view sv) noexcept {
@@ -115,7 +115,7 @@ template <typename RE> struct regular_expression {
115115
}
116116
#if __cpp_char8_t >= 201811
117117
static constexpr CTRE_FORCE_INLINE auto search(std::u8string_view sv) noexcept {
118-
return search_re<utf8_iterator, utf8_iterator::sentinel, RE, const char8_t *>(utf8_range(sv).begin(), utf8_range(sv).end(), {});
118+
return search_re<utf8_iterator, utf8_iterator::sentinel, RE, const char8_t *>(utf8_range(sv).begin(), utf8_range(sv).end(), {}, {});
119119
}
120120
#endif
121121
static constexpr CTRE_FORCE_INLINE auto search(std::u16string_view sv) noexcept {
@@ -145,7 +145,7 @@ template <typename RE> struct regular_expression {
145145
}
146146
#if __cpp_char8_t >= 201811
147147
static constexpr CTRE_FORCE_INLINE auto starts_with(std::u8string_view sv) noexcept {
148-
return starts_with<utf8_iterator, utf8_iterator::sentinel, RE, const char8_t *>(utf8_range(sv).begin(), utf8_range(sv).end(), {});
148+
return starts_with<utf8_iterator, utf8_iterator::sentinel, RE, const char8_t *>(utf8_range(sv).begin(), utf8_range(sv).end(), {}, {});
149149
}
150150
#endif
151151
static constexpr CTRE_FORCE_INLINE auto starts_with(std::string_view sv) noexcept {

0 commit comments

Comments
 (0)