Spicy
regexp.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <hilti/ast/ctors/bool.h>
6 #include <hilti/ast/expressions/ctor.h>
7 #include <hilti/ast/operators/common.h>
8 #include <hilti/ast/types/bytes.h>
9 #include <hilti/ast/types/integer.h>
10 #include <hilti/ast/types/library.h>
11 #include <hilti/ast/types/regexp.h>
12 #include <hilti/ast/types/stream.h>
13 #include <hilti/ast/types/tuple.h>
14 #include <hilti/ast/types/vector.h>
15 
16 namespace hilti::operator_ {
17 
18 BEGIN_METHOD(regexp, Match)
19  auto signature() const {
20  return Signature{.self = type::RegExp(),
21  .result = type::SignedInteger(32),
22  .id = "match",
23  .args = {{.id = "data", .type = type::constant(type::Bytes())}},
24  .doc = R"(
25 Matches the regular expression against *data*. If it matches, returns an
26 integer that's greater than zero. If multiple patterns have been compiled for
27 parallel matching, that integer will be the ID of the matching pattern. Returns
28 -1 if the regular expression does not match the data, but could still yield a
29 match if more data were added. Returns 0 if the regular expression is not found
30 and adding more data wouldn't change anything. The expression is considered
31 anchored, as though it starts with an implicit ``^`` regexp operator, to the
32 beginning of the data.
33 )"};
34  }
35 END_METHOD
36 
37 BEGIN_METHOD(regexp, Find)
38  auto signature() const {
39  return Signature{.self = type::RegExp(),
40  .result = type::Tuple({type::SignedInteger(32), type::Bytes()}),
41  .id = "find",
42  .args = {{.id = "data", .type = type::constant(type::Bytes())}},
43  .doc = R"(
44 Searches the regular expression in *data* and returns the matching part.
45 Different from ``match``, this does not anchor the expression to the beginning
46 of the data: it will find matches at arbitrary starting positions. Returns a
47 2-tuple with (1) an integer match indicator with the same semantics as that
48 returned by ``find``; and (2) if a match has been found, the data that matches
49 the regular expression. (Note: Currently this function has a runtime that's
50 quadratic in the size of *data*; consider using `match` if performance is an
51 issue.)
52 )"};
53  }
54 END_METHOD
55 
56 BEGIN_METHOD(regexp, MatchGroups)
57  auto signature() const {
58  return Signature{.self = type::RegExp(),
59  .result = type::Vector(type::Bytes()),
60  .id = "match_groups",
61  .args = {{.id = "data", .type = type::constant(type::Bytes())}},
62  .doc = R"(
63 Matches the regular expression against *data*. If it matches, returns a vector
64 with one entry for each capture group defined by the regular expression;
65 starting at index 1. Each of these entries is a view locating the matching
66 bytes. In addition, index 0 always contains the data that matches the full
67 regular expression. Returns an empty vector if the expression is not found. The
68 expression is considered anchored, as though it starts with an implicit ``^``
69 regexp operator, to the beginning of the data. This method is not compatible
70 with pattern sets and will throw a runtime exception if used with a regular
71 expression compiled from a set.
72 )"};
73  }
74 END_METHOD
75 
76 BEGIN_METHOD(regexp, TokenMatcher)
77  auto signature() const {
78  return Signature{.self = type::RegExp(),
79  .result = builder::typeByID("hilti::MatchState"),
80  .id = "token_matcher",
81  .args = {},
82  .doc = R"(
83 Initializes state for matching regular expression incrementally against chunks
84 of future input. The expression is considered anchored, as though it starts
85 with an implicit ``^`` regexp operator, to the beginning of the data.
86 )"};
87  }
88 END_METHOD
89 
90 BEGIN_METHOD(regexp_match_state, AdvanceBytes)
91  auto signature() const {
92  return Signature{.self = type::Library("hilti::rt::regexp::MatchState"),
93  .result = type::Tuple({type::SignedInteger(32), type::stream::View()}),
94  .id = "advance",
95  .args = {{.id = "data", .type = type::constant(type::Bytes())},
96  {.id = "final",
97  .type = type::Bool(),
98  .default_ = expression::Ctor(ctor::Bool(true))}},
99  .doc = R"(
100 Feeds a chunk of data into the token match state, continuing matching where it
101 left off last time. If *final* is true, this is assumed to be the final piece
102 of data; any further advancing will then lead to an exception. Returns a
103 2-tuple with (1) an integer match indicator with the same semantics as that
104 returned by ``regexp::match()``; and (2) the number of bytes in *data* consumed
105 by the matching. The state must not be used again once an integer larger
106 or equal zero has been returned.
107 )"};
108  }
109 END_METHOD
110 
111 BEGIN_METHOD(regexp_match_state, AdvanceView)
112  auto signature() const {
113  return Signature{.self = type::Library("hilti::rt::regexp::MatchState"),
114  .result = type::Tuple({type::SignedInteger(32), type::stream::View()}),
115  .id = "advance",
116  .args = {{.id = "data", .type = type::constant(type::stream::View())}},
117  .doc = R"(
118 Feeds a chunk of data into the token match state, continuing matching where it
119 left off last time. If the underlying view is frozen, this will be assumed to
120 be last piece of data; any further advancing will then lead to an exception.
121 Returns a 2-tuple with (1) an integer match indicator with the same semantics as
122 that returned by ``regexp::match()``; and (2) a new view that's trimming *data*
123 to the part not yet consumed. The state must not be used again once an integer
124 larger or equal zero has been returned.
125 )"};
126  }
127 END_METHOD
128 
129 } // namespace hilti::operator_
Definition: operator-registry.h:16