Spicy
regexp.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <hilti/ast/ctors/bool.h>
6 #include <hilti/ast/expressions/ctor.h>
7 #include <hilti/ast/operators/common.h>
8 #include <hilti/ast/types/bytes.h>
9 #include <hilti/ast/types/integer.h>
10 #include <hilti/ast/types/library.h>
11 #include <hilti/ast/types/regexp.h>
12 #include <hilti/ast/types/stream.h>
13 #include <hilti/ast/types/tuple.h>
14 #include <hilti/ast/types/vector.h>
15 
16 namespace hilti::operator_ {
17 
18 BEGIN_METHOD(regexp, Match)
19  const auto& signature() const {
20  static auto _signature = Signature{.self = type::RegExp(),
21  .result = type::SignedInteger(32),
22  .id = "match",
23  .args = {{"data", type::constant(type::Bytes())}},
24  .doc = R"(
25 Matches the regular expression against *data*. If it matches, returns an
26 integer that's greater than zero. If multiple patterns have been compiled for
27 parallel matching, that integer will be the ID of the matching pattern. Returns
28 -1 if the regular expression does not match the data, but could still yield a
29 match if more data were added. Returns 0 if the regular expression is not found
30 and adding more data wouldn't change anything. The expression is considered
31 anchored, as though it starts with an implicit ``^`` regexp operator, to the
32 beginning of the data.
33 )"};
34  return _signature;
35  }
36 END_METHOD
37 
38 BEGIN_METHOD(regexp, Find)
39  const auto& signature() const {
40  static auto _signature = Signature{.self = type::RegExp(),
41  .result = type::Tuple({type::SignedInteger(32), type::Bytes()}),
42  .id = "find",
43  .args = {{"data", type::constant(type::Bytes())}},
44  .doc = R"(
45 Searches the regular expression in *data* and returns the matching part.
46 Different from ``match``, this does not anchor the expression to the beginning
47 of the data: it will find matches at arbitrary starting positions. Returns a
48 2-tuple with (1) an integer match indicator with the same semantics as that
49 returned by ``find``; and (2) if a match has been found, the data that matches
50 the regular expression. (Note: Currently this function has a runtime that's
51 quadratic in the size of *data*; consider using `match` if performance is an
52 issue.)
53 )"};
54  return _signature;
55  }
56 END_METHOD
57 
58 BEGIN_METHOD(regexp, MatchGroups)
59  const auto& signature() const {
60  static auto _signature = Signature{.self = type::RegExp(),
61  .result = type::Vector(type::Bytes()),
62  .id = "match_groups",
63  .args = {{"data", type::constant(type::Bytes())}},
64  .doc = R"(
65 Matches the regular expression against *data*. If it matches, returns a vector
66 with one entry for each capture group defined by the regular expression;
67 starting at index 1. Each of these entries is a view locating the matching
68 bytes. In addition, index 0 always contains the data that matches the full
69 regular expression. Returns an empty vector if the expression is not found. The
70 expression is considered anchored, as though it starts with an implicit ``^``
71 regexp operator, to the beginning of the data. This method is not compatible
72 with pattern sets and will throw a runtime exception if used with a regular
73 expression compiled from a set.
74 )"};
75  return _signature;
76  }
77 END_METHOD
78 
79 BEGIN_METHOD(regexp, TokenMatcher)
80  const auto& signature() const {
81  static auto _signature = Signature{.self = type::RegExp(),
82  .result = builder::typeByID("hilti::MatchState"),
83  .id = "token_matcher",
84  .args = {},
85  .doc = R"(
86 Initializes state for matching regular expression incrementally against chunks
87 of future input. The expression is considered anchored, as though it starts
88 with an implicit ``^`` regexp operator, to the beginning of the data.
89 )"};
90  return _signature;
91  }
92 END_METHOD
93 
94 BEGIN_METHOD(regexp_match_state, AdvanceBytes)
95  const auto& signature() const {
96  static auto _signature = Signature{.self = type::Library("hilti::rt::regexp::MatchState"),
97  .result = type::Tuple({type::SignedInteger(32), type::stream::View()}),
98  .id = "advance",
99  .args = {{"data", type::constant(type::Bytes())},
100  {"final", type::Bool(), false, expression::Ctor(ctor::Bool(true))}},
101  .doc = R"(
102 Feeds a chunk of data into the token match state, continuing matching where it
103 left off last time. If *final* is true, this is assumed to be the final piece
104 of data; any further advancing will then lead to an exception. Returns a
105 2-tuple with (1) an integer match indicator with the same semantics as that
106 returned by ``regexp::match()``; and (2) the number of bytes in *data* consumed
107 by the matching. The state must not be used again once an integer larger
108 or equal zero has been returned.
109 )"};
110  return _signature;
111  }
112 END_METHOD
113 
114 BEGIN_METHOD(regexp_match_state, AdvanceView)
115  const auto& signature() const {
116  static auto _signature = Signature{.self = type::Library("hilti::rt::regexp::MatchState"),
117  .result = type::Tuple({type::SignedInteger(32), type::stream::View()}),
118  .id = "advance",
119  .args = {{"data", type::constant(type::stream::View())}},
120  .doc = R"(
121 Feeds a chunk of data into the token match state, continuing matching where it
122 left off last time. If the underlying view is frozen, this will be assumed to
123 be last piece of data; any further advancing will then lead to an exception.
124 Returns a 2-tuple with (1) an integer match indicator with the same semantics as
125 that returned by ``regexp::match()``; and (2) a new view that's trimming *data*
126 to the part not yet consumed. The state must not be used again once an integer
127 larger or equal zero has been returned.
128 )"};
129  return _signature;
130  }
131 END_METHOD
132 
133 } // namespace hilti::operator_
Definition: operator-registry.h:15