Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <hilti/ast/builder/expression.h>
6 #include <hilti/ast/expressions/id.h>
7 #include <hilti/ast/operators/common.h>
8 #include <hilti/ast/types/bool.h>
9 #include <hilti/ast/types/integer.h>
10 #include <hilti/ast/types/library.h>
11 #include <hilti/ast/types/string.h>
12 #include <hilti/ast/types/vector.h>
13 
14 namespace hilti::operator_ {
15 
16 // bytes::Iterator
17 
18 STANDARD_OPERATOR_1(bytes::iterator, Deref, type::UnsignedInteger(8), type::constant(type::bytes::Iterator()),
19  "Returns the character the iterator is pointing to.");
20 STANDARD_OPERATOR_1(bytes::iterator, IncrPostfix, type::bytes::Iterator(), type::bytes::Iterator(),
21  "Advances the iterator by one byte, returning the previous position.");
22 STANDARD_OPERATOR_1(bytes::iterator, IncrPrefix, type::bytes::Iterator(), type::bytes::Iterator(),
23  "Advances the iterator by one byte, returning the new position.");
24 
25 STANDARD_OPERATOR_2(
26  bytes::iterator, Equal, type::Bool(), type::constant(type::bytes::Iterator()),
27  type::constant(type::bytes::Iterator()),
28  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
29 STANDARD_OPERATOR_2(
30  bytes::iterator, Unequal, type::Bool(), type::constant(type::bytes::Iterator()),
31  type::constant(type::bytes::Iterator()),
32  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
33 STANDARD_OPERATOR_2(
34  bytes::iterator, Lower, type::Bool(), type::constant(type::bytes::Iterator()),
35  type::constant(type::bytes::Iterator()),
36  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
37 STANDARD_OPERATOR_2(
38  bytes::iterator, LowerEqual, type::Bool(), type::constant(type::bytes::Iterator()),
39  type::constant(type::bytes::Iterator()),
40  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
41 STANDARD_OPERATOR_2(
42  bytes::iterator, Greater, type::Bool(), type::constant(type::bytes::Iterator()),
43  type::constant(type::bytes::Iterator()),
44  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
45 STANDARD_OPERATOR_2(
46  bytes::iterator, GreaterEqual, type::Bool(), type::constant(type::bytes::Iterator()),
47  type::constant(type::bytes::Iterator()),
48  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
49 STANDARD_OPERATOR_2(
50  bytes::iterator, Difference, type::SignedInteger(64), type::constant(type::bytes::Iterator()),
51  type::constant(type::bytes::Iterator()),
52  "Returns the number of bytes between the two iterators. The result will be negative if the second iterator points "
53  "to a location before the first. The result is undefined if the iterators do not refer to the same bytes "
54  "instance.");
55 STANDARD_OPERATOR_2(bytes::iterator, Sum, type::bytes::Iterator(), type::constant(type::bytes::Iterator()),
56  type::UnsignedInteger(64),
57  "Returns an iterator which is pointing the given number of bytes beyond the one passed in.")
58 STANDARD_OPERATOR_2(bytes::iterator, SumAssign, type::bytes::Iterator(), type::bytes::Iterator(),
59  type::UnsignedInteger(64), "Advances the iterator by the given number of bytes.")
60 
61 // Bytes
62 
63 STANDARD_OPERATOR_1(bytes, Size, type::UnsignedInteger(64), type::constant(type::Bytes()),
64  "Returns the number of bytes the value contains.");
65 STANDARD_OPERATOR_2(bytes, Equal, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
66  "Compares two bytes values lexicographically.");
67 STANDARD_OPERATOR_2(bytes, Unequal, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
68  "Compares two bytes values lexicographically.");
69 STANDARD_OPERATOR_2(bytes, Greater, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
70  "Compares two bytes values lexicographically.");
71 STANDARD_OPERATOR_2(bytes, GreaterEqual, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
72  "Compares two bytes values lexicographically.");
73 STANDARD_OPERATOR_2(bytes, In, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
74  "Returns true if the right-hand-side value contains the left-hand-side value as a subsequence.");
75 STANDARD_OPERATOR_2(bytes, Lower, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
76  "Compares two bytes values lexicographically.");
77 STANDARD_OPERATOR_2(bytes, LowerEqual, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
78  "Compares two bytes values lexicographically.");
79 STANDARD_OPERATOR_2(bytes, Sum, type::constant(type::Bytes()), type::constant(type::Bytes()),
80  type::constant(type::Bytes()), "Returns the concatenation of two bytes values.");
81 STANDARD_OPERATOR_2x(bytes, SumAssignBytes, SumAssign, type::Bytes(), type::Bytes(), type::constant(type::Bytes()),
82  "Appends one bytes value to another.");
83 STANDARD_OPERATOR_2x(bytes, SumAssignStreamView, SumAssign, type::Bytes(), type::Bytes(),
84  type::constant(type::stream::View()), "Appends a view of stream data to a bytes instance.");
85 STANDARD_OPERATOR_2x(bytes, SumAssignUInt8, SumAssign, type::Bytes(), type::Bytes(), type::UnsignedInteger(8),
86  "Appends a single byte to the data.");
87 
88 BEGIN_METHOD(bytes, Find)
89  const auto& signature() const {
90  static auto _signature = Signature{.self = type::constant(type::Bytes()),
91  .result = type::Tuple({type::Bool(), type::bytes::Iterator()}),
92  .id = "find",
93  .args = {{"needle", type::constant(type::Bytes())}},
94  .doc = R"(
95 Searches *needle* in the value's content. Returns a tuple of a boolean and an
96 iterator. If *needle* was found, the boolean will be true and the iterator will
97 point to its first occurrence. If *needle* was not found, the boolean will be
98 false and the iterator will point to the last position so that everything before
99 it is guaranteed to not contain even a partial match of *needle*. Note that for a
100 simple yes/no result, you should use the ``in`` operator instead of this method,
101 as it's more efficient.
102 )"};
103  return _signature;
104  }
105 END_METHOD
106 
107 BEGIN_METHOD(bytes, LowerCase)
108  const auto& signature() const {
109  static auto _signature =
110  Signature{.self = type::constant(type::Bytes()),
111  .result = type::Bytes(),
112  .id = "lower",
113  .args = {{"charset", type::Enum(type::Wildcard()), false, builder::id("hilti::Charset::UTF8")},
114  {"errors", type::Enum(type::Wildcard()), false,
115  builder::id("hilti::DecodeErrorStrategy::REPLACE")}},
116  .doc = R"(
117 Returns a lower-case version of the bytes value, assuming it is
118 encoded in character set *charset*. If data is encountered that
119 *charset* cannot represent, it's handled according to the *errors*
120 strategy.
121 )"};
122  return _signature;
123  }
124 END_METHOD
125 
126 BEGIN_METHOD(bytes, UpperCase)
127  const auto& signature() const {
128  static auto _signature =
129  Signature{.self = type::constant(type::Bytes()),
130  .result = type::Bytes(),
131  .id = "upper",
132  .args = {{"charset", type::Enum(type::Wildcard()), false, builder::id("hilti::Charset::UTF8")},
133  {"errors", type::Enum(type::Wildcard()), false,
134  builder::id("hilti::DecodeErrorStrategy::REPLACE")}},
135  .doc = R"(
136 Returns an upper-case version of the bytes value, assuming it is
137 encoded in character set *charset*. If data is encountered that
138 *charset* cannot represent, it's handled according to the *errors*
139 strategy.
140 )"};
141  return _signature;
142  }
143 END_METHOD
144 
145 BEGIN_METHOD(bytes, At)
146  const auto& signature() const {
147  static auto _signature = Signature{.self = type::constant(type::Bytes()),
148  .result = type::bytes::Iterator(),
149  .id = "at",
150  .args = {{"i", type::UnsignedInteger(64)}},
151  .doc = R"(
152 Returns an iterator representing the offset *i* inside the bytes value.
153 )"};
154  return _signature;
155  }
156 END_METHOD
157 
158 BEGIN_METHOD(bytes, Split)
159  const auto& signature() const {
160  static auto _signature = Signature{.self = type::constant(type::Bytes()),
161  .result = type::Vector(type::Bytes()),
162  .id = "split",
163  .args = {{"sep", type::constant(type::Bytes()), true}},
164  .doc = R"(
165 Splits the bytes value at each occurrence of *sep* and returns a vector
166 containing the individual pieces, with all separators removed. If the separator
167 is not found, the returned vector will have the whole bytes value as its single
168 element. If the separator is not given, or empty, the split will take place at
169 sequences of white spaces.
170 )"};
171  return _signature;
172  }
173 END_METHOD
174 
175 BEGIN_METHOD(bytes, Split1)
176  const auto& signature() const {
177  static auto _signature = Signature{.self = type::constant(type::Bytes()),
178  .result = type::Tuple({type::Bytes(), type::Bytes()}),
179  .id = "split1",
180  .args = {{"sep", type::constant(type::Bytes()), true}},
181  .doc = R"(
182 Splits the bytes value at the first occurrence of *sep* and returns the two parts
183 as a 2-tuple, with the separator removed. If the separator is not found, the
184 returned tuple will have the whole bytes value as its first element and an empty value
185 as its second element. If the separator is not given, or empty, the split will
186 take place at the first sequence of white spaces.
187 )"};
188  return _signature;
189  }
190 END_METHOD
191 
192 BEGIN_METHOD(bytes, StartsWith)
193  const auto& signature() const {
194  static auto _signature = Signature{.self = type::constant(type::Bytes()),
195  .result = type::Bool(),
196  .id = "starts_with",
197  .args = {{"b", type::constant(type::Bytes())}},
198  .doc = R"(
199 Returns true if the bytes value starts with *b*.
200 )"};
201  return _signature;
202  }
203 END_METHOD
204 
205 BEGIN_METHOD(bytes, Strip)
206  const auto& signature() const {
207  static auto _signature =
208  Signature{.self = type::constant(type::Bytes()),
209  .result = type::Bytes(),
210  .id = "strip",
211  .args = {{"side", type::constant(type::Library("::hilti::rt::bytes::Side")), true},
212  {"set", type::constant(type::Bytes()), true}},
213  .doc = R"(
214 Removes leading and/or trailing sequences of all characters in *set* from the bytes
215 value. If *set* is not given, removes all white spaces. If *side* is given,
216 it indicates which side of the value should be stripped; ``Side::Both`` is the
217 default if not given.
218 )"};
219  return _signature;
220  }
221 END_METHOD
222 
223 BEGIN_METHOD(bytes, SubIterators)
224  const auto& signature() const {
225  static auto _signature =
226  Signature{.self = type::constant(type::Bytes()),
227  .result = type::Bytes(),
228  .id = "sub",
229  .args = {{"begin", type::bytes::Iterator()}, {"end", type::bytes::Iterator()}},
230  .doc = R"(
231 Returns the subsequence from *begin* to (but not including) *end*.
232 )"};
233  return _signature;
234  }
235 END_METHOD
236 
237 BEGIN_METHOD(bytes, SubIterator)
238  const auto& signature() const {
239  static auto _signature = Signature{.self = type::constant(type::Bytes()),
240  .result = type::Bytes(),
241  .id = "sub",
242  .args = {{"end", type::bytes::Iterator()}},
243  .doc = R"(
244 Returns the subsequence from the value's beginning to (but not including) *end*.
245 )"};
246  return _signature;
247  }
248 END_METHOD
249 
250 BEGIN_METHOD(bytes, SubOffsets)
251  const auto& signature() const {
252  static auto _signature =
253  Signature{.self = type::constant(type::Bytes()),
254  .result = type::Bytes(),
255  .id = "sub",
256  .args = {{"begin", type::UnsignedInteger(64)}, {"end", type::UnsignedInteger(64)}},
257  .doc = R"(
258 Returns the subsequence from offset *begin* to (but not including) offset *end*.
259 )"};
260  return _signature;
261  }
262 END_METHOD
263 
264 BEGIN_METHOD(bytes, Join)
265  const auto& signature() const {
266  static auto _signature = Signature{.self = type::constant(type::Bytes()),
267  .result = type::Bytes(),
268  .id = "join",
269  .args = {{"parts", type::Vector(type::Wildcard())}},
270  .doc =
271  R"(
272 Returns the concatenation of all elements in the *parts* list rendered as
273 printable strings. The portions will be separated by the bytes value to
274 which this method is invoked as a member.
275 )"};
276  return _signature;
277  }
278 END_METHOD
279 
280 BEGIN_METHOD(bytes, ToIntAscii)
281  const auto& signature() const {
282  static auto _signature = Signature{.self = type::constant(type::Bytes()),
283  .result = type::SignedInteger(64),
284  .id = "to_int",
285  .args = {{"base", type::UnsignedInteger(64), true}},
286  .doc =
287  R"(
288 Interprets the data as representing an ASCII-encoded number and converts that
289 into a signed integer, using a base of *base*. *base* must be between 2 and 36.
290 If *base* is not given, the default is 10.
291 )"};
292  return _signature;
293  }
294 END_METHOD
295 
296 BEGIN_METHOD(bytes, ToUIntAscii)
297  const auto& signature() const {
298  static auto _signature = Signature{.self = type::constant(type::Bytes()),
299  .result = type::UnsignedInteger(64),
300  .id = "to_uint",
301  .args = {{"base", type::UnsignedInteger(64), true}},
302  .doc =
303  R"(
304 Interprets the data as representing an ASCII-encoded number and converts that
305 into an unsigned integer, using a base of *base*. *base* must be between 2 and
306 36. If *base* is not given, the default is 10.
307 )"};
308  return _signature;
309  }
310 END_METHOD
311 
312 BEGIN_METHOD(bytes, ToIntBinary)
313  const auto& signature() const {
314  static auto _signature = Signature{.self = type::constant(type::Bytes()),
315  .result = type::SignedInteger(64),
316  .id = "to_int",
317  .args = {{"byte_order", type::Enum(type::Wildcard())}},
318  .doc =
319  R"(
320 Interprets the ``bytes`` as representing an binary number encoded with the given
321 byte order, and converts it into signed integer.
322 )"};
323  return _signature;
324  }
325 END_METHOD
326 
327 BEGIN_METHOD(bytes, ToUIntBinary)
328  const auto& signature() const {
329  static auto _signature = Signature{.self = type::constant(type::Bytes()),
330  .result = type::UnsignedInteger(64),
331  .id = "to_uint",
332  .args = {{"byte_order", type::Enum(type::Wildcard())}},
333  .doc =
334  R"(
335 Interprets the ``bytes`` as representing an binary number encoded with the given
336 byte order, and converts it into an unsigned integer.
337 )"};
338  return _signature;
339  }
340 END_METHOD
341 
342 BEGIN_METHOD(bytes, ToTimeAscii)
343  const auto& signature() const {
344  static auto _signature = Signature{.self = type::constant(type::Bytes()),
345  .result = type::Time(),
346  .id = "to_time",
347  .args = {{"base", type::UnsignedInteger(64), true}},
348  .doc =
349  R"(
350 Interprets the ``bytes`` as representing a number of seconds since the epoch in
351 the form of an ASCII-encoded number, and converts it into a time value using a
352 base of *base*. If *base* is not given, the default is 10.
353 )"};
354  return _signature;
355  }
356 END_METHOD
357 
358 BEGIN_METHOD(bytes, ToTimeBinary)
359  const auto& signature() const {
360  static auto _signature = Signature{.self = type::constant(type::Bytes()),
361  .result = type::Time(),
362  .id = "to_time",
363  .args = {{"byte_order", type::Enum(type::Wildcard())}},
364  .doc =
365  R"(
366 Interprets the ``bytes`` as representing as number of seconds since the epoch in
367 the form of an binary number encoded with the given byte order, and converts it
368 into a time value.
369 )"};
370  return _signature;
371  }
372 END_METHOD
373 
374 BEGIN_METHOD(bytes, Decode)
375  const auto& signature() const {
376  static auto _signature =
377  Signature{.self = type::constant(type::Bytes()),
378  .result = type::String(),
379  .id = "decode",
380  .args = {{"charset", type::Enum(type::Wildcard()), false, builder::id("hilti::Charset::UTF8")},
381  {"errors", type::Enum(type::Wildcard()), false,
382  builder::id("hilti::DecodeErrorStrategy::REPLACE")}},
383  .doc =
384  R"(
385 Interprets the ``bytes`` as representing an binary string encoded with
386 the given character set, and converts it into a UTF8 string. If data
387 is encountered that *charset* or UTF* cannot represent, it's handled
388 according to the *errors* strategy.
389 )"};
390  return _signature;
391  }
392 END_METHOD
393 
394 BEGIN_METHOD(bytes, Match)
395  const auto& signature() const {
396  static auto _signature =
397  Signature{.self = type::constant(type::Bytes()),
398  .result = type::Result(type::Bytes()),
399  .id = "match",
400  .args = {{"regex", type::RegExp()}, {"group", type::UnsignedInteger(64), true}},
401  .doc =
402  R"(
403 Matches the ``bytes`` object against the regular expression *regex*. Returns
404 the matching part or, if *group* is given, then the corresponding subgroup. The
405 expression is considered anchored to the beginning of the data.
406 )"};
407  return _signature;
408  }
409 END_METHOD
410 
411 } // namespace hilti::operator_
Definition: operator-registry.h:15