Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <hilti/ast/builder/expression.h>
6 #include <hilti/ast/expressions/id.h>
7 #include <hilti/ast/operators/common.h>
8 #include <hilti/ast/types/bool.h>
9 #include <hilti/ast/types/integer.h>
10 #include <hilti/ast/types/library.h>
11 #include <hilti/ast/types/string.h>
12 #include <hilti/ast/types/vector.h>
13 
14 namespace hilti::operator_ {
15 
16 // bytes::Iterator
17 
18 STANDARD_OPERATOR_1(bytes::iterator, Deref, type::UnsignedInteger(8), type::constant(type::bytes::Iterator()),
19  "Returns the character the iterator is pointing to.");
20 STANDARD_OPERATOR_1(bytes::iterator, IncrPostfix, type::bytes::Iterator(), type::bytes::Iterator(),
21  "Advances the iterator by one byte, returning the previous position.");
22 STANDARD_OPERATOR_1(bytes::iterator, IncrPrefix, type::bytes::Iterator(), type::bytes::Iterator(),
23  "Advances the iterator by one byte, returning the new position.");
24 
25 STANDARD_OPERATOR_2(
26  bytes::iterator, Equal, type::Bool(), type::constant(type::bytes::Iterator()),
27  type::constant(type::bytes::Iterator()),
28  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
29 STANDARD_OPERATOR_2(
30  bytes::iterator, Unequal, type::Bool(), type::constant(type::bytes::Iterator()),
31  type::constant(type::bytes::Iterator()),
32  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
33 STANDARD_OPERATOR_2(
34  bytes::iterator, Lower, type::Bool(), type::constant(type::bytes::Iterator()),
35  type::constant(type::bytes::Iterator()),
36  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
37 STANDARD_OPERATOR_2(
38  bytes::iterator, LowerEqual, type::Bool(), type::constant(type::bytes::Iterator()),
39  type::constant(type::bytes::Iterator()),
40  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
41 STANDARD_OPERATOR_2(
42  bytes::iterator, Greater, type::Bool(), type::constant(type::bytes::Iterator()),
43  type::constant(type::bytes::Iterator()),
44  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
45 STANDARD_OPERATOR_2(
46  bytes::iterator, GreaterEqual, type::Bool(), type::constant(type::bytes::Iterator()),
47  type::constant(type::bytes::Iterator()),
48  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
49 STANDARD_OPERATOR_2(
50  bytes::iterator, Difference, type::SignedInteger(64), type::constant(type::bytes::Iterator()),
51  type::constant(type::bytes::Iterator()),
52  "Returns the number of bytes between the two iterators. The result will be negative if the second iterator points "
53  "to a location before the first. The result is undefined if the iterators do not refer to the same bytes "
54  "instance.");
55 STANDARD_OPERATOR_2(bytes::iterator, Sum, type::bytes::Iterator(), type::constant(type::bytes::Iterator()),
56  type::UnsignedInteger(64),
57  "Returns an iterator which is pointing the given number of bytes beyond the one passed in.")
58 STANDARD_OPERATOR_2(bytes::iterator, SumAssign, type::bytes::Iterator(), type::bytes::Iterator(),
59  type::UnsignedInteger(64), "Advances the iterator by the given number of bytes.")
60 
61 // Bytes
62 
63 STANDARD_OPERATOR_1(bytes, Size, type::UnsignedInteger(64), type::constant(type::Bytes()),
64  "Returns the number of bytes the value contains.");
65 STANDARD_OPERATOR_2(bytes, Equal, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
66  "Compares two bytes values lexicographically.");
67 STANDARD_OPERATOR_2(bytes, Unequal, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
68  "Compares two bytes values lexicographically.");
69 STANDARD_OPERATOR_2(bytes, Greater, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
70  "Compares two bytes values lexicographically.");
71 STANDARD_OPERATOR_2(bytes, GreaterEqual, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
72  "Compares two bytes values lexicographically.");
73 STANDARD_OPERATOR_2(bytes, In, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
74  "Returns true if the right-hand-side value contains the left-hand-side value as a subsequence.");
75 STANDARD_OPERATOR_2(bytes, Lower, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
76  "Compares two bytes values lexicographically.");
77 STANDARD_OPERATOR_2(bytes, LowerEqual, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
78  "Compares two bytes values lexicographically.");
79 STANDARD_OPERATOR_2(bytes, Sum, type::constant(type::Bytes()), type::constant(type::Bytes()),
80  type::constant(type::Bytes()), "Returns the concatenation of two bytes values.");
81 STANDARD_OPERATOR_2x(bytes, SumAssignBytes, SumAssign, type::Bytes(), type::Bytes(), type::constant(type::Bytes()),
82  "Appends one bytes value to another.");
83 STANDARD_OPERATOR_2x(bytes, SumAssignStreamView, SumAssign, type::Bytes(), type::Bytes(),
84  type::constant(type::stream::View()), "Appends a view of stream data to a bytes instance.");
85 STANDARD_OPERATOR_2x(bytes, SumAssignUInt8, SumAssign, type::Bytes(), type::Bytes(), type::UnsignedInteger(8),
86  "Appends a single byte to the data.");
87 
88 BEGIN_METHOD(bytes, Find)
89  const auto& signature() const {
90  static auto _signature = Signature{.self = type::constant(type::Bytes()),
91  .result = type::Tuple({type::Bool(), type::bytes::Iterator()}),
92  .id = "find",
93  .args = {{"needle", type::constant(type::Bytes())}},
94  .doc = R"(
95 Searches *needle* in the value's content. Returns a tuple of a boolean and an
96 iterator. If *needle* was found, the boolean will be true and the iterator will
97 point to its first occurrence. If *needle* was not found, the boolean will be
98 false and the iterator will point to the last position so that everything before
99 it is guaranteed to not contain even a partial match of *needle*. Note that for a
100 simple yes/no result, you should use the ``in`` operator instead of this method,
101 as it's more efficient.
102 )"};
103  return _signature;
104  }
105 END_METHOD
106 
107 BEGIN_METHOD(bytes, LowerCase)
108  const auto& signature() const {
109  static auto _signature =
110  Signature{.self = type::constant(type::Bytes()),
111  .result = type::Bytes(),
112  .id = "lower",
113  .args = {{"charset", type::Enum(type::Wildcard()), false, builder::id("hilti::Charset::UTF8")}},
114  .doc = R"(
115 Returns a lower-case version of the bytes value, assuming it is encoded in character set *charset*.
116 )"};
117  return _signature;
118  }
119 END_METHOD
120 
121 BEGIN_METHOD(bytes, UpperCase)
122  const auto& signature() const {
123  static auto _signature =
124  Signature{.self = type::constant(type::Bytes()),
125  .result = type::Bytes(),
126  .id = "upper",
127  .args = {{"charset", type::Enum(type::Wildcard()), false, builder::id("hilti::Charset::UTF8")}},
128  .doc = R"(
129 Returns an upper-case version of the bytes value, assuming it is encoded in character set *charset*.
130 )"};
131  return _signature;
132  }
133 END_METHOD
134 
135 BEGIN_METHOD(bytes, At)
136  const auto& signature() const {
137  static auto _signature = Signature{.self = type::constant(type::Bytes()),
138  .result = type::bytes::Iterator(),
139  .id = "at",
140  .args = {{"i", type::UnsignedInteger(64)}},
141  .doc = R"(
142 Returns an iterator representing the offset *i* inside the bytes value.
143 )"};
144  return _signature;
145  }
146 END_METHOD
147 
148 BEGIN_METHOD(bytes, Split)
149  const auto& signature() const {
150  static auto _signature = Signature{.self = type::constant(type::Bytes()),
151  .result = type::Vector(type::Bytes()),
152  .id = "split",
153  .args = {{"sep", type::constant(type::Bytes()), true}},
154  .doc = R"(
155 Splits the bytes value at each occurrence of *sep* and returns a vector
156 containing the individual pieces, with all separators removed. If the separator
157 is not found, the returned vector will have the whole bytes value as its single
158 element. If the separator is not given, or empty, the split will take place at
159 sequences of white spaces.
160 )"};
161  return _signature;
162  }
163 END_METHOD
164 
165 BEGIN_METHOD(bytes, Split1)
166  const auto& signature() const {
167  static auto _signature = Signature{.self = type::constant(type::Bytes()),
168  .result = type::Tuple({type::Bytes(), type::Bytes()}),
169  .id = "split1",
170  .args = {{"sep", type::constant(type::Bytes()), true}},
171  .doc = R"(
172 Splits the bytes value at the first occurrence of *sep* and returns the two parts
173 as a 2-tuple, with the separator removed. If the separator is not found, the
174 returned tuple will have the whole bytes value as its first element and an empty value
175 as its second element. If the separator is not given, or empty, the split will
176 take place at the first sequence of white spaces.
177 )"};
178  return _signature;
179  }
180 END_METHOD
181 
182 BEGIN_METHOD(bytes, StartsWith)
183  const auto& signature() const {
184  static auto _signature = Signature{.self = type::constant(type::Bytes()),
185  .result = type::Bool(),
186  .id = "starts_with",
187  .args = {{"b", type::constant(type::Bytes())}},
188  .doc = R"(
189 Returns true if the bytes value starts with *b*.
190 )"};
191  return _signature;
192  }
193 END_METHOD
194 
195 BEGIN_METHOD(bytes, Strip)
196  const auto& signature() const {
197  static auto _signature =
198  Signature{.self = type::constant(type::Bytes()),
199  .result = type::Bytes(),
200  .id = "strip",
201  .args = {{"side", type::constant(type::Library("::hilti::rt::bytes::Side")), true},
202  {"set", type::constant(type::Bytes()), true}},
203  .doc = R"(
204 Removes leading and/or trailing sequences of all characters in *set* from the bytes
205 value. If *set* is not given, removes all white spaces. If *side* is given,
206 it indicates which side of the value should be stripped; ``Side::Both`` is the
207 default if not given.
208 )"};
209  return _signature;
210  }
211 END_METHOD
212 
213 BEGIN_METHOD(bytes, SubIterators)
214  const auto& signature() const {
215  static auto _signature =
216  Signature{.self = type::constant(type::Bytes()),
217  .result = type::Bytes(),
218  .id = "sub",
219  .args = {{"begin", type::bytes::Iterator()}, {"end", type::bytes::Iterator()}},
220  .doc = R"(
221 Returns the subsequence from *begin* to (but not including) *end*.
222 )"};
223  return _signature;
224  }
225 END_METHOD
226 
227 BEGIN_METHOD(bytes, SubIterator)
228  const auto& signature() const {
229  static auto _signature = Signature{.self = type::constant(type::Bytes()),
230  .result = type::Bytes(),
231  .id = "sub",
232  .args = {{"end", type::bytes::Iterator()}},
233  .doc = R"(
234 Returns the subsequence from the value's beginning to (but not including) *end*.
235 )"};
236  return _signature;
237  }
238 END_METHOD
239 
240 BEGIN_METHOD(bytes, SubOffsets)
241  const auto& signature() const {
242  static auto _signature =
243  Signature{.self = type::constant(type::Bytes()),
244  .result = type::Bytes(),
245  .id = "sub",
246  .args = {{"begin", type::UnsignedInteger(64)}, {"end", type::UnsignedInteger(64)}},
247  .doc = R"(
248 Returns the subsequence from offset *begin* to (but not including) offset *end*.
249 )"};
250  return _signature;
251  }
252 END_METHOD
253 
254 BEGIN_METHOD(bytes, Join)
255  const auto& signature() const {
256  static auto _signature = Signature{.self = type::constant(type::Bytes()),
257  .result = type::Bytes(),
258  .id = "join",
259  .args = {{"parts", type::Vector(type::Wildcard())}},
260  .doc =
261  R"(
262 Returns the concatenation of all elements in the *parts* list rendered as
263 printable strings. The portions will be separated by the bytes value to
264 which this method is invoked as a member.
265 )"};
266  return _signature;
267  }
268 END_METHOD
269 
270 BEGIN_METHOD(bytes, ToIntAscii)
271  const auto& signature() const {
272  static auto _signature = Signature{.self = type::constant(type::Bytes()),
273  .result = type::SignedInteger(64),
274  .id = "to_int",
275  .args = {{"base", type::UnsignedInteger(64), true}},
276  .doc =
277  R"(
278 Interprets the data as representing an ASCII-encoded number and converts that
279 into a signed integer, using a base of *base*. *base* must be between 2 and 36.
280 If *base* is not given, the default is 10.
281 )"};
282  return _signature;
283  }
284 END_METHOD
285 
286 BEGIN_METHOD(bytes, ToUIntAscii)
287  const auto& signature() const {
288  static auto _signature = Signature{.self = type::constant(type::Bytes()),
289  .result = type::UnsignedInteger(64),
290  .id = "to_uint",
291  .args = {{"base", type::UnsignedInteger(64), true}},
292  .doc =
293  R"(
294 Interprets the data as representing an ASCII-encoded number and converts that
295 into an unsigned integer, using a base of *base*. *base* must be between 2 and
296 36. If *base* is not given, the default is 10.
297 )"};
298  return _signature;
299  }
300 END_METHOD
301 
302 BEGIN_METHOD(bytes, ToIntBinary)
303  const auto& signature() const {
304  static auto _signature = Signature{.self = type::constant(type::Bytes()),
305  .result = type::SignedInteger(64),
306  .id = "to_int",
307  .args = {{"byte_order", type::Enum(type::Wildcard())}},
308  .doc =
309  R"(
310 Interprets the ``bytes`` as representing an binary number encoded with the given
311 byte order, and converts it into signed integer.
312 )"};
313  return _signature;
314  }
315 END_METHOD
316 
317 BEGIN_METHOD(bytes, ToUIntBinary)
318  const auto& signature() const {
319  static auto _signature = Signature{.self = type::constant(type::Bytes()),
320  .result = type::UnsignedInteger(64),
321  .id = "to_uint",
322  .args = {{"byte_order", type::Enum(type::Wildcard())}},
323  .doc =
324  R"(
325 Interprets the ``bytes`` as representing an binary number encoded with the given
326 byte order, and converts it into an unsigned integer.
327 )"};
328  return _signature;
329  }
330 END_METHOD
331 
332 BEGIN_METHOD(bytes, ToTimeAscii)
333  const auto& signature() const {
334  static auto _signature = Signature{.self = type::constant(type::Bytes()),
335  .result = type::Time(),
336  .id = "to_time",
337  .args = {{"base", type::UnsignedInteger(64), true}},
338  .doc =
339  R"(
340 Interprets the ``bytes`` as representing a number of seconds since the epoch in
341 the form of an ASCII-encoded number, and converts it into a time value using a
342 base of *base*. If *base* is not given, the default is 10.
343 )"};
344  return _signature;
345  }
346 END_METHOD
347 
348 BEGIN_METHOD(bytes, ToTimeBinary)
349  const auto& signature() const {
350  static auto _signature = Signature{.self = type::constant(type::Bytes()),
351  .result = type::Time(),
352  .id = "to_time",
353  .args = {{"byte_order", type::Enum(type::Wildcard())}},
354  .doc =
355  R"(
356 Interprets the ``bytes`` as representing as number of seconds since the epoch in
357 the form of an binary number encoded with the given byte order, and converts it
358 into a time value.
359 )"};
360  return _signature;
361  }
362 END_METHOD
363 
364 BEGIN_METHOD(bytes, Decode)
365  const auto& signature() const {
366  static auto _signature =
367  Signature{.self = type::constant(type::Bytes()),
368  .result = type::String(),
369  .id = "decode",
370  .args = {{"charset", type::Enum(type::Wildcard()), false, builder::id("hilti::Charset::UTF8")}},
371  .doc =
372  R"(
373 Interprets the ``bytes`` as representing an binary string encoded with the given
374 character set, and converts it into a UTF8 string.
375 )"};
376  return _signature;
377  }
378 END_METHOD
379 
380 BEGIN_METHOD(bytes, Match)
381  const auto& signature() const {
382  static auto _signature =
383  Signature{.self = type::constant(type::Bytes()),
384  .result = type::Result(type::Bytes()),
385  .id = "match",
386  .args = {{"regex", type::RegExp()}, {"group", type::UnsignedInteger(64), true}},
387  .doc =
388  R"(
389 Matches the ``bytes`` object against the regular expression *regex*. Returns
390 the matching part or, if *group* is given, then the corresponding subgroup. The
391 expression is considered anchored to the beginning of the data.
392 )"};
393  return _signature;
394  }
395 END_METHOD
396 
397 } // namespace hilti::operator_
Definition: operator-registry.h:15