Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <hilti/ast/builder/expression.h>
6 #include <hilti/ast/expressions/id.h>
7 #include <hilti/ast/operators/common.h>
8 #include <hilti/ast/types/bool.h>
9 #include <hilti/ast/types/integer.h>
10 #include <hilti/ast/types/library.h>
11 #include <hilti/ast/types/string.h>
12 #include <hilti/ast/types/vector.h>
13 
14 namespace hilti {
15 namespace operator_ {
16 
17 // bytes::Iterator
18 
19 STANDARD_OPERATOR_1(bytes::iterator, Deref, type::UnsignedInteger(8), type::constant(type::bytes::Iterator()),
20  "Returns the character the iterator is pointing to.");
21 STANDARD_OPERATOR_1(bytes::iterator, IncrPostfix, type::bytes::Iterator(), type::bytes::Iterator(),
22  "Advances the iterator by one byte, returning the previous position.");
23 STANDARD_OPERATOR_1(bytes::iterator, IncrPrefix, type::bytes::Iterator(), type::bytes::Iterator(),
24  "Advances the iterator by one byte, returning the new position.");
25 
26 STANDARD_OPERATOR_2(
27  bytes::iterator, Equal, type::Bool(), type::constant(type::bytes::Iterator()),
28  type::constant(type::bytes::Iterator()),
29  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
30 STANDARD_OPERATOR_2(
31  bytes::iterator, Unequal, type::Bool(), type::constant(type::bytes::Iterator()),
32  type::constant(type::bytes::Iterator()),
33  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
34 STANDARD_OPERATOR_2(
35  bytes::iterator, Lower, type::Bool(), type::constant(type::bytes::Iterator()),
36  type::constant(type::bytes::Iterator()),
37  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
38 STANDARD_OPERATOR_2(
39  bytes::iterator, LowerEqual, type::Bool(), type::constant(type::bytes::Iterator()),
40  type::constant(type::bytes::Iterator()),
41  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
42 STANDARD_OPERATOR_2(
43  bytes::iterator, Greater, type::Bool(), type::constant(type::bytes::Iterator()),
44  type::constant(type::bytes::Iterator()),
45  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
46 STANDARD_OPERATOR_2(
47  bytes::iterator, GreaterEqual, type::Bool(), type::constant(type::bytes::Iterator()),
48  type::constant(type::bytes::Iterator()),
49  "Compares the two positions. The result is undefined if they are not referring to the same bytes value.");
50 STANDARD_OPERATOR_2(
51  bytes::iterator, Difference, type::SignedInteger(64), type::constant(type::bytes::Iterator()),
52  type::constant(type::bytes::Iterator()),
53  "Returns the number of bytes between the two iterators. The result will be negative if the second iterator points "
54  "to a location before the first. The result is undefined if the iterators do not refer to the same bytes "
55  "instance.");
56 STANDARD_OPERATOR_2(bytes::iterator, Sum, type::bytes::Iterator(), type::constant(type::bytes::Iterator()),
57  type::UnsignedInteger(64),
58  "Returns an iterator which is pointing the given number of bytes beyond the one passed in.")
59 STANDARD_OPERATOR_2(bytes::iterator, SumAssign, type::bytes::Iterator(), type::bytes::Iterator(),
60  type::UnsignedInteger(64), "Advances the iterator by the given number of bytes.")
61 
62 // Bytes
63 
64 STANDARD_OPERATOR_1(bytes, Size, type::UnsignedInteger(64), type::constant(type::Bytes()),
65  "Returns the number of bytes the value contains.");
66 STANDARD_OPERATOR_2(bytes, Equal, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
67  "Compares two bytes values lexicographically.");
68 STANDARD_OPERATOR_2(bytes, Unequal, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
69  "Compares two bytes values lexicographically.");
70 STANDARD_OPERATOR_2(bytes, Greater, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
71  "Compares two bytes values lexicographically.");
72 STANDARD_OPERATOR_2(bytes, GreaterEqual, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
73  "Compares two bytes values lexicographically.");
74 STANDARD_OPERATOR_2(bytes, In, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
75  "Returns true if the right-hand-side value contains the left-hand-side value as a subsequence.");
76 STANDARD_OPERATOR_2(bytes, Lower, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
77  "Compares two bytes values lexicographically.");
78 STANDARD_OPERATOR_2(bytes, LowerEqual, type::Bool(), type::constant(type::Bytes()), type::constant(type::Bytes()),
79  "Compares two bytes values lexicographically.");
80 STANDARD_OPERATOR_2(bytes, Sum, type::constant(type::Bytes()), type::constant(type::Bytes()),
81  type::constant(type::Bytes()), "Returns the concatenation of two bytes values.");
82 STANDARD_OPERATOR_2x(bytes, SumAssignBytes, SumAssign, type::Bytes(), type::Bytes(), type::constant(type::Bytes()),
83  "Appends one bytes value to another.");
84 STANDARD_OPERATOR_2x(bytes, SumAssignStreamView, SumAssign, type::Bytes(), type::Bytes(),
85  type::constant(type::stream::View()), "Appends a view of stream data to a bytes instance.");
86 STANDARD_OPERATOR_2x(bytes, SumAssignUInt8, SumAssign, type::Bytes(), type::Bytes(), type::UnsignedInteger(8),
87  "Appends a single byte to the data.");
88 
89 BEGIN_METHOD(bytes, Find)
90  auto signature() const {
91  return Signature{.self = type::constant(type::Bytes()),
92  .result = type::Tuple({type::Bool(), type::bytes::Iterator()}),
93  .id = "find",
94  .args = {{.id = "needle", .type = type::constant(type::Bytes())}},
95  .doc = R"(
96 Searches *needle* in the value's content. Returns a tuple of a boolean and an
97 iterator. If *needle* was found, the boolean will be true and the iterator will
98 point to its first occurrence. If *needle* was not found, the boolean will be
99 false and the iterator will point to the last position so that everything before
100 it is guaranteed to not contain even a partial match of *needle*. Note that for a
101 simple yes/no result, you should use the ``in`` operator instead of this method,
102 as it's more efficient.
103 )"};
104  }
105 END_METHOD
106 
107 BEGIN_METHOD(bytes, LowerCase)
108  auto signature() const {
109  return Signature{.self = type::constant(type::Bytes()),
110  .result = type::Bytes(),
111  .id = "lower",
112  .args = {{.id = "charset",
113  .type = type::Enum(type::Wildcard()),
114  .default_ = builder::id("hilti::Charset::UTF8")}},
115  .doc = R"(
116 Returns a lower-case version of the bytes value, assuming it is encoded in character set *charset*.
117 )"};
118  }
119 END_METHOD
120 
121 BEGIN_METHOD(bytes, UpperCase)
122  auto signature() const {
123  return Signature{.self = type::constant(type::Bytes()),
124  .result = type::Bytes(),
125  .id = "upper",
126  .args = {{.id = "charset",
127  .type = type::Enum(type::Wildcard()),
128  .default_ = builder::id("hilti::Charset::UTF8")}},
129  .doc = R"(
130 Returns an upper-case version of the bytes value, assuming it is encoded in character set *charset*.
131 )"};
132  }
133 END_METHOD
134 
135 BEGIN_METHOD(bytes, At)
136  auto signature() const {
137  return Signature{.self = type::constant(type::Bytes()),
138  .result = type::bytes::Iterator(),
139  .id = "at",
140  .args = {{.id = "i", .type = type::UnsignedInteger(64)}},
141  .doc = R"(
142 Returns an iterator representing the offset *i* inside the bytes value.
143 )"};
144  }
145 END_METHOD
146 
147 BEGIN_METHOD(bytes, Split)
148  auto signature() const {
149  return Signature{.self = type::constant(type::Bytes()),
150  .result = type::Vector(type::Bytes()),
151  .id = "split",
152  .args = {{.id = "sep", .type = type::constant(type::Bytes()), .optional = true}},
153  .doc = R"(
154 Splits the bytes value at each occurrence of *sep* and returns a vector
155 containing the individual pieces, with all separators removed. If the separator
156 is not found, the returned vector will have the whole bytes value as its single
157 element. If the separator is not given, or empty, the split will take place at
158 sequences of white spaces.
159 )"};
160  }
161 END_METHOD
162 
163 BEGIN_METHOD(bytes, Split1)
164  auto signature() const {
165  return Signature{.self = type::constant(type::Bytes()),
166  .result = type::Tuple({type::Bytes(), type::Bytes()}),
167  .id = "split1",
168  .args = {{.id = "sep", .type = type::constant(type::Bytes()), .optional = true}},
169  .doc = R"(
170 Splits the bytes value at the first occurrence of *sep* and returns the two parts
171 as a 2-tuple, with the separator removed. If the separator is not found, the
172 returned tuple will have the whole bytes value as its first element and an empty value
173 as its second element. If the separator is not given, or empty, the split will
174 take place at the first sequence of white spaces.
175 )"};
176  }
177 END_METHOD
178 
179 BEGIN_METHOD(bytes, StartsWith)
180  auto signature() const {
181  return Signature{.self = type::constant(type::Bytes()),
182  .result = type::Bool(),
183  .id = "starts_with",
184  .args = {{.id = "b", .type = type::constant(type::Bytes())}},
185  .doc = R"(
186 Returns true if the bytes value starts with *b*.
187 )"};
188  }
189 END_METHOD
190 
191 BEGIN_METHOD(bytes, Strip)
192  auto signature() const {
193  return Signature{.self = type::constant(type::Bytes()),
194  .result = type::Bytes(),
195  .id = "strip",
196  .args = {{.id = "side",
197  .type = type::constant(type::Library("hilti::rt::bytes::Side")),
198  .optional = true},
199  {.id = "set", .type = type::constant(type::Bytes()), .optional = true}},
200  .doc = R"(
201 Removes leading and/or trailing sequences of all characters in *set* from the bytes
202 value. If *set* is not given, removes all white spaces. If *side* is given,
203 it indicates which side of the value should be stripped; ``Side::Both`` is the
204 default if not given.
205 )"};
206  }
207 END_METHOD
208 
209 BEGIN_METHOD(bytes, SubIterators)
210  auto signature() const {
211  return Signature{.self = type::constant(type::Bytes()),
212  .result = type::Bytes(),
213  .id = "sub",
214  .args = {{.id = "begin", .type = type::bytes::Iterator()},
215  {.id = "end", .type = type::bytes::Iterator()}},
216  .doc = R"(
217 Returns the subsequence from *begin* to (but not including) *end*.
218 )"};
219  }
220 END_METHOD
221 
222 BEGIN_METHOD(bytes, SubIterator)
223  auto signature() const {
224  return Signature{.self = type::constant(type::Bytes()),
225  .result = type::Bytes(),
226  .id = "sub",
227  .args = {{.id = "end", .type = type::bytes::Iterator()}},
228  .doc = R"(
229 Returns the subsequence from the value's beginning to (but not including) *end*.
230 )"};
231  }
232 END_METHOD
233 
234 BEGIN_METHOD(bytes, SubOffsets)
235  auto signature() const {
236  return Signature{.self = type::constant(type::Bytes()),
237  .result = type::Bytes(),
238  .id = "sub",
239  .args = {{.id = "begin", .type = type::UnsignedInteger(64)},
240  {.id = "end", .type = type::UnsignedInteger(64)}},
241  .doc = R"(
242 Returns the subsequence from offset *begin* to (but not including) offset *end*.
243 )"};
244  }
245 END_METHOD
246 
247 BEGIN_METHOD(bytes, Join)
248  auto signature() const {
249  return Signature{.self = type::constant(type::Bytes()),
250  .result = type::Bytes(),
251  .id = "join",
252  .args = {{.id = "parts", .type = type::Vector(type::Wildcard())}},
253  .doc =
254  R"(
255 Returns the concatenation of all elements in the *parts* list rendered as
256 printable strings. The portions will be separated by the bytes value to
257 which this method is invoked as a member.
258 )"};
259  }
260 END_METHOD
261 
262 BEGIN_METHOD(bytes, ToIntAscii)
263  auto signature() const {
264  return Signature{.self = type::constant(type::Bytes()),
265  .result = type::SignedInteger(64),
266  .id = "to_int",
267  .args = {{.id = "base", .type = type::UnsignedInteger(64), .optional = true}},
268  .doc =
269  R"(
270 Interprets the data as representing an ASCII-encoded number and converts that
271 into a signed integer, using a base of *base*. *base* must be between 2 and 36.
272 If *base* is not given, the default is 10.
273 )"};
274  }
275 END_METHOD
276 
277 BEGIN_METHOD(bytes, ToUIntAscii)
278  auto signature() const {
279  return Signature{.self = type::constant(type::Bytes()),
280  .result = type::UnsignedInteger(64),
281  .id = "to_uint",
282  .args = {{.id = "base", .type = type::UnsignedInteger(64), .optional = true}},
283  .doc =
284  R"(
285 Interprets the data as representing an ASCII-encoded number and converts that
286 into an unsigned integer, using a base of *base*. *base* must be between 2 and
287 36. If *base* is not given, the default is 10.
288 )"};
289  }
290 END_METHOD
291 
292 BEGIN_METHOD(bytes, ToIntBinary)
293  auto signature() const {
294  return Signature{.self = type::constant(type::Bytes()),
295  .result = type::SignedInteger(64),
296  .id = "to_int",
297  .args = {{.id = "byte_order", .type = type::Enum(type::Wildcard())}},
298  .doc =
299  R"(
300 Interprets the ``bytes`` as representing an binary number encoded with the given
301 byte order, and converts it into signed integer.
302 )"};
303  }
304 END_METHOD
305 
306 BEGIN_METHOD(bytes, ToUIntBinary)
307  auto signature() const {
308  return Signature{.self = type::constant(type::Bytes()),
309  .result = type::UnsignedInteger(64),
310  .id = "to_uint",
311  .args = {{.id = "byte_order", .type = type::Enum(type::Wildcard())}},
312  .doc =
313  R"(
314 Interprets the ``bytes`` as representing an binary number encoded with the given
315 byte order, and converts it into an unsigned integer.
316 )"};
317  }
318 END_METHOD
319 
320 BEGIN_METHOD(bytes, ToTimeAscii)
321  auto signature() const {
322  return Signature{.self = type::constant(type::Bytes()),
323  .result = type::Time(),
324  .id = "to_time",
325  .args = {{.id = "base", .type = type::UnsignedInteger(64), .optional = true}},
326  .doc =
327  R"(
328 Interprets the ``bytes`` as representing a number of seconds since the epoch in
329 the form of an ASCII-encoded number, and converts it into a time value using a
330 base of *base*. If *base* is not given, the default is 10.
331 )"};
332  }
333 END_METHOD
334 
335 BEGIN_METHOD(bytes, ToTimeBinary)
336  auto signature() const {
337  return Signature{.self = type::constant(type::Bytes()),
338  .result = type::Time(),
339  .id = "to_time",
340  .args = {{.id = "byte_order", .type = type::Enum(type::Wildcard())}},
341  .doc =
342  R"(
343 Interprets the ``bytes`` as representing as number of seconds since the epoch in
344 the form of an binary number encoded with the given byte order, and converts it
345 into a time value.
346 )"};
347  }
348 END_METHOD
349 
350 BEGIN_METHOD(bytes, Decode)
351  auto signature() const {
352  return Signature{.self = type::constant(type::Bytes()),
353  .result = type::String(),
354  .id = "decode",
355  .args = {{.id = "charset",
356  .type = type::Enum(type::Wildcard()),
357  .default_ = builder::id("hilti::Charset::UTF8")}},
358  .doc =
359  R"(
360 Interprets the ``bytes`` as representing an binary string encoded with the given
361 character set, and converts it into a UTF8 string.
362 )"};
363  }
364 END_METHOD
365 
366 BEGIN_METHOD(bytes, Match)
367  auto signature() const {
368  return Signature{.self = type::constant(type::Bytes()),
369  .result = type::Result(type::Bytes()),
370  .id = "match",
371  .args = {{.id = "regex", .type = type::RegExp()},
372  {.id = "group", .type = type::UnsignedInteger(64), .optional = true}},
373  .doc =
374  R"(
375 Matches the ``bytes`` object against the regular expression *regex*. Returns
376 the matching part or, if *group* is given, then the corresponding subgroup. The
377 expression is considered anchored to the beginning of the data.
378 )"};
379  }
380 END_METHOD
381 
382 } // namespace operator_
383 } // namespace hilti