Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <cstring>
6 #include <memory>
7 #include <string>
8 #include <tuple>
9 #include <utility>
10 
11 #include <hilti/rt/extension-points.h>
12 #include <hilti/rt/iterator.h>
13 #include <hilti/rt/result.h>
14 #include <hilti/rt/types/integer.h>
15 #include <hilti/rt/types/string.h>
16 #include <hilti/rt/types/time.h>
17 #include <hilti/rt/types/vector.h>
18 #include <hilti/rt/util.h>
19 
20 namespace hilti::rt {
21 
// Forward declarations. `Bytes` is defined further down in this header;
// `RegExp` and `stream::View` are only needed as reference parameter types
// in member declarations here.
class RegExp;
class Bytes;

namespace stream { class View; } // namespace stream
28 
29 namespace bytes {
30 
/** Selects which end(s) of a bytes value a side-aware operation such as `Bytes::strip()` works on. */
enum class Side { Left, Right, Both };
37 
/** Character sets that can be named when converting between bytes and strings. */
enum class Charset {
    Undef,
    UTF8,
    ASCII
};
40 
41 class Iterator {
42  using B = std::string;
43  using difference_type = B::const_iterator::difference_type;
44 
45  std::weak_ptr<B*> _control;
46  typename integer::safe<std::uint64_t> _index = 0;
47 
48 public:
49  Iterator() = default;
50 
51  Iterator(typename B::size_type index, const std::weak_ptr<B*> control)
52  : _control(control), _index(std::move(index)) {}
53 
54  uint8_t operator*() const {
55  if ( auto&& l = _control.lock() ) {
56  auto&& data = static_cast<B&>(**l);
57 
58  if ( _index >= data.size() )
59  throw IndexError(fmt("index %s out of bounds", _index));
60 
61  return data[_index];
62  }
63 
64  throw InvalidIterator("bound object has expired");
65  }
66 
67  template<typename T>
68  auto& operator+=(const hilti::rt::integer::safe<T>& n) {
69  return *this += n.Ref();
70  }
71 
72  auto& operator+=(uint64_t n) {
73  _index += n;
74  return *this;
75  }
76 
77  template<typename T>
78  auto operator+(const hilti::rt::integer::safe<T>& n) const {
79  return *this + n.Ref();
80  }
81 
82  template<typename T>
83  auto operator+(const T& n) const {
84  return Iterator{_index + n, _control};
85  }
86 
87  explicit operator bool() const { return static_cast<bool>(_control.lock()); }
88 
89  auto& operator++() {
90  ++_index;
91  return *this;
92  }
93 
94  auto operator++(int) {
95  auto result = *this;
96  ++_index;
97  return result;
98  }
99 
100  friend auto operator==(const Iterator& a, const Iterator& b) {
101  if ( a._control.lock() != b._control.lock() )
102  throw InvalidArgument("cannot compare iterators into different bytes");
103  return a._index == b._index;
104  }
105 
106  friend bool operator!=(const Iterator& a, const Iterator& b) { return ! (a == b); }
107 
108  friend auto operator<(const Iterator& a, const Iterator& b) {
109  if ( a._control.lock() != b._control.lock() )
110  throw InvalidArgument("cannot compare iterators into different bytes");
111  return a._index < b._index;
112  }
113 
114  friend auto operator<=(const Iterator& a, const Iterator& b) {
115  if ( a._control.lock() != b._control.lock() )
116  throw InvalidArgument("cannot compare iterators into different bytes");
117  return a._index <= b._index;
118  }
119 
120  friend auto operator>(const Iterator& a, const Iterator& b) {
121  if ( a._control.lock() != b._control.lock() )
122  throw InvalidArgument("cannot compare iterators into different bytes");
123  return a._index > b._index;
124  }
125 
126  friend auto operator>=(const Iterator& a, const Iterator& b) {
127  if ( a._control.lock() != b._control.lock() )
128  throw InvalidArgument("cannot compare iterators into different bytes");
129  return a._index >= b._index;
130  }
131 
132  friend difference_type operator-(const Iterator& a, const Iterator& b) {
133  if ( a._control.lock() != b._control.lock() )
134  throw InvalidArgument("cannot perform arithmetic with iterators into different bytes");
135  return a._index - b._index;
136  }
137 };
138 
139 inline std::string to_string(const Iterator& /* i */, rt::detail::adl::tag /*unused*/) { return "<bytes iterator>"; }
140 
141 inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
142  out << "<bytes iterator>";
143  return out;
144 }
145 
146 } // namespace bytes
147 
154 class Bytes : protected std::string {
155 public:
156  using Base = std::string;
158  using Base::const_reference;
159  using Base::reference;
160  using Offset = uint64_t;
161  using size_type = integer::safe<uint64_t>;
162 
163  using Base::Base;
164  using Base::data;
165 
174  Bytes(std::string s, bytes::Charset cs);
175 
176  Bytes(Base&& str) : Base(std::move(str)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
177  Bytes(const Bytes& xs) : Base(xs), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
178  Bytes(Bytes&& xs) : Base(std::move(xs)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
179 
187  Bytes& operator=(const Bytes& b) {
188  invalidateIterators();
189  this->Base::operator=(b);
190  return *this;
191  }
192 
201  invalidateIterators();
202  this->Base::operator=(std::move(b));
203  return *this;
204  }
205 
207  void append(const Bytes& d) { Base::append(d.str()); }
208 
210  void append(const stream::View& view);
211 
213  void append(const uint8_t x) { Base::append(1, x); }
214 
216  const std::string& str() const& { return *this; }
217 
219  const_iterator begin() const { return const_iterator(0u, _control); }
220 
222  const_iterator end() const { return const_iterator(size(), _control); }
223 
225  const_iterator at(Offset o) const { return begin() + o; }
226 
228  bool isEmpty() const { return empty(); }
229 
231  size_type size() const { return static_cast<int64_t>(std::string::size()); }
232 
239  const_iterator find(value_type b, const const_iterator& n = const_iterator()) const {
240  if ( auto i = Base::find(b, (n ? n - begin() : 0)); i != Base::npos )
241  return begin() + i;
242  else
243  return end();
244  }
245 
256  std::tuple<bool, const_iterator> find(const Bytes& v, const const_iterator& n = const_iterator()) const;
257 
265  Bytes sub(const const_iterator& from, const const_iterator& to) const {
266  return {substr(from - begin(), to - from)};
267  }
268 
275  Bytes sub(const const_iterator& to) const { return sub(begin(), to); }
276 
284  Bytes sub(Offset from, Offset to) const { return {substr(from, to - from)}; }
285 
292  Bytes sub(Offset to) const { return sub(0, to); }
293 
301  template<int N>
302  Bytes extract(unsigned char (&dst)[N]) const {
303  if ( N > size() )
304  throw InvalidArgument("insufficient data in source");
305 
306  memcpy(dst, data(), N);
307  return sub(N, std::string::npos);
308  }
309 
317  std::string decode(bytes::Charset cs) const;
318 
320  bool startsWith(const Bytes& b) const { return hilti::rt::startsWith(*this, b); }
321 
330  Bytes upper(bytes::Charset cs) const { return Bytes(hilti::rt::string::upper(decode(cs)), cs); }
331 
337  Bytes lower(bytes::Charset cs) const { return Bytes(hilti::rt::string::lower(decode(cs)), cs); }
338 
347  Bytes strip(const Bytes& set, bytes::Side side = bytes::Side::Both) const;
348 
356  Bytes strip(bytes::Side side = bytes::Side::Both) const;
357 
360  Vector<Bytes> x;
361  for ( auto& v : hilti::rt::split(*this) )
362  x.emplace_back(Bytes::Base(v));
363  return x;
364  }
365 
370  std::tuple<Bytes, Bytes> split1() const {
371  auto p = hilti::rt::split1(str());
372  return std::make_tuple(p.first, p.second);
373  }
374 
376  Vector<Bytes> split(const Bytes& sep) const {
377  Vector<Bytes> x;
378  for ( auto& v : hilti::rt::split(*this, sep) )
379  x.push_back(Bytes::Base(v));
380  return x;
381  }
382 
390  std::tuple<Bytes, Bytes> split1(const Bytes& sep) const {
391  auto p = hilti::rt::split1(str(), sep);
392  return std::make_tuple(p.first, p.second);
393  }
394 
400  template<typename T>
401  Bytes join(const Vector<T>& parts) const {
402  Bytes rval;
403 
404  for ( size_t i = 0; i < parts.size(); ++i ) {
405  if ( i > 0 )
406  rval += *this;
407 
408  rval += Bytes(hilti::rt::to_string_for_print(parts[i]).data());
409  }
410 
411  return rval;
412  }
413 
421  integer::safe<int64_t> toInt(uint64_t base = 10) const;
422 
430  integer::safe<uint64_t> toUInt(uint64_t base = 10) const;
431 
439  int64_t toInt(hilti::rt::ByteOrder byte_order) const;
440 
448  uint64_t toUInt(hilti::rt::ByteOrder byte_order) const;
449 
457  Time toTime(uint64_t base = 10) const {
458  auto ns = ! isEmpty() ? toUInt(base) * integer::safe<uint64_t>(1'000'000'000) : integer::safe<uint64_t>(0);
459  return Time(ns, Time::NanosecondTag());
460  }
461 
469  Time toTime(hilti::rt::ByteOrder byte_order) const {
470  return Time(toUInt(byte_order) * integer::safe<uint64_t>(1'000'000'000), Time::NanosecondTag());
471  }
472 
480  Result<Bytes> match(const RegExp& re, unsigned int group = 0) const;
481 
482  // Add some operators over `Base`.
483  friend bool operator==(const Bytes& a, const Bytes& b) {
484  return static_cast<const Bytes::Base&>(a) == static_cast<const Bytes::Base&>(b);
485  }
486 
487  friend bool operator!=(const Bytes& a, const Bytes& b) { return ! (a == b); }
488 
489 
490  friend bool operator<(const Bytes& a, const Bytes& b) {
491  return static_cast<const Bytes::Base&>(a) < static_cast<const Bytes::Base&>(b);
492  }
493 
494  friend bool operator<=(const Bytes& a, const Bytes& b) {
495  return static_cast<const Bytes::Base&>(a) <= static_cast<const Bytes::Base&>(b);
496  }
497 
498  friend bool operator>(const Bytes& a, const Bytes& b) {
499  return static_cast<const Bytes::Base&>(a) > static_cast<const Bytes::Base&>(b);
500  }
501 
502  friend bool operator>=(const Bytes& a, const Bytes& b) {
503  return static_cast<const Bytes::Base&>(a) >= static_cast<const Bytes::Base&>(b);
504  }
505 
506  friend Bytes operator+(const Bytes& a, const Bytes& b) {
507  return static_cast<const Bytes::Base&>(a) + static_cast<const Bytes::Base&>(b);
508  }
509 
510 private:
511  friend bytes::Iterator;
512  std::shared_ptr<Base*> _control;
513 
514  void invalidateIterators() { _control = std::make_shared<Base*>(static_cast<Base*>(this)); }
515 };
516 
517 inline std::ostream& operator<<(std::ostream& out, const Bytes& x) {
518  out << escapeBytes(x.str(), false);
519  return out;
520 }
521 
522 namespace bytes {
523 inline namespace literals {
524 inline Bytes operator"" _b(const char* str, size_t size) { return Bytes(Bytes::Base(str, size)); }
525 } // namespace literals
526 } // namespace bytes
527 
528 template<>
529 inline std::string detail::to_string_for_print<Bytes>(const Bytes& x) {
530  return escapeBytes(x.str(), false);
531 }
532 
533 namespace detail::adl {
534 std::string to_string(const Bytes& x, adl::tag /*unused*/);
535 std::string to_string(const bytes::Side& x, adl::tag /*unused*/);
536 std::string to_string(const bytes::Charset& x, adl::tag /*unused*/);
537 } // namespace detail::adl
538 
539 } // namespace hilti::rt
ByteOrder
Definition: util.h:503
Bytes sub(const const_iterator &to) const
Definition: bytes.h:275
std::string to_string(T &&x)
Definition: extension-points.h:26
bool isEmpty() const
Definition: bytes.h:228
std::string to_string_for_print(const T &x)
Definition: extension-points.h:45
void append(const uint8_t x)
Definition: bytes.h:213
size_type size() const
Definition: bytes.h:231
Bytes & operator=(const Bytes &b)
Definition: bytes.h:187
Definition: bytes.h:41
Bytes sub(Offset from, Offset to) const
Definition: bytes.h:284
Definition: any.h:7
Bytes extract(unsigned char(&dst)[N]) const
Definition: bytes.h:302
Bytes sub(Offset to) const
Definition: bytes.h:292
std::tuple< Bytes, Bytes > split1(const Bytes &sep) const
Definition: bytes.h:390
Definition: regexp.h:117
Time toTime(uint64_t base=10) const
Definition: bytes.h:457
std::pair< std::string, std::string > split1(std::string s)
Definition: util.cc:156
Definition: bytes.h:154
bool startsWith(const std::string &s, const std::string &prefix)
Definition: util.h:200
const_iterator begin() const
Definition: bytes.h:219
Bytes upper(bytes::Charset cs) const
Definition: bytes.h:330
Definition: stream.h:978
const_iterator end() const
Definition: bytes.h:222
std::vector< std::string_view > split(std::string_view s, std::string_view delim)
Definition: util.cc:112
bool startsWith(const Bytes &b) const
Definition: bytes.h:320
void append(const Bytes &d)
Definition: bytes.h:207
Bytes sub(const const_iterator &from, const const_iterator &to) const
Definition: bytes.h:265
const std::string & str() const &
Definition: bytes.h:216
Bytes & operator=(Bytes &&b)
Definition: bytes.h:200
Definition: extension-points.h:12
Definition: vector.h:251
std::tuple< Bytes, Bytes > split1() const
Definition: bytes.h:370
Vector< Bytes > split() const
Definition: bytes.h:359
Definition: time.h:23
Definition: time.h:20
const_iterator find(value_type b, const const_iterator &n=const_iterator()) const
Definition: bytes.h:239
Definition: result.h:67
Bytes lower(bytes::Charset cs) const
Definition: bytes.h:337
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:13
const_iterator at(Offset o) const
Definition: bytes.h:225
Vector< Bytes > split(const Bytes &sep) const
Definition: bytes.h:376
Bytes join(const Vector< T > &parts) const
Definition: bytes.h:401