Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <cstring>
6 #include <memory>
7 #include <string>
8 #include <tuple>
9 #include <utility>
10 
11 #include <hilti/rt/extension-points.h>
12 #include <hilti/rt/iterator.h>
13 #include <hilti/rt/json-fwd.h>
14 #include <hilti/rt/result.h>
15 #include <hilti/rt/types/integer.h>
16 #include <hilti/rt/types/string.h>
17 #include <hilti/rt/types/time.h>
18 #include <hilti/rt/types/vector.h>
19 #include <hilti/rt/util.h>
20 
21 namespace hilti::rt {
22 
23 class Bytes;
24 class RegExp;
25 
26 namespace stream {
27 class View;
28 }
29 
30 namespace bytes {
31 
/** Identifies a side of a bytes value; taken as a parameter by `Bytes::strip()`. */
enum class Side : int64_t {
    Left = 0,  ///< The left side.
    Right = 1, ///< The right side.
    Both = 2   ///< Left and right side.
};
38 
/** Character sets accepted by the charset-aware `Bytes` operations (`decode()`, `upper()`, `lower()`). */
enum class Charset : int64_t {
    Undef = 0,
    UTF8 = 1,
    ASCII = 2
};
41 
42 class Iterator {
43  using B = std::string;
44  using difference_type = B::const_iterator::difference_type;
45 
46  std::weak_ptr<B*> _control;
47  typename integer::safe<std::uint64_t> _index = 0;
48 
49 public:
50  Iterator() = default;
51 
52  Iterator(typename B::size_type index, const std::weak_ptr<B*> control)
53  : _control(control), _index(std::move(index)) {}
54 
55  uint8_t operator*() const {
56  if ( auto&& l = _control.lock() ) {
57  auto&& data = static_cast<B&>(**l);
58 
59  if ( _index >= data.size() )
60  throw IndexError(fmt("index %s out of bounds", _index));
61 
62  return data[_index];
63  }
64 
65  throw InvalidIterator("bound object has expired");
66  }
67 
68  template<typename T>
69  auto& operator+=(const hilti::rt::integer::safe<T>& n) {
70  return *this += n.Ref();
71  }
72 
73  auto& operator+=(uint64_t n) {
74  _index += n;
75  return *this;
76  }
77 
78  template<typename T>
79  auto operator+(const hilti::rt::integer::safe<T>& n) const {
80  return *this + n.Ref();
81  }
82 
83  template<typename T>
84  auto operator+(const T& n) const {
85  return Iterator{_index + n, _control};
86  }
87 
88  explicit operator bool() const { return static_cast<bool>(_control.lock()); }
89 
90  auto& operator++() {
91  ++_index;
92  return *this;
93  }
94 
95  const auto operator++(int) {
96  auto result = *this;
97  ++_index;
98  return result;
99  }
100 
101  friend auto operator==(const Iterator& a, const Iterator& b) {
102  if ( a._control.lock() != b._control.lock() )
103  throw InvalidArgument("cannot compare iterators into different bytes");
104  return a._index == b._index;
105  }
106 
107  friend bool operator!=(const Iterator& a, const Iterator& b) { return ! (a == b); }
108 
109  friend auto operator<(const Iterator& a, const Iterator& b) {
110  if ( a._control.lock() != b._control.lock() )
111  throw InvalidArgument("cannot compare iterators into different bytes");
112  return a._index < b._index;
113  }
114 
115  friend auto operator<=(const Iterator& a, const Iterator& b) {
116  if ( a._control.lock() != b._control.lock() )
117  throw InvalidArgument("cannot compare iterators into different bytes");
118  return a._index <= b._index;
119  }
120 
121  friend auto operator>(const Iterator& a, const Iterator& b) {
122  if ( a._control.lock() != b._control.lock() )
123  throw InvalidArgument("cannot compare iterators into different bytes");
124  return a._index > b._index;
125  }
126 
127  friend auto operator>=(const Iterator& a, const Iterator& b) {
128  if ( a._control.lock() != b._control.lock() )
129  throw InvalidArgument("cannot compare iterators into different bytes");
130  return a._index >= b._index;
131  }
132 
133  friend difference_type operator-(const Iterator& a, const Iterator& b) {
134  if ( a._control.lock() != b._control.lock() )
135  throw InvalidArgument("cannot perform arithmetic with iterators into different bytes");
136  return a._index - b._index;
137  }
138 };
139 
140 inline std::string to_string(const Iterator& /* i */, rt::detail::adl::tag /*unused*/) { return "<bytes iterator>"; }
141 
142 inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
143  out << "<bytes iterator>";
144  return out;
145 }
146 
147 } // namespace bytes
148 
155 class Bytes : protected std::string {
156 public:
157  using Base = std::string;
159  using Base::const_reference;
160  using Base::reference;
161  using Offset = uint64_t;
162  using size_type = integer::safe<uint64_t>;
163 
164  using Base::Base;
165  using Base::data;
166 
/**
 * Creates a bytes value from a string and a character set.
 *
 * Only declared here; the definition lives outside this header.
 * NOTE(review): presumably `cs` names the encoding to apply to `s` -- confirm
 * against the implementation.
 */
Bytes(std::string s, bytes::Charset cs);
176 
177  Bytes(Base&& str) : Base(std::move(str)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
178  Bytes(const Bytes& xs) : Base(xs), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
179  Bytes(Bytes&& xs) : Base(std::move(xs)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
180 
188  Bytes& operator=(const Bytes& b) {
189  invalidateIterators();
190  this->Base::operator=(b);
191  return *this;
192  }
193 
202  invalidateIterators();
203  this->Base::operator=(std::move(b));
204  return *this;
205  }
206 
208  void append(const Bytes& d) { Base::append(d.str()); }
209 
/**
 * Appends data taken from a stream view. Only declared here; the definition
 * lives outside this header.
 */
void append(const stream::View& view);
212 
214  void append(const uint8_t x) { Base::append(1, x); }
215 
217  const std::string& str() const& { return *this; }
218 
220  const_iterator begin() const { return const_iterator(0u, _control); }
221 
223  const_iterator end() const { return const_iterator(size(), _control); }
224 
226  const_iterator at(Offset o) const { return begin() + o; }
227 
229  bool isEmpty() const { return empty(); }
230 
232  size_type size() const { return static_cast<int64_t>(std::string::size()); }
233 
240  const_iterator find(value_type b, const const_iterator& n = const_iterator()) const {
241  if ( auto i = Base::find(b, (n ? n - begin() : 0)); i != Base::npos )
242  return begin() + i;
243  else
244  return end();
245  }
246 
/**
 * Searches for a bytes sequence `v`, optionally starting at position `n`.
 * Only declared here; the definition lives outside this header.
 * NOTE(review): the meaning of the returned (bool, iterator) pair is not
 * visible from this header -- confirm against the implementation.
 */
std::tuple<bool, const_iterator> find(const Bytes& v, const const_iterator& n = const_iterator()) const;
258 
266  Bytes sub(const const_iterator& from, const const_iterator& to) const {
267  return {substr(from - begin(), to - from)};
268  }
269 
276  Bytes sub(const const_iterator& to) const { return sub(begin(), to); }
277 
285  Bytes sub(Offset from, Offset to) const { return {substr(from, to - from)}; }
286 
293  Bytes sub(Offset to) const { return sub(0, to); }
294 
302  template<int N>
303  Bytes extract(unsigned char (&dst)[N]) const {
304  if ( N > size() )
305  throw InvalidArgument("insufficient data in source");
306 
307  memcpy(dst, data(), N);
308  return sub(N, std::string::npos);
309  }
310 
/**
 * Converts the content to a `std::string` according to character set `cs`.
 * Only declared here; the definition lives outside this header.
 */
std::string decode(bytes::Charset cs) const;
319 
321  bool startsWith(const Bytes& b) const { return hilti::rt::startsWith(*this, b); }
322 
331  Bytes upper(bytes::Charset cs) const { return Bytes(hilti::rt::string::upper(decode(cs)), cs); }
332 
338  Bytes lower(bytes::Charset cs) const { return Bytes(hilti::rt::string::lower(decode(cs)), cs); }
339 
/**
 * Strips bytes belonging to `set` from the given side(s); `side` defaults to
 * both. Only declared here; the definition lives outside this header.
 */
Bytes strip(const Bytes& set, bytes::Side side = bytes::Side::Both) const;

/**
 * Overload of `strip()` without an explicit set; `side` defaults to both.
 * Only declared here. NOTE(review): presumably strips whitespace -- confirm
 * against the implementation.
 */
Bytes strip(bytes::Side side = bytes::Side::Both) const;
358 
361  Vector<Bytes> x;
362  for ( auto& v : hilti::rt::split(*this) )
363  x.emplace_back(Bytes::Base(v));
364  return x;
365  }
366 
371  std::tuple<Bytes, Bytes> split1() const {
372  auto p = hilti::rt::split1(str());
373  return std::make_tuple(p.first, p.second);
374  }
375 
377  Vector<Bytes> split(const Bytes& sep) const {
378  Vector<Bytes> x;
379  for ( auto& v : hilti::rt::split(*this, sep) )
380  x.push_back(Bytes::Base(v));
381  return x;
382  }
383 
391  std::tuple<Bytes, Bytes> split1(const Bytes& sep) const {
392  auto p = hilti::rt::split1(str(), sep);
393  return std::make_tuple(p.first, p.second);
394  }
395 
401  template<typename T>
402  Bytes join(const Vector<T>& parts) const {
403  Bytes rval;
404 
405  for ( size_t i = 0; i < parts.size(); ++i ) {
406  if ( i > 0 )
407  rval += *this;
408 
409  rval += Bytes(hilti::rt::to_string_for_print(parts[i]).data());
410  }
411 
412  return rval;
413  }
414 
/**
 * Converts the content to a checked signed integer; `base` defaults to 10.
 * Only declared here. NOTE(review): presumably parses a textual number in
 * the given base -- confirm against the implementation.
 */
integer::safe<int64_t> toInt(uint64_t base = 10) const;

/**
 * Converts the content to a checked unsigned integer; `base` defaults to 10.
 * Only declared here. NOTE(review): presumably parses a textual number in
 * the given base -- confirm against the implementation.
 */
integer::safe<uint64_t> toUInt(uint64_t base = 10) const;

/**
 * Converts the content to a signed integer using the given byte order.
 * Only declared here; the definition lives outside this header.
 */
int64_t toInt(hilti::rt::ByteOrder byte_order) const;

/**
 * Converts the content to an unsigned integer using the given byte order.
 * Only declared here; the definition lives outside this header.
 */
uint64_t toUInt(hilti::rt::ByteOrder byte_order) const;
450 
458  Time toTime(uint64_t base = 10) const {
459  auto ns = ! isEmpty() ? toUInt(base) * integer::safe<uint64_t>(1'000'000'000) : integer::safe<uint64_t>(0);
460  return Time(ns, Time::NanosecondTag());
461  }
462 
470  Time toTime(hilti::rt::ByteOrder byte_order) const {
471  return Time(toUInt(byte_order) * integer::safe<uint64_t>(1'000'000'000), Time::NanosecondTag());
472  }
473 
/**
 * Matches the content against a regular expression; `group` defaults to 0.
 * Only declared here. NOTE(review): presumably returns the bytes of the
 * selected capture group or an error -- confirm against the implementation.
 */
Result<Bytes> match(const RegExp& re, unsigned int group = 0) const;
482 
483  // Add some operators over `Base`.
484  friend bool operator==(const Bytes& a, const Bytes& b) {
485  return static_cast<const Bytes::Base&>(a) == static_cast<const Bytes::Base&>(b);
486  }
487 
488  friend bool operator!=(const Bytes& a, const Bytes& b) { return ! (a == b); }
489 
490 
491  friend bool operator<(const Bytes& a, const Bytes& b) {
492  return static_cast<const Bytes::Base&>(a) < static_cast<const Bytes::Base&>(b);
493  }
494 
495  friend bool operator<=(const Bytes& a, const Bytes& b) {
496  return static_cast<const Bytes::Base&>(a) <= static_cast<const Bytes::Base&>(b);
497  }
498 
499  friend bool operator>(const Bytes& a, const Bytes& b) {
500  return static_cast<const Bytes::Base&>(a) > static_cast<const Bytes::Base&>(b);
501  }
502 
503  friend bool operator>=(const Bytes& a, const Bytes& b) {
504  return static_cast<const Bytes::Base&>(a) >= static_cast<const Bytes::Base&>(b);
505  }
506 
507  friend Bytes operator+(const Bytes& a, const Bytes& b) {
508  return static_cast<const Bytes::Base&>(a) + static_cast<const Bytes::Base&>(b);
509  }
510 
511 private:
512  friend bytes::Iterator;
513  std::shared_ptr<Base*> _control;
514 
515  void invalidateIterators() { _control = std::make_shared<Base*>(static_cast<Base*>(this)); }
516 };
517 
518 inline std::ostream& operator<<(std::ostream& out, const Bytes& x) {
519  out << escapeBytes(x.str(), false);
520  return out;
521 }
522 
523 namespace bytes {
524 inline namespace literals {
525 inline Bytes operator"" _b(const char* str, size_t size) { return Bytes(Bytes::Base(str, size)); }
526 } // namespace literals
527 } // namespace bytes
528 
529 template<>
530 inline std::string detail::to_string_for_print<Bytes>(const Bytes& x) {
531  return escapeBytes(x.str(), false);
532 }
533 
// String conversions for `Bytes` and its helper enums. They are only declared
// here; the definitions live outside this header. The trailing `adl::tag`
// parameter is unused by name.
namespace detail::adl {
std::string to_string(const Bytes& x, adl::tag /*unused*/);
std::string to_string(const bytes::Side& x, adl::tag /*unused*/);
std::string to_string(const bytes::Charset& x, adl::tag /*unused*/);
} // namespace detail::adl
539 
540 } // namespace hilti::rt
541 
542 // Disable JSON-ification of `Bytes`.
543 //
544 // As of nlohmann-json-0e694b4060ed55df980eaaebc2398b0ff24530d4 the JSON library misdetects the serialization for
545 // `Bytes` on some platforms. We see this on platforms not providing a C++17-compliant toolchain (e.g., in Cirrus' `no-toolchain`
546 // task which uses gcc-9.3.0) where code in JSON wants to check whether `Bytes` can be converted to a
547 // `std::filesystem::path`, but then runs into compiler issues.
namespace nlohmann {
// Deliberately empty specialization: it declares no members at all for
// `hilti::rt::Bytes`.
template<>
struct adl_serializer<hilti::rt::Bytes> {};
} // namespace nlohmann
Bytes sub(const const_iterator &to) const
Definition: bytes.h:276
std::string to_string(T &&x)
Definition: extension-points.h:26
bool isEmpty() const
Definition: bytes.h:229
std::string to_string_for_print(const T &x)
Definition: extension-points.h:45
void append(const uint8_t x)
Definition: bytes.h:214
size_type size() const
Definition: bytes.h:232
Bytes & operator=(const Bytes &b)
Definition: bytes.h:188
Definition: bytes.h:42
Bytes sub(Offset from, Offset to) const
Definition: bytes.h:285
Definition: any.h:7
Bytes extract(unsigned char(&dst)[N]) const
Definition: bytes.h:303
Bytes sub(Offset to) const
Definition: bytes.h:293
std::tuple< Bytes, Bytes > split1(const Bytes &sep) const
Definition: bytes.h:391
Definition: regexp.h:117
Time toTime(uint64_t base=10) const
Definition: bytes.h:458
std::pair< std::string, std::string > split1(std::string s)
Definition: util.cc:156
Definition: bytes.h:155
bool startsWith(const std::string &s, const std::string &prefix)
Definition: util.h:201
const_iterator begin() const
Definition: bytes.h:220
Bytes upper(bytes::Charset cs) const
Definition: bytes.h:331
Definition: stream.h:1001
const_iterator end() const
Definition: bytes.h:223
std::vector< std::string_view > split(std::string_view s, std::string_view delim)
Definition: util.cc:112
bool startsWith(const Bytes &b) const
Definition: bytes.h:321
Definition: bytes.h:548
void append(const Bytes &d)
Definition: bytes.h:208
ByteOrder
Definition: util.h:504
Bytes sub(const const_iterator &from, const const_iterator &to) const
Definition: bytes.h:266
const std::string & str() const &
Definition: bytes.h:217
Bytes & operator=(Bytes &&b)
Definition: bytes.h:201
Definition: extension-points.h:12
Definition: vector.h:251
std::tuple< Bytes, Bytes > split1() const
Definition: bytes.h:371
Vector< Bytes > split() const
Definition: bytes.h:360
Definition: time.h:23
Definition: time.h:20
const_iterator find(value_type b, const const_iterator &n=const_iterator()) const
Definition: bytes.h:240
Definition: result.h:67
Bytes lower(bytes::Charset cs) const
Definition: bytes.h:338
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:13
const_iterator at(Offset o) const
Definition: bytes.h:226
Vector< Bytes > split(const Bytes &sep) const
Definition: bytes.h:377
Bytes join(const Vector< T > &parts) const
Definition: bytes.h:402