Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <cstring>
6 #include <memory>
7 #include <stdexcept>
8 #include <string>
9 #include <tuple>
10 #include <utility>
11 
12 #include <hilti/rt/exception.h>
13 #include <hilti/rt/extension-points.h>
14 #include <hilti/rt/iterator.h>
15 #include <hilti/rt/json-fwd.h>
16 #include <hilti/rt/result.h>
17 #include <hilti/rt/types/integer.h>
18 #include <hilti/rt/types/string.h>
19 #include <hilti/rt/types/time.h>
20 #include <hilti/rt/types/vector.h>
21 #include <hilti/rt/util.h>
22 
23 namespace hilti::rt {
24 
25 class Bytes;
26 class RegExp;
27 
28 namespace stream {
29 class View;
30 }
31 
32 namespace bytes {
33 
/**
 * Side selector taken by `Bytes::strip()`.
 *
 * The underlying type is fixed to `int64_t`; enumerators keep their implicit
 * values 0, 1 and 2.
 */
enum class Side : int64_t {
    Left,  /**< operate on the left-hand end only */
    Right, /**< operate on the right-hand end only */
    Both   /**< operate on both ends */
};
40 
/**
 * Character-set selector taken by the `Bytes(std::string, Charset)`
 * constructor and by `Bytes::decode()`, `Bytes::upper()` and `Bytes::lower()`.
 *
 * The underlying type is fixed to `int64_t`; enumerators keep their implicit
 * values 0, 1 and 2.
 */
enum class Charset : int64_t {
    Undef, /**< no character set specified */
    UTF8,  /**< UTF-8 */
    ASCII  /**< ASCII */
};
43 
44 class Iterator {
45  using B = std::string;
46  using difference_type = B::const_iterator::difference_type;
47 
48  std::weak_ptr<B*> _control;
49  typename integer::safe<std::uint64_t> _index = 0;
50 
51 public:
52  Iterator() = default;
53 
54  Iterator(typename B::size_type index, std::weak_ptr<B*> control) : _control(std::move(control)), _index(index) {}
55 
56  uint8_t operator*() const {
57  if ( auto&& l = _control.lock() ) {
58  auto&& data = static_cast<B&>(**l);
59 
60  if ( _index >= data.size() )
61  throw IndexError(fmt("index %s out of bounds", _index));
62 
63  return data[_index];
64  }
65 
66  throw InvalidIterator("bound object has expired");
67  }
68 
69  template<typename T>
70  auto& operator+=(const hilti::rt::integer::safe<T>& n) {
71  return *this += n.Ref();
72  }
73 
74  auto& operator+=(uint64_t n) {
75  _index += n;
76  return *this;
77  }
78 
79  template<typename T>
80  auto operator+(const hilti::rt::integer::safe<T>& n) const {
81  return *this + n.Ref();
82  }
83 
84  template<typename T>
85  auto operator+(const T& n) const {
86  return Iterator{_index + n, _control};
87  }
88 
89  explicit operator bool() const { return static_cast<bool>(_control.lock()); }
90 
91  auto& operator++() {
92  ++_index;
93  return *this;
94  }
95 
96  auto operator++(int) {
97  auto result = *this;
98  ++_index;
99  return result;
100  }
101 
102  friend auto operator==(const Iterator& a, const Iterator& b) {
103  if ( a._control.lock() != b._control.lock() )
104  throw InvalidArgument("cannot compare iterators into different bytes");
105  return a._index == b._index;
106  }
107 
108  friend bool operator!=(const Iterator& a, const Iterator& b) { return ! (a == b); }
109 
110  friend auto operator<(const Iterator& a, const Iterator& b) {
111  if ( a._control.lock() != b._control.lock() )
112  throw InvalidArgument("cannot compare iterators into different bytes");
113  return a._index < b._index;
114  }
115 
116  friend auto operator<=(const Iterator& a, const Iterator& b) {
117  if ( a._control.lock() != b._control.lock() )
118  throw InvalidArgument("cannot compare iterators into different bytes");
119  return a._index <= b._index;
120  }
121 
122  friend auto operator>(const Iterator& a, const Iterator& b) {
123  if ( a._control.lock() != b._control.lock() )
124  throw InvalidArgument("cannot compare iterators into different bytes");
125  return a._index > b._index;
126  }
127 
128  friend auto operator>=(const Iterator& a, const Iterator& b) {
129  if ( a._control.lock() != b._control.lock() )
130  throw InvalidArgument("cannot compare iterators into different bytes");
131  return a._index >= b._index;
132  }
133 
134  friend difference_type operator-(const Iterator& a, const Iterator& b) {
135  if ( a._control.lock() != b._control.lock() )
136  throw InvalidArgument("cannot perform arithmetic with iterators into different bytes");
137  return a._index - b._index;
138  }
139 
140  friend class ::hilti::rt::Bytes;
141 };
142 
143 inline std::string to_string(const Iterator& /* i */, rt::detail::adl::tag /*unused*/) { return "<bytes iterator>"; }
144 
145 inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
146  out << "<bytes iterator>";
147  return out;
148 }
149 
150 } // namespace bytes
151 
158 class Bytes : protected std::string {
159 public:
160  using Base = std::string;
162  using Base::const_reference;
163  using Base::reference;
164  using Offset = uint64_t;
165  using size_type = integer::safe<uint64_t>;
166 
167  using Base::Base;
168  using Base::data;
169 
178  Bytes(std::string s, bytes::Charset cs);
179 
180  Bytes(Base&& str) : Base(std::move(str)) {}
181  Bytes(const Bytes& xs) : Base(xs) {}
182  Bytes(Bytes&& xs) noexcept : Base(std::move(xs)) {}
183 
191  Bytes& operator=(const Bytes& b) {
192  if ( &b == this )
193  return *this;
194 
195  invalidateIterators();
196  this->Base::operator=(b);
197  return *this;
198  }
199 
207  Bytes& operator=(Bytes&& b) noexcept {
208  invalidateIterators();
209  this->Base::operator=(std::move(b));
210  return *this;
211  }
212 
214  void append(const Bytes& d) { Base::append(d.str()); }
215 
217  void append(const stream::View& view);
218 
220  void append(const uint8_t x) { Base::append(1, static_cast<Base::value_type>(x)); }
221 
223  const std::string& str() const& { return *this; }
224 
226  const_iterator begin() const { return const_iterator(0U, _control); }
227 
229  const_iterator end() const { return const_iterator(size(), _control); }
230 
232  const_iterator at(Offset o) const { return begin() + o; }
233 
235  bool isEmpty() const { return empty(); }
236 
238  size_type size() const { return static_cast<int64_t>(std::string::size()); }
239 
246  const_iterator find(value_type b, const const_iterator& n = const_iterator()) const {
247  if ( auto i = Base::find(b, (n ? n - begin() : 0)); i != Base::npos )
248  return begin() + i;
249  else
250  return end();
251  }
252 
263  std::tuple<bool, const_iterator> find(const Bytes& v, const const_iterator& n = const_iterator()) const;
264 
272  Bytes sub(const const_iterator& from, const const_iterator& to) const {
273  if ( from._control.lock() != to._control.lock() )
274  throw InvalidArgument("start and end iterator cannot belong to different bytes");
275 
276  return sub(Offset(from - begin()), to._index);
277  }
278 
285  Bytes sub(const const_iterator& to) const { return sub(begin(), to); }
286 
294  Bytes sub(Offset from, Offset to) const {
295  try {
296  return {substr(from, to - from)};
297  } catch ( const std::out_of_range& ) {
298  throw OutOfRange(fmt("start index %s out of range for bytes with length %d", from, size()));
299  }
300  }
301 
308  Bytes sub(Offset to) const { return sub(0, to); }
309 
317  Bytes extract(unsigned char* dst, uint64_t n) const {
318  if ( n > size() )
319  throw InvalidArgument("insufficient data in source");
320 
321  memcpy(dst, data(), n);
322  return sub(n, std::string::npos);
323  }
324 
332  std::string decode(bytes::Charset cs) const;
333 
335  bool startsWith(const Bytes& b) const { return hilti::rt::startsWith(*this, b); }
336 
345  Bytes upper(bytes::Charset cs) const { return Bytes(hilti::rt::string::upper(decode(cs)), cs); }
346 
352  Bytes lower(bytes::Charset cs) const { return Bytes(hilti::rt::string::lower(decode(cs)), cs); }
353 
362  Bytes strip(const Bytes& set, bytes::Side side = bytes::Side::Both) const;
363 
371  Bytes strip(bytes::Side side = bytes::Side::Both) const;
372 
375  Vector<Bytes> x;
376  for ( auto& v : hilti::rt::split(*this) )
377  x.emplace_back(Bytes::Base(v));
378  return x;
379  }
380 
385  std::tuple<Bytes, Bytes> split1() const {
386  auto p = hilti::rt::split1(str());
387  return std::make_tuple(p.first, p.second);
388  }
389 
391  Vector<Bytes> split(const Bytes& sep) const {
392  Vector<Bytes> x;
393  for ( auto& v : hilti::rt::split(*this, sep) )
394  x.push_back(Bytes::Base(v));
395  return x;
396  }
397 
405  std::tuple<Bytes, Bytes> split1(const Bytes& sep) const {
406  auto p = hilti::rt::split1(str(), sep);
407  return std::make_tuple(p.first, p.second);
408  }
409 
415  template<typename T>
416  Bytes join(const Vector<T>& parts) const {
417  Bytes rval;
418 
419  for ( size_t i = 0; i < parts.size(); ++i ) {
420  if ( i > 0 )
421  rval += *this;
422 
423  rval += Bytes(hilti::rt::to_string_for_print(parts[i]));
424  }
425 
426  return rval;
427  }
428 
436  integer::safe<int64_t> toInt(uint64_t base = 10) const;
437 
445  integer::safe<uint64_t> toUInt(uint64_t base = 10) const;
446 
454  int64_t toInt(hilti::rt::ByteOrder byte_order) const;
455 
463  uint64_t toUInt(hilti::rt::ByteOrder byte_order) const;
464 
472  Time toTime(uint64_t base = 10) const {
473  auto ns = ! isEmpty() ? toUInt(base) * integer::safe<uint64_t>(1'000'000'000) : integer::safe<uint64_t>(0);
474  return Time(ns, Time::NanosecondTag());
475  }
476 
484  Time toTime(hilti::rt::ByteOrder byte_order) const {
485  return Time(toUInt(byte_order) * integer::safe<uint64_t>(1'000'000'000), Time::NanosecondTag());
486  }
487 
495  Result<Bytes> match(const RegExp& re, unsigned int group = 0) const;
496 
497  // Add some operators over `Base`.
498  friend bool operator==(const Bytes& a, const Bytes& b) {
499  return static_cast<const Bytes::Base&>(a) == static_cast<const Bytes::Base&>(b);
500  }
501 
502  friend bool operator!=(const Bytes& a, const Bytes& b) { return ! (a == b); }
503 
504 
505  friend bool operator<(const Bytes& a, const Bytes& b) {
506  return static_cast<const Bytes::Base&>(a) < static_cast<const Bytes::Base&>(b);
507  }
508 
509  friend bool operator<=(const Bytes& a, const Bytes& b) {
510  return static_cast<const Bytes::Base&>(a) <= static_cast<const Bytes::Base&>(b);
511  }
512 
513  friend bool operator>(const Bytes& a, const Bytes& b) {
514  return static_cast<const Bytes::Base&>(a) > static_cast<const Bytes::Base&>(b);
515  }
516 
517  friend bool operator>=(const Bytes& a, const Bytes& b) {
518  return static_cast<const Bytes::Base&>(a) >= static_cast<const Bytes::Base&>(b);
519  }
520 
521  friend Bytes operator+(const Bytes& a, const Bytes& b) {
522  return static_cast<const Bytes::Base&>(a) + static_cast<const Bytes::Base&>(b);
523  }
524 
525 private:
526  friend bytes::Iterator;
527  std::shared_ptr<Base*> _control = std::make_shared<Base*>(static_cast<Base*>(this));
528 
529  void invalidateIterators() { _control = std::make_shared<Base*>(static_cast<Base*>(this)); }
530 };
531 
532 inline std::ostream& operator<<(std::ostream& out, const Bytes& x) {
533  out << escapeBytes(x.str(), false);
534  return out;
535 }
536 
537 namespace bytes {
538 inline namespace literals {
539 inline Bytes operator"" _b(const char* str, size_t size) { return Bytes(Bytes::Base(str, size)); }
540 } // namespace literals
541 } // namespace bytes
542 
543 template<>
544 inline std::string detail::to_string_for_print<Bytes>(const Bytes& x) {
545  return escapeBytes(x.str(), false);
546 }
547 
548 namespace detail::adl {
549 std::string to_string(const Bytes& x, adl::tag /*unused*/);
550 std::string to_string(const bytes::Side& x, adl::tag /*unused*/);
551 std::string to_string(const bytes::Charset& x, adl::tag /*unused*/);
552 } // namespace detail::adl
553 
554 } // namespace hilti::rt
555 
556 // Disable JSON-ification of `Bytes`.
557 //
558 // As of nlohmann-json-0e694b4060ed55df980eaaebc2398b0ff24530d4 the JSON library misdetects the serialization for
559 // `Bytes` on some platforms. We see this on platfoms not providing a C++17-compliant (e.g., in Cirrus' `no-toolchain`
560 // task which uses gcc-9.3.0) where code in JSON wants to check whether `Bytes` can be converted to a
561 // `std::filesystem::path`, but then runs into compiler issues.
562 namespace nlohmann {
563 template<>
564 struct adl_serializer<hilti::rt::Bytes> {};
565 } // namespace nlohmann
Bytes sub(const const_iterator &to) const
Definition: bytes.h:285
std::string to_string(T &&x)
Definition: extension-points.h:26
bool isEmpty() const
Definition: bytes.h:235
std::string to_string_for_print(const T &x)
Definition: extension-points.h:45
void append(const uint8_t x)
Definition: bytes.h:220
size_type size() const
Definition: bytes.h:238
Bytes & operator=(const Bytes &b)
Definition: bytes.h:191
Definition: bytes.h:44
Bytes sub(Offset from, Offset to) const
Definition: bytes.h:294
Bytes extract(unsigned char *dst, uint64_t n) const
Definition: bytes.h:317
Definition: any.h:7
Bytes sub(Offset to) const
Definition: bytes.h:308
std::tuple< Bytes, Bytes > split1(const Bytes &sep) const
Definition: bytes.h:405
Definition: regexp.h:125
Time toTime(uint64_t base=10) const
Definition: bytes.h:472
std::pair< std::string, std::string > split1(std::string s)
Definition: util.cc:156
Definition: bytes.h:158
bool startsWith(const std::string &s, const std::string &prefix)
Definition: util.h:201
const_iterator begin() const
Definition: bytes.h:226
Bytes upper(bytes::Charset cs) const
Definition: bytes.h:345
Definition: stream.h:983
const_iterator end() const
Definition: bytes.h:229
std::vector< std::string_view > split(std::string_view s, std::string_view delim)
Definition: util.cc:112
bool startsWith(const Bytes &b) const
Definition: bytes.h:335
Definition: bytes.h:562
Bytes & operator=(Bytes &&b) noexcept
Definition: bytes.h:207
void append(const Bytes &d)
Definition: bytes.h:214
ByteOrder
Definition: util.h:506
Bytes sub(const const_iterator &from, const const_iterator &to) const
Definition: bytes.h:272
const std::string & str() const &
Definition: bytes.h:223
Definition: extension-points.h:12
Definition: vector.h:251
std::tuple< Bytes, Bytes > split1() const
Definition: bytes.h:385
Vector< Bytes > split() const
Definition: bytes.h:374
Definition: time.h:23
Definition: time.h:20
const_iterator find(value_type b, const const_iterator &n=const_iterator()) const
Definition: bytes.h:246
Definition: result.h:67
Bytes lower(bytes::Charset cs) const
Definition: bytes.h:352
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:13
const_iterator at(Offset o) const
Definition: bytes.h:232
Vector< Bytes > split(const Bytes &sep) const
Definition: bytes.h:391
Bytes join(const Vector< T > &parts) const
Definition: bytes.h:416