Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <cstring>
6 #include <memory>
7 #include <string>
8 #include <tuple>
9 #include <utility>
10 
11 #include <hilti/rt/extension-points.h>
12 #include <hilti/rt/iterator.h>
13 #include <hilti/rt/json-fwd.h>
14 #include <hilti/rt/result.h>
15 #include <hilti/rt/safe-int.h>
16 #include <hilti/rt/types/string.h>
17 #include <hilti/rt/types/time.h>
18 #include <hilti/rt/types/vector.h>
19 #include <hilti/rt/util.h>
20 
21 namespace hilti::rt {
22 
23 class Bytes;
24 class RegExp;
25 
26 namespace stream {
27 class View;
28 }
29 
30 namespace bytes {
31 
// Enum naming the side(s) of a bytes value that an operation applies to; used
// as the `side` parameter of `Bytes::strip()`.
// NOTE(review): the second macro argument (`Left`) is presumably the enum's
// default value — confirm against the definition of HILTI_RT_ENUM_WITH_DEFAULT.
33 HILTI_RT_ENUM_WITH_DEFAULT(Side, Left,
34  Left, // left side
35  Right, // right side
36  Both // left and right side
37 );
38 
// Enum naming the character sets accepted by the `Bytes` constructor taking a
// charset, and by `Bytes::decode()`, `Bytes::upper()` and `Bytes::lower()`.
40 HILTI_RT_ENUM(Charset, Undef, UTF8, ASCII);
41 
// Re-exports the string type's error strategy under `bytes::` so that the
// bytes API can name it as `bytes::DecodeErrorStrategy`.
43 using DecodeErrorStrategy = string::DecodeErrorStrategy;
44 
45 class Iterator {
46  using B = std::string;
47  using difference_type = B::const_iterator::difference_type;
48 
49  std::weak_ptr<B*> _control;
50  typename integer::safe<std::uint64_t> _index = 0;
51 
52 public:
53  Iterator() = default;
54 
55  Iterator(typename B::size_type index, std::weak_ptr<B*> control) : _control(std::move(control)), _index(index) {}
56 
57  uint8_t operator*() const {
58  if ( auto&& l = _control.lock() ) {
59  auto&& data = static_cast<B&>(**l);
60 
61  if ( _index >= data.size() )
62  throw IndexError(fmt("index %s out of bounds", _index));
63 
64  return data[_index];
65  }
66 
67  throw InvalidIterator("bound object has expired");
68  }
69 
70  template<typename T>
71  auto& operator+=(const hilti::rt::integer::safe<T>& n) {
72  return *this += n.Ref();
73  }
74 
75  auto& operator+=(uint64_t n) {
76  _index += n;
77  return *this;
78  }
79 
80  template<typename T>
81  auto operator+(const hilti::rt::integer::safe<T>& n) const {
82  return *this + n.Ref();
83  }
84 
85  template<typename T>
86  auto operator+(const T& n) const {
87  return Iterator{_index + n, _control};
88  }
89 
90  explicit operator bool() const { return static_cast<bool>(_control.lock()); }
91 
92  auto& operator++() {
93  ++_index;
94  return *this;
95  }
96 
97  auto operator++(int) {
98  auto result = *this;
99  ++_index;
100  return result;
101  }
102 
103  friend auto operator==(const Iterator& a, const Iterator& b) {
104  if ( a._control.lock() != b._control.lock() )
105  throw InvalidArgument("cannot compare iterators into different bytes");
106  return a._index == b._index;
107  }
108 
109  friend bool operator!=(const Iterator& a, const Iterator& b) { return ! (a == b); }
110 
111  friend auto operator<(const Iterator& a, const Iterator& b) {
112  if ( a._control.lock() != b._control.lock() )
113  throw InvalidArgument("cannot compare iterators into different bytes");
114  return a._index < b._index;
115  }
116 
117  friend auto operator<=(const Iterator& a, const Iterator& b) {
118  if ( a._control.lock() != b._control.lock() )
119  throw InvalidArgument("cannot compare iterators into different bytes");
120  return a._index <= b._index;
121  }
122 
123  friend auto operator>(const Iterator& a, const Iterator& b) {
124  if ( a._control.lock() != b._control.lock() )
125  throw InvalidArgument("cannot compare iterators into different bytes");
126  return a._index > b._index;
127  }
128 
129  friend auto operator>=(const Iterator& a, const Iterator& b) {
130  if ( a._control.lock() != b._control.lock() )
131  throw InvalidArgument("cannot compare iterators into different bytes");
132  return a._index >= b._index;
133  }
134 
135  friend difference_type operator-(const Iterator& a, const Iterator& b) {
136  if ( a._control.lock() != b._control.lock() )
137  throw InvalidArgument("cannot perform arithmetic with iterators into different bytes");
138  return a._index - b._index;
139  }
140 };
141 
142 inline std::string to_string(const Iterator& /* i */, rt::detail::adl::tag /*unused*/) { return "<bytes iterator>"; }
143 
144 inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
145  out << "<bytes iterator>";
146  return out;
147 }
148 
149 } // namespace bytes
150 
157 class Bytes : protected std::string {
158 public:
159  using Base = std::string;
161  using Base::const_reference;
162  using Base::reference;
163  using Offset = uint64_t;
164  using size_type = integer::safe<uint64_t>;
165 
166  using Base::Base;
167  using Base::data;
168 
178  Bytes(std::string s, bytes::Charset cs, bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE);
179 
180  Bytes(Base&& str) : Base(std::move(str)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
181  Bytes(const Bytes& xs) : Base(xs), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
182  Bytes(Bytes&& xs) noexcept : Base(std::move(xs)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
183 
191  Bytes& operator=(const Bytes& b) {
192  if ( &b == this )
193  return *this;
194 
195  invalidateIterators();
196  this->Base::operator=(b);
197  return *this;
198  }
199 
207  Bytes& operator=(Bytes&& b) noexcept {
208  invalidateIterators();
209  this->Base::operator=(std::move(b));
210  return *this;
211  }
212 
214  void append(const Bytes& d) { Base::append(d.str()); }
215 
217  void append(const stream::View& view);
218 
220  void append(const uint8_t x) { Base::append(1, static_cast<Base::value_type>(x)); }
221 
223  const std::string& str() const& { return *this; }
224 
226  const_iterator begin() const { return const_iterator(0U, _control); }
227 
229  const_iterator end() const { return const_iterator(size(), _control); }
230 
232  const_iterator at(Offset o) const { return begin() + o; }
233 
235  bool isEmpty() const { return empty(); }
236 
238  size_type size() const { return static_cast<int64_t>(std::string::size()); }
239 
246  const_iterator find(value_type b, const const_iterator& n = const_iterator()) const {
247  if ( auto i = Base::find(b, (n ? n - begin() : 0)); i != Base::npos )
248  return begin() + i;
249  else
250  return end();
251  }
252 
263  std::tuple<bool, const_iterator> find(const Bytes& v, const const_iterator& n = const_iterator()) const;
264 
272  Bytes sub(const const_iterator& from, const const_iterator& to) const {
273  return {substr(from - begin(), to - from)};
274  }
275 
282  Bytes sub(const const_iterator& to) const { return sub(begin(), to); }
283 
291  Bytes sub(Offset from, Offset to) const { return {substr(from, to - from)}; }
292 
299  Bytes sub(Offset to) const { return sub(0, to); }
300 
308  Bytes extract(unsigned char* dst, uint64_t n) const {
309  if ( n > size() )
310  throw InvalidArgument("insufficient data in source");
311 
312  memcpy(dst, data(), n);
313  return sub(n, std::string::npos);
314  }
315 
324  std::string decode(bytes::Charset cs,
325  bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE) const;
326 
328  bool startsWith(const Bytes& b) const { return hilti::rt::startsWith(*this, b); }
329 
339  Bytes upper(bytes::Charset cs, bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE) const {
340  return Bytes(hilti::rt::string::upper(decode(cs, errors), errors), cs, errors);
341  }
342 
350  Bytes lower(bytes::Charset cs, bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE) const {
351  return Bytes(hilti::rt::string::lower(decode(cs, errors), errors), cs, errors);
352  }
353 
362  Bytes strip(const Bytes& set, bytes::Side side = bytes::Side::Both) const;
363 
371  Bytes strip(bytes::Side side = bytes::Side::Both) const;
372 
375  Vector<Bytes> x;
376  for ( auto& v : hilti::rt::split(*this) )
377  x.emplace_back(Bytes::Base(v));
378  return x;
379  }
380 
385  std::tuple<Bytes, Bytes> split1() const {
386  auto p = hilti::rt::split1(str());
387  return std::make_tuple(p.first, p.second);
388  }
389 
391  Vector<Bytes> split(const Bytes& sep) const {
392  Vector<Bytes> x;
393  for ( auto& v : hilti::rt::split(*this, sep) )
394  x.push_back(Bytes::Base(v));
395  return x;
396  }
397 
405  std::tuple<Bytes, Bytes> split1(const Bytes& sep) const {
406  auto p = hilti::rt::split1(str(), sep);
407  return std::make_tuple(p.first, p.second);
408  }
409 
415  template<typename T>
416  Bytes join(const Vector<T>& parts) const {
417  Bytes rval;
418 
419  for ( size_t i = 0; i < parts.size(); ++i ) {
420  if ( i > 0 )
421  rval += *this;
422 
423  rval += Bytes(hilti::rt::to_string_for_print(parts[i]));
424  }
425 
426  return rval;
427  }
428 
436  integer::safe<int64_t> toInt(uint64_t base = 10) const;
437 
445  integer::safe<uint64_t> toUInt(uint64_t base = 10) const;
446 
454  int64_t toInt(hilti::rt::ByteOrder byte_order) const;
455 
463  uint64_t toUInt(hilti::rt::ByteOrder byte_order) const;
464 
472  Time toTime(uint64_t base = 10) const {
473  auto ns = ! isEmpty() ? toUInt(base) * integer::safe<uint64_t>(1'000'000'000) : integer::safe<uint64_t>(0);
474  return Time(ns, Time::NanosecondTag());
475  }
476 
484  Time toTime(hilti::rt::ByteOrder byte_order) const {
485  return Time(toUInt(byte_order) * integer::safe<uint64_t>(1'000'000'000), Time::NanosecondTag());
486  }
487 
495  Result<Bytes> match(const RegExp& re, unsigned int group = 0) const;
496 
497  // Add some operators over `Base`.
498  friend bool operator==(const Bytes& a, const Bytes& b) {
499  return static_cast<const Bytes::Base&>(a) == static_cast<const Bytes::Base&>(b);
500  }
501 
502  friend bool operator!=(const Bytes& a, const Bytes& b) { return ! (a == b); }
503 
504 
505  friend bool operator<(const Bytes& a, const Bytes& b) {
506  return static_cast<const Bytes::Base&>(a) < static_cast<const Bytes::Base&>(b);
507  }
508 
509  friend bool operator<=(const Bytes& a, const Bytes& b) {
510  return static_cast<const Bytes::Base&>(a) <= static_cast<const Bytes::Base&>(b);
511  }
512 
513  friend bool operator>(const Bytes& a, const Bytes& b) {
514  return static_cast<const Bytes::Base&>(a) > static_cast<const Bytes::Base&>(b);
515  }
516 
517  friend bool operator>=(const Bytes& a, const Bytes& b) {
518  return static_cast<const Bytes::Base&>(a) >= static_cast<const Bytes::Base&>(b);
519  }
520 
521  friend Bytes operator+(const Bytes& a, const Bytes& b) {
522  return static_cast<const Bytes::Base&>(a) + static_cast<const Bytes::Base&>(b);
523  }
524 
525 private:
526  friend bytes::Iterator;
527  std::shared_ptr<Base*> _control;
528 
529  void invalidateIterators() { _control = std::make_shared<Base*>(static_cast<Base*>(this)); }
530 };
531 
532 inline std::ostream& operator<<(std::ostream& out, const Bytes& x) {
533  out << escapeBytes(x.str(), false);
534  return out;
535 }
536 
537 namespace bytes {
538 inline namespace literals {
539 inline Bytes operator"" _b(const char* str, size_t size) { return Bytes(Bytes::Base(str, size)); }
540 } // namespace literals
541 } // namespace bytes
542 
543 template<>
544 inline std::string detail::to_string_for_print<Bytes>(const Bytes& x) {
545  return escapeBytes(x.str(), false);
546 }
547 
// `to_string()` overloads for `Bytes` and for the `bytes::` enum and alias
// types, selected through the `adl::tag` parameter. They are only declared
// here; no definitions appear in this header.
548 namespace detail::adl {
549 std::string to_string(const Bytes& x, adl::tag /*unused*/);
550 std::string to_string(const bytes::Side& x, adl::tag /*unused*/);
551 std::string to_string(const bytes::Charset& x, adl::tag /*unused*/);
552 std::string to_string(const bytes::DecodeErrorStrategy& x, adl::tag /*unused*/);
553 } // namespace detail::adl
554 
555 } // namespace hilti::rt
556 
557 // Disable JSON-ification of `Bytes`.
558 //
559 // As of nlohmann-json-0e694b4060ed55df980eaaebc2398b0ff24530d4 the JSON library misdetects the serialization for
560 // `Bytes` on some platforms. We see this on platforms that are not fully C++17-compliant (e.g., in Cirrus'
561 // `no-toolchain` task, which uses gcc-9.3.0), where code in JSON wants to check whether `Bytes` can be converted to
562 // a `std::filesystem::path`, but then runs into compiler issues. The specialization below is deliberately empty.
563 namespace nlohmann {
564 template<>
565 struct adl_serializer<hilti::rt::Bytes> {};
566 } // namespace nlohmann
Bytes upper(bytes::Charset cs, bytes::DecodeErrorStrategy errors=bytes::DecodeErrorStrategy::REPLACE) const
Definition: bytes.h:339
Bytes sub(const const_iterator &to) const
Definition: bytes.h:282
std::string to_string(T &&x)
Definition: extension-points.h:26
bool isEmpty() const
Definition: bytes.h:235
std::string to_string_for_print(const T &x)
Definition: extension-points.h:45
void append(const uint8_t x)
Definition: bytes.h:220
size_type size() const
Definition: bytes.h:238
Bytes & operator=(const Bytes &b)
Definition: bytes.h:191
Definition: bytes.h:45
Bytes sub(Offset from, Offset to) const
Definition: bytes.h:291
Bytes extract(unsigned char *dst, uint64_t n) const
Definition: bytes.h:308
Definition: any.h:7
Bytes sub(Offset to) const
Definition: bytes.h:299
std::tuple< Bytes, Bytes > split1(const Bytes &sep) const
Definition: bytes.h:405
Definition: regexp.h:125
Time toTime(uint64_t base=10) const
Definition: bytes.h:472
std::pair< std::string, std::string > split1(std::string s)
Definition: util.cc:146
Definition: bytes.h:157
bool startsWith(const std::string &s, const std::string &prefix)
Definition: util.cc:380
const_iterator begin() const
Definition: bytes.h:226
Definition: stream.h:984
const_iterator end() const
Definition: bytes.h:229
std::vector< std::string_view > split(std::string_view s, std::string_view delim)
Definition: util.cc:102
bool startsWith(const Bytes &b) const
Definition: bytes.h:328
Definition: bytes.h:563
Bytes & operator=(Bytes &&b) noexcept
Definition: bytes.h:207
void append(const Bytes &d)
Definition: bytes.h:214
Bytes sub(const const_iterator &from, const const_iterator &to) const
Definition: bytes.h:272
const std::string & str() const &
Definition: bytes.h:223
Bytes lower(bytes::Charset cs, bytes::DecodeErrorStrategy errors=bytes::DecodeErrorStrategy::REPLACE) const
Definition: bytes.h:350
Definition: extension-points.h:12
Definition: vector.h:256
std::tuple< Bytes, Bytes > split1() const
Definition: bytes.h:385
Vector< Bytes > split() const
Definition: bytes.h:374
Definition: time.h:23
Definition: time.h:20
const_iterator find(value_type b, const const_iterator &n=const_iterator()) const
Definition: bytes.h:246
Definition: result.h:67
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:13
const_iterator at(Offset o) const
Definition: bytes.h:232
Vector< Bytes > split(const Bytes &sep) const
Definition: bytes.h:391
Bytes join(const Vector< T > &parts) const
Definition: bytes.h:416