Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <cstring>
6 #include <memory>
7 #include <string>
8 #include <tuple>
9 #include <utility>
10 
11 #include <hilti/rt/extension-points.h>
12 #include <hilti/rt/iterator.h>
13 #include <hilti/rt/json-fwd.h>
14 #include <hilti/rt/result.h>
15 #include <hilti/rt/safe-int.h>
16 #include <hilti/rt/types/string.h>
17 #include <hilti/rt/types/time.h>
18 #include <hilti/rt/types/vector.h>
19 #include <hilti/rt/util.h>
20 
21 namespace hilti::rt {
22 
23 class Bytes;
24 class RegExp;
25 
26 namespace stream {
27 class View;
28 }
29 
30 namespace bytes {
31 
/** Selects which end(s) of a bytes value an operation such as `Bytes::strip` works on. */
enum class Side : int64_t { Left, Right, Both };
38 
/** Character sets accepted by the charset-aware `Bytes` operations (construction, `decode`, `upper`, `lower`). */
enum class Charset : int64_t {
    Undef,
    UTF8,
    ASCII
};
41 
43 using DecodeErrorStrategy = string::DecodeErrorStrategy;
44 
45 class Iterator {
46  using B = std::string;
47  using difference_type = B::const_iterator::difference_type;
48 
49  std::weak_ptr<B*> _control;
50  typename integer::safe<std::uint64_t> _index = 0;
51 
52 public:
53  Iterator() = default;
54 
55  Iterator(typename B::size_type index, std::weak_ptr<B*> control) : _control(std::move(control)), _index(index) {}
56 
57  uint8_t operator*() const {
58  if ( auto&& l = _control.lock() ) {
59  auto&& data = static_cast<B&>(**l);
60 
61  if ( _index >= data.size() )
62  throw IndexError(fmt("index %s out of bounds", _index));
63 
64  return data[_index];
65  }
66 
67  throw InvalidIterator("bound object has expired");
68  }
69 
70  template<typename T>
71  auto& operator+=(const hilti::rt::integer::safe<T>& n) {
72  return *this += n.Ref();
73  }
74 
75  auto& operator+=(uint64_t n) {
76  _index += n;
77  return *this;
78  }
79 
80  template<typename T>
81  auto operator+(const hilti::rt::integer::safe<T>& n) const {
82  return *this + n.Ref();
83  }
84 
85  template<typename T>
86  auto operator+(const T& n) const {
87  return Iterator{_index + n, _control};
88  }
89 
90  explicit operator bool() const { return static_cast<bool>(_control.lock()); }
91 
92  auto& operator++() {
93  ++_index;
94  return *this;
95  }
96 
97  auto operator++(int) {
98  auto result = *this;
99  ++_index;
100  return result;
101  }
102 
103  friend auto operator==(const Iterator& a, const Iterator& b) {
104  if ( a._control.lock() != b._control.lock() )
105  throw InvalidArgument("cannot compare iterators into different bytes");
106  return a._index == b._index;
107  }
108 
109  friend bool operator!=(const Iterator& a, const Iterator& b) { return ! (a == b); }
110 
111  friend auto operator<(const Iterator& a, const Iterator& b) {
112  if ( a._control.lock() != b._control.lock() )
113  throw InvalidArgument("cannot compare iterators into different bytes");
114  return a._index < b._index;
115  }
116 
117  friend auto operator<=(const Iterator& a, const Iterator& b) {
118  if ( a._control.lock() != b._control.lock() )
119  throw InvalidArgument("cannot compare iterators into different bytes");
120  return a._index <= b._index;
121  }
122 
123  friend auto operator>(const Iterator& a, const Iterator& b) {
124  if ( a._control.lock() != b._control.lock() )
125  throw InvalidArgument("cannot compare iterators into different bytes");
126  return a._index > b._index;
127  }
128 
129  friend auto operator>=(const Iterator& a, const Iterator& b) {
130  if ( a._control.lock() != b._control.lock() )
131  throw InvalidArgument("cannot compare iterators into different bytes");
132  return a._index >= b._index;
133  }
134 
135  friend difference_type operator-(const Iterator& a, const Iterator& b) {
136  if ( a._control.lock() != b._control.lock() )
137  throw InvalidArgument("cannot perform arithmetic with iterators into different bytes");
138  return a._index - b._index;
139  }
140 };
141 
142 inline std::string to_string(const Iterator& /* i */, rt::detail::adl::tag /*unused*/) { return "<bytes iterator>"; }
143 
144 inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
145  out << "<bytes iterator>";
146  return out;
147 }
148 
149 } // namespace bytes
150 
157 class Bytes : protected std::string {
158 public:
159  using Base = std::string;
161  using Base::const_reference;
162  using Base::reference;
163  using Offset = uint64_t;
164  using size_type = integer::safe<uint64_t>;
165 
166  using Base::Base;
167  using Base::data;
168 
178  Bytes(std::string s, bytes::Charset cs, bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE);
179 
180  Bytes(Base&& str) : Base(std::move(str)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
181  Bytes(const Bytes& xs) : Base(xs), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
182  Bytes(Bytes&& xs) noexcept : Base(std::move(xs)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
183 
191  Bytes& operator=(const Bytes& b) {
192  if ( &b == this )
193  return *this;
194 
195  invalidateIterators();
196  this->Base::operator=(b);
197  return *this;
198  }
199 
207  Bytes& operator=(Bytes&& b) noexcept {
208  invalidateIterators();
209  this->Base::operator=(std::move(b));
210  return *this;
211  }
212 
214  void append(const Bytes& d) { Base::append(d.str()); }
215 
217  void append(const stream::View& view);
218 
220  void append(const uint8_t x) { Base::append(1, static_cast<Base::value_type>(x)); }
221 
223  const std::string& str() const& { return *this; }
224 
226  const_iterator begin() const { return const_iterator(0U, _control); }
227 
229  const_iterator end() const { return const_iterator(size(), _control); }
230 
232  const_iterator at(Offset o) const { return begin() + o; }
233 
235  bool isEmpty() const { return empty(); }
236 
238  size_type size() const { return static_cast<int64_t>(std::string::size()); }
239 
246  const_iterator find(value_type b, const const_iterator& n = const_iterator()) const {
247  if ( auto i = Base::find(b, (n ? n - begin() : 0)); i != Base::npos )
248  return begin() + i;
249  else
250  return end();
251  }
252 
263  std::tuple<bool, const_iterator> find(const Bytes& v, const const_iterator& n = const_iterator()) const;
264 
272  Bytes sub(const const_iterator& from, const const_iterator& to) const {
273  return {substr(from - begin(), to - from)};
274  }
275 
282  Bytes sub(const const_iterator& to) const { return sub(begin(), to); }
283 
291  Bytes sub(Offset from, Offset to) const { return {substr(from, to - from)}; }
292 
299  Bytes sub(Offset to) const { return sub(0, to); }
300 
308  template<int N>
309  Bytes extract(unsigned char (&dst)[N]) const {
310  if ( N > size() )
311  throw InvalidArgument("insufficient data in source");
312 
313  memcpy(dst, data(), N);
314  return sub(N, std::string::npos);
315  }
316 
325  std::string decode(bytes::Charset cs,
326  bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE) const;
327 
329  bool startsWith(const Bytes& b) const { return hilti::rt::startsWith(*this, b); }
330 
340  Bytes upper(bytes::Charset cs, bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE) const {
341  return Bytes(hilti::rt::string::upper(decode(cs, errors), errors), cs, errors);
342  }
343 
351  Bytes lower(bytes::Charset cs, bytes::DecodeErrorStrategy errors = bytes::DecodeErrorStrategy::REPLACE) const {
352  return Bytes(hilti::rt::string::lower(decode(cs, errors), errors), cs, errors);
353  }
354 
363  Bytes strip(const Bytes& set, bytes::Side side = bytes::Side::Both) const;
364 
372  Bytes strip(bytes::Side side = bytes::Side::Both) const;
373 
376  Vector<Bytes> x;
377  for ( auto& v : hilti::rt::split(*this) )
378  x.emplace_back(Bytes::Base(v));
379  return x;
380  }
381 
386  std::tuple<Bytes, Bytes> split1() const {
387  auto p = hilti::rt::split1(str());
388  return std::make_tuple(p.first, p.second);
389  }
390 
392  Vector<Bytes> split(const Bytes& sep) const {
393  Vector<Bytes> x;
394  for ( auto& v : hilti::rt::split(*this, sep) )
395  x.push_back(Bytes::Base(v));
396  return x;
397  }
398 
406  std::tuple<Bytes, Bytes> split1(const Bytes& sep) const {
407  auto p = hilti::rt::split1(str(), sep);
408  return std::make_tuple(p.first, p.second);
409  }
410 
416  template<typename T>
417  Bytes join(const Vector<T>& parts) const {
418  Bytes rval;
419 
420  for ( size_t i = 0; i < parts.size(); ++i ) {
421  if ( i > 0 )
422  rval += *this;
423 
424  rval += Bytes(hilti::rt::to_string_for_print(parts[i]));
425  }
426 
427  return rval;
428  }
429 
437  integer::safe<int64_t> toInt(uint64_t base = 10) const;
438 
446  integer::safe<uint64_t> toUInt(uint64_t base = 10) const;
447 
455  int64_t toInt(hilti::rt::ByteOrder byte_order) const;
456 
464  uint64_t toUInt(hilti::rt::ByteOrder byte_order) const;
465 
473  Time toTime(uint64_t base = 10) const {
474  auto ns = ! isEmpty() ? toUInt(base) * integer::safe<uint64_t>(1'000'000'000) : integer::safe<uint64_t>(0);
475  return Time(ns, Time::NanosecondTag());
476  }
477 
485  Time toTime(hilti::rt::ByteOrder byte_order) const {
486  return Time(toUInt(byte_order) * integer::safe<uint64_t>(1'000'000'000), Time::NanosecondTag());
487  }
488 
496  Result<Bytes> match(const RegExp& re, unsigned int group = 0) const;
497 
498  // Add some operators over `Base`.
499  friend bool operator==(const Bytes& a, const Bytes& b) {
500  return static_cast<const Bytes::Base&>(a) == static_cast<const Bytes::Base&>(b);
501  }
502 
503  friend bool operator!=(const Bytes& a, const Bytes& b) { return ! (a == b); }
504 
505 
506  friend bool operator<(const Bytes& a, const Bytes& b) {
507  return static_cast<const Bytes::Base&>(a) < static_cast<const Bytes::Base&>(b);
508  }
509 
510  friend bool operator<=(const Bytes& a, const Bytes& b) {
511  return static_cast<const Bytes::Base&>(a) <= static_cast<const Bytes::Base&>(b);
512  }
513 
514  friend bool operator>(const Bytes& a, const Bytes& b) {
515  return static_cast<const Bytes::Base&>(a) > static_cast<const Bytes::Base&>(b);
516  }
517 
518  friend bool operator>=(const Bytes& a, const Bytes& b) {
519  return static_cast<const Bytes::Base&>(a) >= static_cast<const Bytes::Base&>(b);
520  }
521 
522  friend Bytes operator+(const Bytes& a, const Bytes& b) {
523  return static_cast<const Bytes::Base&>(a) + static_cast<const Bytes::Base&>(b);
524  }
525 
526 private:
527  friend bytes::Iterator;
528  std::shared_ptr<Base*> _control;
529 
530  void invalidateIterators() { _control = std::make_shared<Base*>(static_cast<Base*>(this)); }
531 };
532 
533 inline std::ostream& operator<<(std::ostream& out, const Bytes& x) {
534  out << escapeBytes(x.str(), false);
535  return out;
536 }
537 
538 namespace bytes {
539 inline namespace literals {
540 inline Bytes operator"" _b(const char* str, size_t size) { return Bytes(Bytes::Base(str, size)); }
541 } // namespace literals
542 } // namespace bytes
543 
544 template<>
545 inline std::string detail::to_string_for_print<Bytes>(const Bytes& x) {
546  return escapeBytes(x.str(), false);
547 }
548 
549 namespace detail::adl {
550 std::string to_string(const Bytes& x, adl::tag /*unused*/);
551 std::string to_string(const bytes::Side& x, adl::tag /*unused*/);
552 std::string to_string(const bytes::Charset& x, adl::tag /*unused*/);
553 std::string to_string(const bytes::DecodeErrorStrategy& x, adl::tag /*unused*/);
554 } // namespace detail::adl
555 
556 } // namespace hilti::rt
557 
558 // Disable JSON-ification of `Bytes`.
559 //
560 // As of nlohmann-json-0e694b4060ed55df980eaaebc2398b0ff24530d4 the JSON library misdetects the serialization for
561 // `Bytes` on some platforms. We see this on platforms not providing a C++17-compliant toolchain (e.g., in Cirrus' `no-toolchain`
562 // task which uses gcc-9.3.0) where code in JSON wants to check whether `Bytes` can be converted to a
563 // `std::filesystem::path`, but then runs into compiler issues.
564 namespace nlohmann {
565 template<>
566 struct adl_serializer<hilti::rt::Bytes> {};
567 } // namespace nlohmann
Bytes upper(bytes::Charset cs, bytes::DecodeErrorStrategy errors=bytes::DecodeErrorStrategy::REPLACE) const
Definition: bytes.h:340
Bytes sub(const const_iterator &to) const
Definition: bytes.h:282
std::string to_string(T &&x)
Definition: extension-points.h:26
bool isEmpty() const
Definition: bytes.h:235
std::string to_string_for_print(const T &x)
Definition: extension-points.h:45
void append(const uint8_t x)
Definition: bytes.h:220
size_type size() const
Definition: bytes.h:238
Bytes & operator=(const Bytes &b)
Definition: bytes.h:191
Definition: bytes.h:45
Bytes sub(Offset from, Offset to) const
Definition: bytes.h:291
Definition: any.h:7
Bytes extract(unsigned char(&dst)[N]) const
Definition: bytes.h:309
Bytes sub(Offset to) const
Definition: bytes.h:299
std::tuple< Bytes, Bytes > split1(const Bytes &sep) const
Definition: bytes.h:406
Definition: regexp.h:125
Time toTime(uint64_t base=10) const
Definition: bytes.h:473
std::pair< std::string, std::string > split1(std::string s)
Definition: util.cc:146
Definition: bytes.h:157
bool startsWith(const std::string &s, const std::string &prefix)
Definition: util.cc:380
const_iterator begin() const
Definition: bytes.h:226
Definition: stream.h:1001
const_iterator end() const
Definition: bytes.h:229
std::vector< std::string_view > split(std::string_view s, std::string_view delim)
Definition: util.cc:102
bool startsWith(const Bytes &b) const
Definition: bytes.h:329
Definition: bytes.h:564
Bytes & operator=(Bytes &&b) noexcept
Definition: bytes.h:207
void append(const Bytes &d)
Definition: bytes.h:214
ByteOrder
Definition: util.h:503
Bytes sub(const const_iterator &from, const const_iterator &to) const
Definition: bytes.h:272
const std::string & str() const &
Definition: bytes.h:223
Bytes lower(bytes::Charset cs, bytes::DecodeErrorStrategy errors=bytes::DecodeErrorStrategy::REPLACE) const
Definition: bytes.h:351
Definition: extension-points.h:12
Definition: vector.h:251
std::tuple< Bytes, Bytes > split1() const
Definition: bytes.h:386
Vector< Bytes > split() const
Definition: bytes.h:375
Definition: time.h:23
Definition: time.h:20
const_iterator find(value_type b, const const_iterator &n=const_iterator()) const
Definition: bytes.h:246
Definition: result.h:67
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:13
const_iterator at(Offset o) const
Definition: bytes.h:232
Vector< Bytes > split(const Bytes &sep) const
Definition: bytes.h:392
Bytes join(const Vector< T > &parts) const
Definition: bytes.h:417