Spicy
bytes.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <cstring>
6 #include <memory>
7 #include <string>
8 #include <tuple>
9 #include <utility>
10 
11 #include <hilti/rt/extension-points.h>
12 #include <hilti/rt/iterator.h>
13 #include <hilti/rt/result.h>
14 #include <hilti/rt/types/string.h>
15 #include <hilti/rt/types/time.h>
16 #include <hilti/rt/types/vector.h>
17 #include <hilti/rt/util.h>
18 
19 namespace hilti::rt {
20 
21 class Bytes;
22 class RegExp;
23 
24 namespace stream {
25 class View;
26 }
27 
28 namespace bytes {
29 
/** Selects which end(s) of a byte sequence an operation acts on; taken as an argument by `Bytes::strip()`. */
enum class Side { Left, Right, Both };
36 
/**
 * Character sets accepted by the charset-aware `Bytes` operations
 * (construction from a string, `decode()`, `upper()` and `lower()`).
 */
enum class Charset {
    Undef,
    UTF8,
    ASCII
};
39 
40 class Iterator {
41  using B = std::string;
42  using difference_type = B::const_iterator::difference_type;
43 
44  std::weak_ptr<B*> _control;
45  typename B::size_type _index = 0;
46 
47 public:
48  Iterator() = default;
49 
50  Iterator(typename B::size_type index, const std::weak_ptr<B*> control)
51  : _control(control), _index(std::move(index)) {}
52 
53  uint8_t operator*() const {
54  if ( auto&& l = _control.lock() ) {
55  auto&& data = static_cast<B&>(**l);
56 
57  if ( _index >= data.size() )
58  throw IndexError(fmt("index %s out of bounds", _index));
59 
60  return data[_index];
61  }
62 
63  throw InvalidIterator("bound object has expired");
64  }
65 
66  template<typename T>
67  auto& operator+=(const hilti::rt::integer::safe<T>& n) {
68  return *this += n.Ref();
69  }
70 
71  auto& operator+=(uint64_t n) {
72  _index += n;
73  return *this;
74  }
75 
76  template<typename T>
77  auto operator+(const hilti::rt::integer::safe<T>& n) const {
78  return *this + n.Ref();
79  }
80 
81  template<typename T>
82  auto operator+(const T& n) const {
83  return Iterator{_index + n, _control};
84  }
85 
86  explicit operator bool() const { return static_cast<bool>(_control.lock()); }
87 
88  auto& operator++() {
89  ++_index;
90  return *this;
91  }
92 
93  auto operator++(int) {
94  auto result = *this;
95  ++_index;
96  return result;
97  }
98 
99  friend auto operator==(const Iterator& a, const Iterator& b) {
100  if ( a._control.lock() != b._control.lock() )
101  throw InvalidArgument("cannot compare iterators into different bytes");
102  return a._index == b._index;
103  }
104 
105  friend bool operator!=(const Iterator& a, const Iterator& b) { return ! (a == b); }
106 
107  friend auto operator<(const Iterator& a, const Iterator& b) {
108  if ( a._control.lock() != b._control.lock() )
109  throw InvalidArgument("cannot compare iterators into different bytes");
110  return a._index < b._index;
111  }
112 
113  friend auto operator<=(const Iterator& a, const Iterator& b) {
114  if ( a._control.lock() != b._control.lock() )
115  throw InvalidArgument("cannot compare iterators into different bytes");
116  return a._index <= b._index;
117  }
118 
119  friend auto operator>(const Iterator& a, const Iterator& b) {
120  if ( a._control.lock() != b._control.lock() )
121  throw InvalidArgument("cannot compare iterators into different bytes");
122  return a._index > b._index;
123  }
124 
125  friend auto operator>=(const Iterator& a, const Iterator& b) {
126  if ( a._control.lock() != b._control.lock() )
127  throw InvalidArgument("cannot compare iterators into different bytes");
128  return a._index >= b._index;
129  }
130 
131  friend difference_type operator-(const Iterator& a, const Iterator& b) {
132  if ( a._control.lock() != b._control.lock() )
133  throw InvalidArgument("cannot perform arithmetic with iterators into different bytes");
134  return a._index - b._index;
135  }
136 };
137 
138 inline std::string to_string(const Iterator& /* i */, rt::detail::adl::tag /*unused*/) { return "<bytes iterator>"; }
139 
140 inline std::ostream& operator<<(std::ostream& out, const Iterator& /* x */) {
141  out << "<bytes iterator>";
142  return out;
143 }
144 
145 } // namespace bytes
146 
153 class Bytes : protected std::string {
154 public:
155  using Base = std::string;
157  using Base::const_reference;
158  using Base::reference;
159  using Offset = uint64_t;
160 
161  using Base::Base;
162  using Base::data;
163 
172  Bytes(std::string s, bytes::Charset cs);
173 
174  Bytes(Base&& str) : Base(std::move(str)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
175  Bytes(const Bytes& xs) : Base(xs), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
176  Bytes(Bytes&& xs) : Base(std::move(xs)), _control(std::make_shared<Base*>(static_cast<Base*>(this))) {}
177 
185  Bytes& operator=(const Bytes& b) {
186  invalidateIterators();
187  this->Base::operator=(b);
188  return *this;
189  }
190 
199  invalidateIterators();
200  this->Base::operator=(std::move(b));
201  return *this;
202  }
203 
205  void append(const Bytes& d) { Base::append(d.str()); }
206 
208  void append(const stream::View& view);
209 
211  void append(const uint8_t x) { Base::append(1, x); }
212 
214  const std::string& str() const& { return *this; }
215 
217  const_iterator begin() const { return const_iterator(0u, _control); }
218 
220  const_iterator end() const { return const_iterator(size(), _control); }
221 
223  const_iterator at(Offset o) const { return begin() + o; }
224 
226  bool isEmpty() const { return empty(); }
227 
229  int64_t size() const { return static_cast<int64_t>(std::string::size()); }
230 
237  const_iterator find(value_type b, const const_iterator& n = const_iterator()) const {
238  if ( auto i = Base::find(b, (n ? n - begin() : 0)); i != Base::npos )
239  return begin() + i;
240  else
241  return end();
242  }
243 
254  std::tuple<bool, const_iterator> find(const Bytes& v, const const_iterator& n = const_iterator()) const;
255 
263  Bytes sub(const const_iterator& from, const const_iterator& to) const {
264  return {substr(from - begin(), to - from)};
265  }
266 
273  Bytes sub(const const_iterator& to) const { return sub(begin(), to); }
274 
282  Bytes sub(Offset from, Offset to) const { return {substr(from, to - from)}; }
283 
290  Bytes sub(Offset to) const { return sub(0, to); }
291 
299  template<int N>
300  Bytes extract(unsigned char (&dst)[N]) const {
301  if ( N > size() )
302  throw InvalidArgument("insufficient data in source");
303 
304  memcpy(dst, data(), N);
305  return sub(N, std::string::npos);
306  }
307 
315  std::string decode(bytes::Charset cs) const;
316 
318  bool startsWith(const Bytes& b) const { return hilti::rt::startsWith(*this, b); }
319 
328  Bytes upper(bytes::Charset cs) const { return Bytes(hilti::rt::string::upper(decode(cs)), cs); }
329 
335  Bytes lower(bytes::Charset cs) const { return Bytes(hilti::rt::string::lower(decode(cs)), cs); }
336 
345  Bytes strip(const Bytes& set, bytes::Side side = bytes::Side::Both) const;
346 
354  Bytes strip(bytes::Side side = bytes::Side::Both) const;
355 
358  Vector<Bytes> x;
359  for ( auto& v : hilti::rt::split(*this) )
360  x.emplace_back(Bytes::Base(v));
361  return x;
362  }
363 
368  std::tuple<Bytes, Bytes> split1() const {
369  auto p = hilti::rt::split1(str());
370  return std::make_tuple(p.first, p.second);
371  }
372 
374  Vector<Bytes> split(const Bytes& sep) const {
375  Vector<Bytes> x;
376  for ( auto& v : hilti::rt::split(*this, sep) )
377  x.push_back(Bytes::Base(v));
378  return x;
379  }
380 
388  std::tuple<Bytes, Bytes> split1(const Bytes& sep) const {
389  auto p = hilti::rt::split1(str(), sep);
390  return std::make_tuple(p.first, p.second);
391  }
392 
398  template<typename T>
399  Bytes join(const Vector<T>& parts) const {
400  Bytes rval;
401 
402  for ( size_t i = 0; i < parts.size(); ++i ) {
403  if ( i > 0 )
404  rval += *this;
405 
406  rval += Bytes(hilti::rt::to_string_for_print(parts[i]).data());
407  }
408 
409  return rval;
410  }
411 
419  integer::safe<int64_t> toInt(uint64_t base = 10) const;
420 
428  integer::safe<uint64_t> toUInt(uint64_t base = 10) const;
429 
437  int64_t toInt(hilti::rt::ByteOrder byte_order) const;
438 
446  uint64_t toUInt(hilti::rt::ByteOrder byte_order) const;
447 
455  Time toTime(uint64_t base = 10) const {
456  auto ns = ! isEmpty() ? toUInt(base) * integer::safe<uint64_t>(1'000'000'000) : integer::safe<uint64_t>(0);
457  return Time(ns, Time::NanosecondTag());
458  }
459 
467  Time toTime(hilti::rt::ByteOrder byte_order) const {
468  return Time(toUInt(byte_order) * integer::safe<uint64_t>(1'000'000'000), Time::NanosecondTag());
469  }
470 
478  Result<Bytes> match(const RegExp& re, unsigned int group = 0) const;
479 
480  // Add some operators over `Base`.
481  friend bool operator==(const Bytes& a, const Bytes& b) {
482  return static_cast<const Bytes::Base&>(a) == static_cast<const Bytes::Base&>(b);
483  }
484 
485  friend bool operator!=(const Bytes& a, const Bytes& b) { return ! (a == b); }
486 
487 
488  friend bool operator<(const Bytes& a, const Bytes& b) {
489  return static_cast<const Bytes::Base&>(a) < static_cast<const Bytes::Base&>(b);
490  }
491 
492  friend bool operator<=(const Bytes& a, const Bytes& b) {
493  return static_cast<const Bytes::Base&>(a) <= static_cast<const Bytes::Base&>(b);
494  }
495 
496  friend bool operator>(const Bytes& a, const Bytes& b) {
497  return static_cast<const Bytes::Base&>(a) > static_cast<const Bytes::Base&>(b);
498  }
499 
500  friend bool operator>=(const Bytes& a, const Bytes& b) {
501  return static_cast<const Bytes::Base&>(a) >= static_cast<const Bytes::Base&>(b);
502  }
503 
504  friend Bytes operator+(const Bytes& a, const Bytes& b) {
505  return static_cast<const Bytes::Base&>(a) + static_cast<const Bytes::Base&>(b);
506  }
507 
508 private:
509  friend bytes::Iterator;
510  std::shared_ptr<Base*> _control;
511 
512  void invalidateIterators() { _control = std::make_shared<Base*>(static_cast<Base*>(this)); }
513 };
514 
515 inline std::ostream& operator<<(std::ostream& out, const Bytes& x) {
516  out << escapeBytes(x.str(), false);
517  return out;
518 }
519 
520 namespace bytes {
521 inline namespace literals {
522 inline Bytes operator"" _b(const char* str, size_t size) { return Bytes(Bytes::Base(str, size)); }
523 } // namespace literals
524 } // namespace bytes
525 
526 template<>
527 inline std::string detail::to_string_for_print<Bytes>(const Bytes& x) {
528  return escapeBytes(x.str(), false);
529 }
530 
531 namespace detail::adl {
532 std::string to_string(const Bytes& x, adl::tag /*unused*/);
533 std::string to_string(const bytes::Side& x, adl::tag /*unused*/);
534 std::string to_string(const bytes::Charset& x, adl::tag /*unused*/);
535 } // namespace detail::adl
536 
537 } // namespace hilti::rt
ByteOrder
Definition: util.h:515
Bytes sub(const const_iterator &to) const
Definition: bytes.h:273
std::string to_string(T &&x)
Definition: extension-points.h:26
bool isEmpty() const
Definition: bytes.h:226
std::string to_string_for_print(const T &x)
Definition: extension-points.h:45
void append(const uint8_t x)
Definition: bytes.h:211
Bytes & operator=(const Bytes &b)
Definition: bytes.h:185
Definition: bytes.h:40
Bytes sub(Offset from, Offset to) const
Definition: bytes.h:282
Definition: any.h:7
Bytes extract(unsigned char(&dst)[N]) const
Definition: bytes.h:300
Bytes sub(Offset to) const
Definition: bytes.h:290
std::tuple< Bytes, Bytes > split1(const Bytes &sep) const
Definition: bytes.h:388
Definition: regexp.h:117
Time toTime(uint64_t base=10) const
Definition: bytes.h:455
std::pair< std::string, std::string > split1(std::string s)
Definition: util.cc:156
Definition: bytes.h:153
bool startsWith(const std::string &s, const std::string &prefix)
Definition: util.h:200
const_iterator begin() const
Definition: bytes.h:217
int64_t size() const
Definition: bytes.h:229
Bytes upper(bytes::Charset cs) const
Definition: bytes.h:328
Definition: stream.h:978
const_iterator end() const
Definition: bytes.h:220
std::vector< std::string_view > split(std::string_view s, std::string_view delim)
Definition: util.cc:112
bool startsWith(const Bytes &b) const
Definition: bytes.h:318
void append(const Bytes &d)
Definition: bytes.h:205
Bytes sub(const const_iterator &from, const const_iterator &to) const
Definition: bytes.h:263
const std::string & str() const &
Definition: bytes.h:214
Bytes & operator=(Bytes &&b)
Definition: bytes.h:198
Definition: extension-points.h:12
Definition: vector.h:249
std::tuple< Bytes, Bytes > split1() const
Definition: bytes.h:368
Vector< Bytes > split() const
Definition: bytes.h:357
Definition: time.h:23
Definition: time.h:20
const_iterator find(value_type b, const const_iterator &n=const_iterator()) const
Definition: bytes.h:237
Definition: result.h:67
Bytes lower(bytes::Charset cs) const
Definition: bytes.h:335
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:13
const_iterator at(Offset o) const
Definition: bytes.h:223
Vector< Bytes > split(const Bytes &sep) const
Definition: bytes.h:374
Bytes join(const Vector< T > &parts) const
Definition: bytes.h:399