Spicy
parser.h
1 // Copyright (c) 2020-now by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <algorithm>
6 #include <string>
7 #include <string_view>
8 #include <type_traits>
9 #include <utility>
10 #include <vector>
11 
12 #include <hilti/rt/exception.h>
13 #include <hilti/rt/fiber.h>
14 #include <hilti/rt/result.h>
15 #include <hilti/rt/type-info.h>
16 #include <hilti/rt/types/bytes.h>
17 #include <hilti/rt/types/null.h>
18 #include <hilti/rt/types/port.h>
19 #include <hilti/rt/types/reference.h>
20 #include <hilti/rt/types/struct.h>
21 #include <hilti/rt/types/tuple.h>
22 #include <hilti/rt/util.h>
23 
24 #include <spicy/rt/filter.h>
25 #include <spicy/rt/global-state.h>
26 #include <spicy/rt/mime.h>
27 #include <spicy/rt/parser-fwd.h>
28 #include <spicy/rt/sink.h>
29 #include <spicy/rt/typedefs.h>
30 
31 namespace spicy::rt {
32 
34 HILTI_RT_ENUM(Direction, Originator, Responder, Both, Undef);
35 
36 } // namespace spicy::rt
37 
38 namespace hilti::rt::detail::adl {
39 
40 inline std::string to_string(const ::spicy::rt::Direction& x, adl::tag /*unused*/) {
41  switch ( x.value() ) {
42  case spicy::rt::Direction::Originator: return "originator";
43  case spicy::rt::Direction::Responder: return "responder";
44  case spicy::rt::Direction::Both: return "both";
45  case spicy::rt::Direction::Undef: return "undefined";
46  }
47 
49 };
50 
51 } // namespace hilti::rt::detail::adl
52 
53 namespace spicy::rt {
54 
// Stream output for `Direction`: writes the string that
// `hilti::rt::to_string()` produces for `d` and returns the stream.
55 inline std::ostream& operator<<(std::ostream& out, const Direction& d) { return out << hilti::rt::to_string(d); }
56 
58 struct ParserPort {
59  hilti::rt::Port port;
60  Direction direction;
61 
62  // Constructor used by code generator.
64  : port(hilti::rt::tuple::get<0>(args)), direction(hilti::rt::tuple::get<1>(args)) {}
65 };
66 
// Stream output for `ParserPort`: writes the string that
// `hilti::rt::to_string()` produces for `p` and returns the stream.
67 inline std::ostream& operator<<(std::ostream& out, const ParserPort& p) { return out << hilti::rt::to_string(p); }
68 
69 } // namespace spicy::rt
70 
71 namespace hilti::rt::detail::adl {
72 
// Renders a `ParserPort` as a string. If the direction is `Both`, the result
// is just the port; otherwise it has the form "<port> (<direction> direction)".
73 inline std::string to_string(const spicy::rt::ParserPort& x, adl::tag /*unused*/) {
74  // TODO: Not sure why we need an explicit to_string() here.
// NOTE(review): no explicit to_string() call is visible in this body; the
// first branch returns `x.port` directly, which relies on an implicit
// conversion of the port to std::string. Confirm whether the TODO is stale.
75  if ( x.direction == spicy::rt::Direction::Both )
76  return x.port;
77  else
78  return fmt("%s (%s direction)", x.port, x.direction);
79 }
80 
81 } // namespace hilti::rt::detail::adl
82 
83 namespace spicy::rt {
84 
85 namespace detail {
86 
87 // Helper traits to detect whether a parser implements sink hooks.
88 
// Compile-time trait: `value` is true if an object of type `P` has a member
// `__on_0x25_gap` that can be called with two `uint64_t` arguments, and
// false otherwise.
//
// The `test(int)` overload only participates in overload resolution when the
// call expression inside `decltype` is well-formed; in that case it returns
// `std::true_type`. Otherwise the variadic `test(...)` overload is selected,
// which returns `std::false_type`.
89 template<typename P>
90 struct has_on_gap {
91  template<typename U>
92  // If `->` gets wrapped to the next line cpplint misdetects this as a C-style cast.
93  // clang-format off
94  static auto test(int) -> decltype(
95  std::declval<U>().__on_0x25_gap(std::declval<uint64_t>(), std::declval<uint64_t>()), std::true_type());
96  // clang-format on
97  template<typename U>
98  static std::false_type test(...);
// True if and only if the `test(int)` overload was selected for `P`.
99  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
100 };
101 
102 template<typename P>
104  template<typename U>
105  static auto test(int) -> decltype(std::declval<U>().__on_0x25_skipped(std::declval<uint64_t>()), std::true_type());
106  template<typename U>
107  static std::false_type test(...);
108  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
109 };
110 
111 template<typename P>
113  template<typename U>
114  static auto test(int) -> decltype(std::declval<U>().__on_0x25_overlap(std::declval<uint64_t>(),
115  std::declval<const hilti::rt::Bytes&>(),
116  std::declval<const hilti::rt::Bytes&>()),
117  std::true_type());
118  template<typename U>
119  static std::false_type test(...);
120  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
121 };
122 
123 template<typename P>
125  template<typename U>
126  static auto test(int) -> decltype(std::declval<U>().__on_0x25_undelivered(std::declval<uint64_t>(),
127  std::declval<const hilti::rt::Bytes&>()),
128  std::true_type());
129  template<typename U>
130  static std::false_type test(...);
131  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
132 };
133 
134 } // namespace detail
135 
142 struct Parser {
143  Parser(std::string_view name, bool is_public, Parse1Function parse1, hilti::rt::any parse2, Parse3Function parse3,
144  ContextNewFunction context_new, const hilti::rt::TypeInfo* type, std::string description,
146  : name(name),
148  parse1(parse1),
149  parse2(std::move(parse2)),
150  parse3(parse3),
152  type_info(type),
153  description(std::move(description)),
154  mime_types(std::move(mime_types)),
155  ports(std::move(ports)) {
156  _initProfiling();
157  }
158 
159  Parser(std::string_view name, bool is_public, Parse1Function parse1, hilti::rt::any parse2, Parse3Function parse3,
160  hilti::rt::Null /* null */, const hilti::rt::TypeInfo* type, std::string description,
162  : name(name),
164  parse1(parse1),
165  parse2(std::move(parse2)),
166  parse3(parse3),
167  type_info(type),
168  description(std::move(description)),
169  mime_types(std::move(mime_types)),
170  ports(std::move(ports)) {
171  _initProfiling();
172  }
173 
174  Parser(std::string_view name, bool is_public, hilti::rt::Null /* null */, hilti::rt::any parse2,
175  hilti::rt::Null /* null */, hilti::rt::Null /* null */, const hilti::rt::TypeInfo* type,
177  : Parser(name, is_public, nullptr, std::move(parse2), nullptr, nullptr, type, std::move(description),
178  std::move(mime_types), std::move(ports)) {
179  _initProfiling();
180  }
181 
182  Parser(std::string_view name, bool is_public, hilti::rt::Null /* null */, hilti::rt::any parse2,
183  hilti::rt::Null /* null */, ContextNewFunction context_new, const hilti::rt::TypeInfo* type,
185  : Parser(name, is_public, nullptr, std::move(parse2), nullptr, context_new, type, std::move(description),
186  std::move(mime_types), std::move(ports)) {
187  _initProfiling();
188  }
189 
190  Parser(const Parser&) = default;
191 
192  Parser() = default;
193  ~Parser() = default;
194  Parser(Parser&&) noexcept = default;
195  Parser& operator=(const Parser&) = default;
196  Parser& operator=(Parser&&) noexcept = default;
197 
// Creates a new context for this parser. If `context_new` is set, returns
// the result of calling it; otherwise returns an empty optional.
202  std::optional<UnitContext> createContext() const {
203  if ( context_new )
204  return (*context_new)();
205  else
206  return {};
207  }
208 
210  std::string_view name;
211 
213  bool is_public = false;
214 
220  uint64_t linker_scope = 0;
221 
227  Parse1Function parse1{};
228 
233  hilti::rt::any parse2;
234 
241  Parse3Function parse3{};
242 
247  ContextNewFunction context_new = nullptr;
248 
250  const hilti::rt::TypeInfo* type_info = nullptr;
251 
253  struct {
254  std::string prepare_block = "spicy/prepare/block/";
255  std::string prepare_input = "spicy/prepare/input/";
256  std::string prepare_stream = "spicy/prepare/stream/";
257 
258  operator bool() const {
259  // ensure initialization code has run
260  return ! prepare_input.empty();
261  }
263 
267  std::string description;
268 
273 
278 
283  detail::ParseSinkFunction __parse_sink = nullptr;
284 
286  void (*__hook_gap)(const hilti::rt::StrongReferenceGeneric&, uint64_t, uint64_t) = nullptr;
287 
290  const hilti::rt::Bytes&) = nullptr;
291 
293  void (*__hook_skipped)(const hilti::rt::StrongReferenceGeneric&, uint64_t) = nullptr;
294 
296  void (*__hook_undelivered)(const hilti::rt::StrongReferenceGeneric&, uint64_t, const hilti::rt::Bytes&) = nullptr;
297 
298 private:
299  void _initProfiling();
300 };
301 
// Returns a vector of pointers to all parsers in the global state whose
// `is_public` flag is set, in the same relative order as in the global list.
303 inline auto parsers() {
304  const auto& parsers = detail::globalState()->parsers;
305 
306  std::vector<const Parser*> public_parsers;
// Copy only the entries that are marked public.
307  std::ranges::copy_if(parsers, std::back_inserter(public_parsers), [](const auto& p) { return p->is_public; });
308 
309  return public_parsers;
310 }
311 
// Returns a const reference to the global state's `parsers_by_name` member.
313 inline const auto& parserNames() { return detail::globalState()->parsers_by_name; }
314 
323 hilti::rt::Result<hilti::rt::Nothing> registerParserAlias(const std::string& parser, const std::string& alias);
324 
339 hilti::rt::Result<const spicy::rt::Parser*> lookupParser(const std::string& name = "",
340  const std::optional<uint64_t>& linker_scope = {});
341 
// Exception type for parse errors; derives from
// `hilti::rt::RecoverableFailure`.
345 class ParseError : public hilti::rt::RecoverableFailure {
346 public:
// Constructs the error from a message and a location; the location
// defaults to an empty string.
347  ParseError(std::string_view msg, std::string_view location = "") : RecoverableFailure(msg, location) {}
348 
// Constructs the error from a result error, using its description as the
// message. No location is passed on.
349  ParseError(const hilti::rt::result::Error& e) : RecoverableFailure(e.description()) {}
350 
351  ~ParseError() override; /* required to create vtable, see hilti::rt::Exception */
352 };
353 
// Parse error signaling backtracking. It always carries the fixed message
// "backtracking outside of &try scope".
359 class Backtrack : public ParseError {
360 public:
361  Backtrack() : ParseError("backtracking outside of &try scope") {}
// Declared here, defined out of line.
362  ~Backtrack() override;
363 };
364 
// Parse error with the fixed message "missing data".
365 class MissingData : public ParseError {
366 public:
// Constructs the error with an optional location; the location defaults to
// an empty string.
367  MissingData(std::string_view location = "") : ParseError("missing data", location) {}
368  ~MissingData() override; /* required to create vtable, see hilti::rt::Exception */
369 };
370 
375 extern void accept_input();
376 
383 extern void decline_input(const std::string& reason);
384 
385 namespace detail {
386 
400 template<typename UnitRef>
401 inline void registerParser(::spicy::rt::Parser& p, // NOLINT(google-runtime-references)
402  uint64_t linker_scope, UnitRef /* not used, just for template instantiation */,
403  const hilti::rt::TypeInfo* /* utype */) {
404  // Note: This may be called before spicy::rt::init(), and during
405  // hilti::rt::init(). Cannot rely on any library functionality being
406  // initialized yet.
407 
408  p.linker_scope = linker_scope;
409  globalState()->parsers.emplace_back(&p);
410 
411  using unit_type = typename UnitRef::element_type;
412 
414  ! std::is_base_of_v<hilti::rt::trait::hasParameters, unit_type> )
415  p.__parse_sink = []() {
416  auto unit = spicy::rt::UnitRef<unit_type>(unit_type());
417  return std::make_pair(hilti::rt::StrongReferenceGeneric(unit), spicy::rt::sink::detail::connectUnit(unit));
418  };
419 
420  if constexpr ( detail::has_on_gap<unit_type>::value )
421  p.__hook_gap = [](const hilti::rt::StrongReferenceGeneric& u, uint64_t seq, uint64_t len) -> void {
422  (u.as<unit_type>()->__on_0x25_gap)(seq, len);
423  };
424 
425  if constexpr ( detail::has_on_skipped<unit_type>::value )
426  p.__hook_skipped = [](const hilti::rt::StrongReferenceGeneric& u, uint64_t seq) -> void {
427  (u.as<unit_type>()->__on_0x25_skipped)(seq);
428  };
429 
430  if constexpr ( detail::has_on_overlap<unit_type>::value )
431  p.__hook_overlap = [](const hilti::rt::StrongReferenceGeneric& u, uint64_t seq, const hilti::rt::Bytes& old,
432  const hilti::rt::Bytes& new_) -> void {
433  (u.as<unit_type>()->__on_0x25_overlap)(seq, old, new_);
434  };
435 
436  if constexpr ( detail::has_on_undelivered<unit_type>::value )
437  p.__hook_undelivered = [](const hilti::rt::StrongReferenceGeneric& u, uint64_t seq,
438  const hilti::rt::Bytes& bytes) -> void {
439  (u.as<unit_type>()->__on_0x25_undelivered)(seq, bytes);
440  };
441 }
442 
447 void printParserState(std::string_view unit_id, const hilti::rt::ValueReference<hilti::rt::Stream>& data,
448  const std::optional<hilti::rt::stream::SafeConstIterator>& begin,
449  const hilti::rt::stream::View& cur, int64_t lahead,
450  const hilti::rt::stream::SafeConstIterator& lahead_end, std::string_view literal_mode, bool trim,
451  const std::optional<hilti::rt::RecoverableFailure>& error);
452 
464 extern bool waitForInputOrEod(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
465  const hilti::rt::stream::View& cur, uint64_t min,
467 
476 extern void waitForEod(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
477  const hilti::rt::stream::View& cur,
479 
497 extern void waitForInput(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
498  const hilti::rt::stream::View& cur, uint64_t min, std::string_view error_msg,
499  std::string_view location,
501 
514 extern bool waitForInputNoThrow(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
515  const hilti::rt::stream::View& cur, uint64_t min,
517 
528 extern bool waitForInputOrEod(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
529  const hilti::rt::stream::View& cur,
531 
545 extern void waitForInput(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
546  const hilti::rt::stream::View& cur, std::string_view error_msg, std::string_view location,
548 
558 
// Throws a `Backtrack` exception; never returns normally.
562 inline void backtrack() { throw Backtrack(); }
563 
575 std::optional<hilti::rt::stream::SafeConstIterator> unitFind(
577  const std::optional<hilti::rt::stream::SafeConstIterator>& i, const hilti::rt::Bytes& needle,
578  hilti::rt::stream::Direction d);
579 
594  uint64_t size, bool eod_ok, std::string_view location,
596 
608 void expectBytesLiteral(hilti::rt::ValueReference<hilti::rt::Stream>& data, const hilti::rt::stream::View& cur,
609  const hilti::rt::Bytes& literal, std::string_view location,
611 
612 } // namespace detail
613 } // namespace spicy::rt
Definition: bytes.h:234
Definition: port.h:21
Definition: result.h:71
Definition: reference.h:694
T * as() const
Definition: reference.h:708
Definition: reference.h:376
Definition: vector.h:260
Definition: result.h:18
Definition: stream.h:492
Definition: stream.h:1167
Definition: parser.h:359
Definition: parser.h:365
Definition: parser.h:345
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:13
std::string to_string(T &&x)
Definition: extension-points.h:26
std::tuple< std::optional< Ts >... > Tuple
Definition: tuple.h:24
void cannot_be_reached() __attribute__((noreturn))
Definition: util.cc:45
Definition: null.h:18
Definition: type-info.h:1273
Definition: parser.h:142
struct spicy::rt::Parser::@2 profiler_tags
ContextNewFunction context_new
Definition: parser.h:247
bool is_public
Definition: parser.h:213
Parse1Function parse1
Definition: parser.h:227
void(* __hook_undelivered)(const hilti::rt::StrongReferenceGeneric &, uint64_t, const hilti::rt::Bytes &)
Definition: parser.h:296
detail::ParseSinkFunction __parse_sink
Definition: parser.h:283
void(* __hook_skipped)(const hilti::rt::StrongReferenceGeneric &, uint64_t)
Definition: parser.h:293
hilti::rt::Vector< ParserPort > ports
Definition: parser.h:277
std::string_view name
Definition: parser.h:210
std::string description
Definition: parser.h:267
std::optional< UnitContext > createContext() const
Definition: parser.h:202
hilti::rt::any parse2
Definition: parser.h:233
void(* __hook_overlap)(const hilti::rt::StrongReferenceGeneric &, uint64_t, const hilti::rt::Bytes &, const hilti::rt::Bytes &)
Definition: parser.h:289
uint64_t linker_scope
Definition: parser.h:220
void(* __hook_gap)(const hilti::rt::StrongReferenceGeneric &, uint64_t, uint64_t)
Definition: parser.h:286
hilti::rt::Vector< MIMEType > mime_types
Definition: parser.h:272
const hilti::rt::TypeInfo * type_info
Definition: parser.h:250
Parse3Function parse3
Definition: parser.h:241
Definition: parser.h:58
Definition: parser.h:90
Definition: parser.h:112
Definition: parser.h:103