Spicy
parser.h
1 // Copyright (c) 2020-now by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <algorithm>
6 #include <string>
7 #include <string_view>
8 #include <type_traits>
9 #include <utility>
10 #include <vector>
11 
12 #include <hilti/rt/exception.h>
13 #include <hilti/rt/fiber.h>
14 #include <hilti/rt/result.h>
15 #include <hilti/rt/type-info.h>
16 #include <hilti/rt/types/bytes.h>
17 #include <hilti/rt/types/null.h>
18 #include <hilti/rt/types/port.h>
19 #include <hilti/rt/types/reference.h>
20 #include <hilti/rt/types/string.h>
21 #include <hilti/rt/types/struct.h>
22 #include <hilti/rt/types/tuple.h>
23 #include <hilti/rt/util.h>
24 
25 #include <spicy/rt/filter.h>
26 #include <spicy/rt/global-state.h>
27 #include <spicy/rt/mime.h>
28 #include <spicy/rt/parser-fwd.h>
29 #include <spicy/rt/sink.h>
30 #include <spicy/rt/typedefs.h>
31 
32 namespace spicy::rt {
33 
35 HILTI_RT_ENUM(Direction, Originator, Responder, Both);
36 
37 } // namespace spicy::rt
38 
39 namespace hilti::rt::detail::adl {
40 
41 inline std::string to_string(const ::spicy::rt::Direction& x, adl::tag /*unused*/) {
42  switch ( x.value() ) {
43  case spicy::rt::Direction::Originator: return "originator";
44  case spicy::rt::Direction::Responder: return "responder";
45  case spicy::rt::Direction::Both: return "both";
46  case spicy::rt::Direction::Undef: return "undefined";
47  }
48 
50 };
51 
52 } // namespace hilti::rt::detail::adl
53 
54 namespace spicy::rt {
55 
56 inline std::ostream& operator<<(std::ostream& out, const Direction& d) { return out << hilti::rt::to_string(d); }
57 
59 struct ParserPort {
60  hilti::rt::Port port;
61  Direction direction;
62 
63  // Constructor used by code generator.
65  : port(hilti::rt::tuple::get<0>(args)), direction(hilti::rt::tuple::get<1>(args)) {}
66 };
67 
68 inline std::ostream& operator<<(std::ostream& out, const ParserPort& p) { return out << hilti::rt::to_string(p); }
69 
70 } // namespace spicy::rt
71 
72 namespace hilti::rt::detail::adl {
73 
74 inline std::string to_string(const spicy::rt::ParserPort& x, adl::tag /*unused*/) {
75  // TODO: Not sure why we need to explicit to_string() here.
76  if ( x.direction == spicy::rt::Direction::Both )
77  return std::string(static_cast<hilti::rt::String>(x.port));
78  else
79  return fmt("%s (%s direction)", x.port, x.direction);
80 }
81 
82 } // namespace hilti::rt::detail::adl
83 
84 namespace spicy::rt {
85 
86 namespace detail {
87 
88 // Helper traits to detect whether a parser implements sink hooks.
89 
90 template<typename P>
91 struct has_on_gap {
92  template<typename U>
93  // If `->` gets wrapped to the next line cpplint misdetects this as a C-style cast.
94  // NOLINTNEXTLINE(readability/casting)
95  static auto test(int)
96  -> decltype(std::declval<U>().HILTI_INTERNAL(on_0x25_gap)(std::declval<uint64_t>(), std::declval<uint64_t>()),
97  std::true_type());
98  template<typename U>
99  static std::false_type test(...);
100  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
101 };
102 
103 template<typename P>
105  template<typename U>
106  // NOLINTNEXTLINE(readability/casting)
107  static auto test(int)
108  -> decltype(std::declval<U>().HILTI_INTERNAL(on_0x25_skipped)(std::declval<uint64_t>()), std::true_type());
109  template<typename U>
110  static std::false_type test(...);
111  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
112 };
113 
114 template<typename P>
116  template<typename U>
117  // NOLINTNEXTLINE(readability/casting)
118  static auto test(int)
119  -> decltype(std::declval<U>().HILTI_INTERNAL(on_0x25_overlap)(std::declval<uint64_t>(),
120  std::declval<const hilti::rt::Bytes&>(),
121  std::declval<const hilti::rt::Bytes&>()),
122  std::true_type());
123  template<typename U>
124  static std::false_type test(...);
125  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
126 };
127 
128 template<typename P>
130  template<typename U>
131  // NOLINTNEXTLINE(readability/casting)
132  static auto test(int)
133  -> decltype(std::declval<U>().HILTI_INTERNAL(on_0x25_undelivered)(std::declval<uint64_t>(),
134  std::declval<const hilti::rt::Bytes&>()),
135  std::true_type());
136  template<typename U>
137  static std::false_type test(...);
138  static constexpr bool value = std::is_same_v<decltype(test<P>(0)), std::true_type>;
139 };
140 
141 } // namespace detail
142 
149 struct Parser {
151  std::string_view name,
152  bool is_public,
153  Parse1Function parse1,
154  hilti::rt::any parse2,
155  Parse3Function parse3,
156  ContextNewFunction context_new,
157  const hilti::rt::TypeInfo* type,
161  : name(name),
163  parse1(parse1),
164  parse2(std::move(parse2)),
165  parse3(parse3),
167  type_info(type),
168  description(std::move(description)),
169  mime_types(std::move(mime_types)),
170  ports(std::move(ports)) {
171  _initProfiling();
172  }
173 
175  std::string_view name,
176  bool is_public,
177  Parse1Function parse1,
178  hilti::rt::any parse2,
179  Parse3Function parse3,
180  hilti::rt::Null /* null */,
181  const hilti::rt::TypeInfo* type,
185  : name(name),
187  parse1(parse1),
188  parse2(std::move(parse2)),
189  parse3(parse3),
190  type_info(type),
191  description(std::move(description)),
192  mime_types(std::move(mime_types)),
193  ports(std::move(ports)) {
194  _initProfiling();
195  }
196 
198  std::string_view name,
199  bool is_public,
200  hilti::rt::Null /* null */,
201  hilti::rt::any parse2,
202  hilti::rt::Null /* null */,
203  hilti::rt::Null /* null */,
204  const hilti::rt::TypeInfo* type,
209  name,
210  is_public,
211  nullptr,
212  std::move(parse2),
213  nullptr,
214  nullptr,
215  type,
216  std::move(description),
217  std::move(mime_types),
218  std::move(ports)) {
219  _initProfiling();
220  }
221 
223  std::string_view name,
224  bool is_public,
225  hilti::rt::Null /* null */,
226  hilti::rt::any parse2,
227  hilti::rt::Null /* null */,
228  ContextNewFunction context_new,
229  const hilti::rt::TypeInfo* type,
234  name,
235  is_public,
236  nullptr,
237  std::move(parse2),
238  nullptr,
239  context_new,
240  type,
241  std::move(description),
242  std::move(mime_types),
243  std::move(ports)) {
244  _initProfiling();
245  }
246 
247  Parser(const Parser&) = default;
248 
249  Parser() = default;
250  ~Parser() = default;
251  Parser(Parser&&) noexcept = default;
252  Parser& operator=(const Parser&) = default;
253  Parser& operator=(Parser&&) noexcept = default;
254 
260  if ( context_new )
261  return (*context_new)();
262  else
263  return {};
264  }
265 
268 
270  bool is_public = false;
271 
277  uint64_t linker_scope = 0;
278 
284  Parse1Function parse1{};
285 
290  hilti::rt::any parse2;
291 
298  Parse3Function parse3{};
299 
304  ContextNewFunction context_new = nullptr;
305 
307  const hilti::rt::TypeInfo* type_info = nullptr;
308 
310  struct {
311  std::string prepare_block = "spicy/prepare/block/";
312  std::string prepare_input = "spicy/prepare/input/";
313  std::string prepare_stream = "spicy/prepare/stream/";
314 
315  operator bool() const {
316  // ensure initialization code has run
317  return ! prepare_input.empty();
318  }
320 
325 
330 
335 
340  detail::ParseSinkFunction __parse_sink = nullptr;
341 
343  void (*__hook_gap)(const hilti::rt::StrongReferenceGeneric&, uint64_t, uint64_t) = nullptr;
344 
347  uint64_t,
348  const hilti::rt::Bytes&,
349  const hilti::rt::Bytes&) = nullptr;
350 
352  void (*__hook_skipped)(const hilti::rt::StrongReferenceGeneric&, uint64_t) = nullptr;
353 
355  void (*__hook_undelivered)(const hilti::rt::StrongReferenceGeneric&, uint64_t, const hilti::rt::Bytes&) = nullptr;
356 
357 private:
358  void _initProfiling();
359 };
360 
362 inline auto parsers() {
363  const auto& parsers = detail::globalState()->parsers;
364 
365  std::vector<const Parser*> public_parsers;
366  std::ranges::copy_if(parsers, std::back_inserter(public_parsers), [](const auto& p) { return p->is_public; });
367 
368  return public_parsers;
369 }
370 
372 inline const auto& parserNames() { return detail::globalState()->parsers_by_name; }
373 
382 hilti::rt::Result<hilti::rt::Nothing> registerParserAlias(const hilti::rt::String& parser,
383  const hilti::rt::String& alias);
384 
400  const hilti::rt::Optional<uint64_t>& linker_scope = {});
401 
405 class ParseError : public hilti::rt::RecoverableFailure {
406 public:
407  ParseError(std::string_view msg, std::string_view location = "") : RecoverableFailure(msg, location) {}
408 
409  ParseError(const hilti::rt::result::Error& e) : RecoverableFailure(e.description()) {}
410 
411  ~ParseError() override; /* required to create vtable, see hilti::rt::Exception */
412 };
413 
419 class Backtrack : public ParseError {
420 public:
421  Backtrack() : ParseError("backtracking outside of &try scope") {}
422  ~Backtrack() override;
423 };
424 
425 class MissingData : public ParseError {
426 public:
427  MissingData(std::string_view location = "") : ParseError("missing data", location) {}
428  ~MissingData() override; /* required to create vtable, see hilti::rt::Exception */
429 };
430 
435 extern void accept_input();
436 
443 extern void decline_input(const hilti::rt::String& reason);
444 
445 namespace detail {
446 
460 template<typename UnitRef>
461 inline void registerParser(::spicy::rt::Parser& p, // NOLINT(google-runtime-references)
462  uint64_t linker_scope,
463  UnitRef /* not used, just for template instantiation */,
464  const hilti::rt::TypeInfo* /* utype */) {
465  // Note: This may be called before spicy::rt::init(), and during
466  // hilti::rt::init(). Cannot rely on any library functionality being
467  // initialized yet.
468 
469  p.linker_scope = linker_scope;
470  globalState()->parsers.emplace_back(&p);
471 
472  using unit_type = typename UnitRef::element_type;
473 
475  ! std::is_base_of_v<hilti::rt::trait::hasParameters, unit_type> )
476  p.__parse_sink = []() {
477  auto unit = spicy::rt::UnitRef<unit_type>(unit_type());
478  return std::make_pair(hilti::rt::StrongReferenceGeneric(unit), spicy::rt::sink::detail::connectUnit(unit));
479  };
480 
481  if constexpr ( detail::has_on_gap<unit_type>::value )
482  p.__hook_gap = [](const hilti::rt::StrongReferenceGeneric& u, uint64_t seq, uint64_t len) -> void {
483  (u.as<unit_type>()->HILTI_INTERNAL(on_0x25_gap))(seq, len);
484  };
485 
486  if constexpr ( detail::has_on_skipped<unit_type>::value )
487  p.__hook_skipped = [](const hilti::rt::StrongReferenceGeneric& u, uint64_t seq) -> void {
488  (u.as<unit_type>()->HILTI_INTERNAL(on_0x25_skipped))(seq);
489  };
490 
491  if constexpr ( detail::has_on_overlap<unit_type>::value )
493  uint64_t seq,
494  const hilti::rt::Bytes& old,
495  const hilti::rt::Bytes& new_) -> void {
496  (u.as<unit_type>()->HILTI_INTERNAL(on_0x25_overlap))(seq, old, new_);
497  };
498 
499  if constexpr ( detail::has_on_undelivered<unit_type>::value )
501  [](const hilti::rt::StrongReferenceGeneric& u, uint64_t seq, const hilti::rt::Bytes& bytes) -> void {
502  (u.as<unit_type>()->HILTI_INTERNAL(on_0x25_undelivered))(seq, bytes);
503  };
504 }
505 
510 void printParserState(std::string_view unit_id,
513  const hilti::rt::stream::View& cur,
514  int64_t lahead,
515  const hilti::rt::stream::SafeConstIterator& lahead_end,
516  std::string_view literal_mode,
517  bool trim,
519 
531 extern bool waitForInputOrEod(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
532  const hilti::rt::stream::View& cur,
533  uint64_t min,
535 
544 extern void waitForEod(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
545  const hilti::rt::stream::View& cur,
547 
565 extern void waitForInput(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
566  const hilti::rt::stream::View& cur,
567  uint64_t min,
568  std::string_view error_msg,
569  std::string_view location,
571 
584 extern bool waitForInputNoThrow(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
585  const hilti::rt::stream::View& cur,
586  uint64_t min,
588 
599 extern bool waitForInputOrEod(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
600  const hilti::rt::stream::View& cur,
602 
616 extern void waitForInput(hilti::rt::ValueReference<hilti::rt::Stream>& data, // NOLINT(google-runtime-references)
617  const hilti::rt::stream::View& cur,
618  std::string_view error_msg,
619  std::string_view location,
621 
629 extern bool atEod(hilti::rt::ValueReference<hilti::rt::Stream>& data,
630  const hilti::rt::stream::View& cur,
632 
636 inline void backtrack() { throw Backtrack(); }
637 
653  const hilti::rt::Bytes& needle,
654  hilti::rt::stream::Direction d);
655 
670  const hilti::rt::stream::View& cur,
671  uint64_t size,
672  bool eod_ok,
673  std::string_view location,
675 
687 void expectBytesLiteral(hilti::rt::ValueReference<hilti::rt::Stream>& data,
688  const hilti::rt::stream::View& cur,
689  const hilti::rt::Bytes& literal,
690  std::string_view location,
692 
693 } // namespace detail
694 } // namespace spicy::rt
Definition: bytes.h:235
Definition: optional.h:33
Definition: port.h:20
Definition: result.h:73
Definition: string.h:31
Definition: reference.h:717
T * as() const
Definition: reference.h:731
Definition: reference.h:399
Definition: tuple.h:115
Definition: vector.h:281
Definition: result.h:18
Definition: stream.h:492
Definition: stream.h:1172
Definition: parser.h:419
Definition: parser.h:425
Definition: parser.h:405
std::string fmt(const char *fmt, const Args &... args)
Definition: fmt.h:17
void cannot_be_reached()
Definition: util.cc:53
std::string to_string(T &&x)
Definition: extension-points.h:26
Definition: null.h:20
Definition: type-info.h:1276
Definition: struct.h:32
Definition: parser.h:149
struct spicy::rt::Parser::@2 profiler_tags
ContextNewFunction context_new
Definition: parser.h:304
hilti::rt::String name
Definition: parser.h:267
bool is_public
Definition: parser.h:270
Parse1Function parse1
Definition: parser.h:284
void(* __hook_undelivered)(const hilti::rt::StrongReferenceGeneric &, uint64_t, const hilti::rt::Bytes &)
Definition: parser.h:355
hilti::rt::String description
Definition: parser.h:324
detail::ParseSinkFunction __parse_sink
Definition: parser.h:340
void(* __hook_skipped)(const hilti::rt::StrongReferenceGeneric &, uint64_t)
Definition: parser.h:352
hilti::rt::Vector< ParserPort > ports
Definition: parser.h:334
hilti::rt::Optional< UnitContext > createContext() const
Definition: parser.h:259
hilti::rt::any parse2
Definition: parser.h:290
void(* __hook_overlap)(const hilti::rt::StrongReferenceGeneric &, uint64_t, const hilti::rt::Bytes &, const hilti::rt::Bytes &)
Definition: parser.h:346
uint64_t linker_scope
Definition: parser.h:277
void(* __hook_gap)(const hilti::rt::StrongReferenceGeneric &, uint64_t, uint64_t)
Definition: parser.h:343
hilti::rt::Vector< MIMEType > mime_types
Definition: parser.h:329
const hilti::rt::TypeInfo * type_info
Definition: parser.h:307
Parse3Function parse3
Definition: parser.h:298
Definition: parser.h:59
Definition: parser.h:91
Definition: parser.h:115
Definition: parser.h:104