Spicy
sink.h
1 // Copyright (c) 2020-2021 by the Zeek Project. See LICENSE for details.
2 
3 #pragma once
4 
5 #include <hilti/ast/operator.h>
6 #include <hilti/ast/operators/common.h>
7 #include <hilti/ast/types/bool.h>
8 #include <hilti/ast/types/bytes.h>
9 #include <hilti/ast/types/enum.h>
10 #include <hilti/ast/types/library.h>
11 #include <hilti/ast/types/reference.h>
12 #include <hilti/ast/types/void.h>
13 
14 #include <spicy/ast/types/sink.h>
15 #include <spicy/ast/types/unit.h>
16 
17 namespace spicy::operator_ {
18 
19 STANDARD_OPERATOR_1x(sink, SizeValue, Size, type::UnsignedInteger(64), type::constant(type::Sink()), R"(
20 Returns the number of bytes written into the sink so far. If the sink has
21 filters attached, this returns the value after filtering.
22 )");
23 
24 STANDARD_OPERATOR_1x(sink, SizeReference, Size, type::UnsignedInteger(64), hilti::type::StrongReference(type::Sink()),
25  R"(
26 Returns the number of bytes written into the referenced sink so far. If the sink has
27 filters attached, this returns the value after filtering.
28 )");
29 
30 BEGIN_METHOD(sink, Close)
31  auto signature() const {
33  .result = type::Void(),
34  .id = "close",
35  .args = {},
36  .doc = R"(
37 Closes a sink by disconnecting all parsing units. Afterwards the sink's state
38 is as if it had just been created (so new units can be connected). Note that a
39 sink is automatically closed when the unit it is part of is done parsing. Also
40 note that a previously connected parsing unit can *not* be reconnected; trying
41 to do so will still throw a ``UnitAlreadyConnected`` exception.
42 )"};
43  }
44 END_METHOD
45 
46 BEGIN_METHOD(sink, Connect)
47  auto signature() const {
49  .result = type::Void(),
50  .id = "connect",
51  .args = {{.id = "u",
52  .type = type::StrongReference(type::Unit(type::Wildcard()))}},
53  .doc = R"(
54 Connects a parsing unit to a sink. All subsequent write operations to the sink will pass their
55 data on to this parsing unit. Each unit can only be connected to a single sink. If
56 the unit is already connected, a ``UnitAlreadyConnected`` exception is thrown.
57 However, a sink can have more than one unit connected to it.
58 )"};
59  }
60 END_METHOD
61 
62 BEGIN_METHOD(sink, ConnectMIMETypeString)
63  auto signature() const {
65  .result = type::Void(),
66  .id = "connect_mime_type",
67  .args = {{.id = "mt", .type = type::String()}},
68  .doc = R"(
69 Connects parsing units to a sink for all parsers that support a given MIME
70 type. All subsequent write operations to the sink will pass their data on to
71 these parsing units. The MIME type may have wildcards for type or subtype, and
72 the method will then connect units for all matching parsers.
73 )"};
74  }
75 END_METHOD
76 
77 BEGIN_METHOD(sink, ConnectMIMETypeBytes)
78  auto signature() const {
80  .result = type::Void(),
81  .id = "connect_mime_type",
82  .args = {{.id = "mt", .type = type::Bytes()}},
83  .doc = R"(
84 Connects parsing units to a sink for all parsers that support a given MIME
85 type. All subsequent write operations to the sink will pass their data on to
86 these parsing units. The MIME type may have wildcards for type or subtype, and
87 the method will then connect units for all matching parsers.
88 )"};
89  }
90 END_METHOD
91 
92 BEGIN_METHOD(sink, ConnectFilter)
93  auto signature() const {
95  .result = hilti::type::Void(),
96  .id = "connect_filter",
97  .args = {{.id = "filter",
99  spicy::type::Unit(type::Wildcard()))}},
100  .doc = R"(
101 Connects a filter unit to the sink that will transform its input transparently
102 before forwarding it for parsing to other connected units.
103 
104 Multiple filters can be added to a sink, in which case they will be chained
105 into a pipeline and the data will be passed through them in the order they have been
106 added. The parsing will then be carried out on the output of the last filter in
107 the chain.
108 
109 Filters must be added before the first data chunk is written into the sink. If
110 data has already been written when a filter is added, an error is triggered.
111 )"};
112  }
113 END_METHOD
114 
115 BEGIN_METHOD(sink, Gap)
116  auto signature() const {
118  .result = type::Void(),
119  .id = "gap",
120  .args = {{.id = "seq", .type = type::UnsignedInteger(64)},
121  {.id = "len", .type = type::UnsignedInteger(64)}},
122  .doc = R"(
123 Reports a gap in the input stream. *seq* is the sequence number of the first
124 byte missing, *len* is the length of the gap.
125 )"};
126  }
127 END_METHOD
128 
129 BEGIN_METHOD(sink, SequenceNumber)
130  auto signature() const {
131  return hilti::operator_::Signature{.self = type::constant(spicy::type::Sink()),
132  .result = type::UnsignedInteger(64),
133  .id = "sequence_number",
134  .args = {},
135  .doc = R"(
136 Returns the current sequence number of the sink's input stream, which is one
137 beyond the index of the last byte that has been put in order and delivered so far.
138 )"};
139  }
140 END_METHOD
141 
142 BEGIN_METHOD(sink, SetAutoTrim)
143  auto signature() const {
145  .result = type::Void(),
146  .id = "set_auto_trim",
147  .args = {{.id = "enable", .type = type::Bool()}},
148  .doc = R"(
149 Enables or disables auto-trimming. If enabled (which is the default) sink input
150 data is trimmed automatically once in-order and processed. See ``trim()`` for
151 more information about trimming.
152 )"};
153  }
154 END_METHOD
155 
156 BEGIN_METHOD(sink, SetInitialSequenceNumber)
157  auto signature() const {
159  .result = type::Void(),
160  .id = "set_initial_sequence_number",
161  .args =
162  {
163  {.id = "seq", .type = type::UnsignedInteger(64)},
164  },
165  .doc = R"(
166 Sets the sink's initial sequence number. All sequence numbers given to other
167 methods are then assumed to be absolute numbers beyond that initial number. If
168 the initial number is not set, the sink implicitly uses zero instead.
169 )"};
170  }
171 END_METHOD
172 
173 BEGIN_METHOD(sink, SetPolicy)
174  auto signature() const {
176  .result = type::Void(),
177  .id = "set_policy",
178  .args =
179  {
180  {.id = "policy",
181  .type =
182  type::Enum(type::Wildcard())}, // TODO(robin): Specify full type
183  },
184  .doc = R"(
185 Sets a sink's reassembly policy for ambiguous input. As long as data hasn't
186 been trimmed, a sink will detect overlapping chunks. This policy decides how to
187 handle ambiguous overlaps. The default (and currently only) policy is
188 ``ReassemblerPolicy::First``, which resolves ambiguities by taking the data
189 from the chunk that came first.
190 )"};
191  }
192 END_METHOD
193 
194 BEGIN_METHOD(sink, Skip)
195  auto signature() const {
197  .result = type::Void(),
198  .id = "skip",
199  .args =
200  {
201  {.id = "seq", .type = type::UnsignedInteger(64)},
202  },
203  .doc = R"(
204 Skips ahead in the input stream. *seq* is the sequence number where to continue
205 parsing. If there's still data buffered before that position it will be
206 ignored; if auto-skip is also active, it will be immediately deleted as well.
207 If new data is passed in later that comes before *seq*, that will likewise be
208 ignored. If the input stream is currently stuck inside a gap, and *seq* lies
209 beyond that gap, the stream will resume processing at *seq*.
210 )"};
211  }
212 END_METHOD
213 
214 BEGIN_METHOD(sink, Trim)
215  auto signature() const {
217  .result = type::Void(),
218  .id = "trim",
219  .args =
220  {
221  {.id = "seq", .type = type::UnsignedInteger(64)},
222  },
223  .doc = R"(
224 Deletes all data that's still buffered internally up to *seq*. If processing the
225 input stream hasn't reached *seq* yet, parsing will also skip ahead to *seq*.
226 
227 Trimming the input stream releases the memory, but that means that the sink won't be
228 able to detect any further data mismatches.
229 
230 Note that by default, auto-trimming is enabled, which means all data is trimmed
231 automatically once in-order and processed.
232 )"};
233  }
234 END_METHOD
235 
236 BEGIN_METHOD(sink, Write)
237  auto signature() const {
239  .result = type::Void(),
240  .id = "write",
241  .args = {{.id = "data", .type = type::Bytes()},
242  {.id = "seq", .type = type::UnsignedInteger(64), .optional = true},
243  {.id = "len", .type = type::UnsignedInteger(64), .optional = true}},
244  .doc = R"(
245 Passes data on to all connected parsing units. Multiple *write* calls act like
246 passing input in incrementally: The units will parse the pieces as if they were
247 a single stream of data. If no sequence number *seq* is provided, the data is
248 assumed to represent a chunk to be appended to the current end of the input
249 stream. If a sequence number is provided, out-of-order data will be buffered
250 and reassembled before being passed on. If *len* is provided, the data is assumed
251 to represent that many bytes inside the sequence space; if not provided, *len*
252 defaults to the length of *data*.
253 
254 If no units are connected, the call does not have any effect. If multiple units are
255 connected and one parsing unit throws an exception, parsing of subsequent units
256 does not proceed. Note that the order in which the data is parsed to each unit
257 is undefined.
258 
259 .. todo:: The error semantics for multiple units aren't great.
260 
261 )"};
262  }
263 END_METHOD
264 
265 } // namespace spicy::operator_
Definition: void.h:13
Definition: sink.h:13
Definition: unit.h:51
Definition: reference.h:16
Type self
Definition: operator.h:225
Definition: bitfield.h:18
Definition: operator.h:224