Line data Source code
1 : //
2 : // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 : //
4 : // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 : // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 : //
7 : // Official repository: https://github.com/boostorg/json
8 : //
9 :
10 : #ifndef BOOST_JSON_STREAM_PARSER_HPP
11 : #define BOOST_JSON_STREAM_PARSER_HPP
12 :
13 : #include <boost/json/detail/config.hpp>
14 : #include <boost/json/basic_parser.hpp>
15 : #include <boost/json/parse_options.hpp>
16 : #include <boost/json/storage_ptr.hpp>
17 : #include <boost/json/value.hpp>
18 : #include <boost/json/detail/handler.hpp>
19 : #include <type_traits>
20 : #include <cstddef>
21 :
22 : namespace boost {
23 : namespace json {
24 :
25 : //----------------------------------------------------------
26 :
27 : /** A DOM parser for JSON text contained in multiple buffers.
28 :
29 : This class is used to parse a JSON text contained in a series of one or
30 : more character buffers, into a @ref value container. It implements a
31 : [_streaming algorithm_](https://en.wikipedia.org/wiki/Streaming_algorithm),
32 : allowing these parsing strategies:
33 :
34 : @li parse a JSON file a piece at a time;
35 : @li parse incoming JSON text as it arrives, one buffer at a time;
36 : @li parse with bounded resource consumption per cycle.
37 :
38 : @par Usage
39 : To use the parser first construct it, then optionally call @ref reset to
40 : specify a @ref storage_ptr to use for the resulting @ref value. Then call
41 : @ref write one or more times to parse a single, complete JSON text. Call
42 : @ref done to determine if the parse has completed. To indicate there are no
43 : more buffers, call @ref finish. If the parse is successful, call @ref
44 : release to take ownership of the value:
45 :
46 : @code
47 : stream_parser p; // construct a parser
48 : p.write( "[1,2" ); // parse some of a JSON text
49 : p.write( ",3,4]" ); // parse the rest of the JSON text
50 : assert( p.done() ); // we have a complete JSON text
51 : value jv = p.release(); // take ownership of the value
52 : @endcode
53 :
54 : @par Extra Data
55 : When the character buffer provided as input contains additional data that
56 : is not part of the complete JSON text, an error is returned. The @ref
57 : write_some function is an alternative which allows the parse to finish
58 : early, without consuming all the characters in the buffer. This allows
59 : parsing of a buffer containing multiple individual JSON texts or containing
60 : different protocol data:
61 :
62 : @code
63 : stream_parser p; // construct a parser
64 : std::size_t n; // number of characters used
65 : n = p.write_some( "[1,2" ); // parse some of a JSON text
66 : assert( n == 4 ); // all characters consumed
67 : n = p.write_some( ",3,4] null" ); // parse the remainder of the JSON text
68 : assert( n == 6 ); // only some characters consumed
69 : assert( p.done() ); // we have a complete JSON text
70 : value jv = p.release(); // take ownership of the value
71 : @endcode
72 :
73 : @par Temporary Storage
74 : The parser may dynamically allocate temporary storage as needed to
75 : accommodate the nesting level of the JSON text being parsed. Temporary
76 : storage is first obtained from an optional, caller-owned buffer specified
77 : upon construction. When that is exhausted, the next allocation uses the
78 : @ref boost::container::pmr::memory_resource passed to the constructor; if
79 : no such argument is specified, the default memory resource is used.
80 : Temporary storage is freed only when the parser is destroyed. The
81 : performance of parsing multiple JSON texts may be improved by reusing the
82 : same parser instance.
83 :
84 : It is important to note that the @ref
85 : boost::container::pmr::memory_resource supplied upon construction is used
86 : for temporary storage only, and not for allocating the elements which make
87 : up the parsed value. That other memory resource is optionally supplied in
88 : each call to @ref reset.
89 :
90 : @par Duplicate Keys
91 : If there are object elements with duplicate keys; that is, if multiple
92 : elements in an object have keys that compare equal, only the last
93 : equivalent element will be inserted.
94 :
95 : @par Non-Standard JSON
96 : The @ref parse_options structure optionally provided upon construction is
97 : used to customize some parameters of the parser, including which
98 : non-standard JSON extensions should be allowed. A default-constructed
99 : @ref parse_options object allows only standard JSON.
100 :
101 : @par Thread Safety
102 : Distinct instances may be accessed concurrently. Non-const member functions
103 : of a shared instance may not be called concurrently with any other member
104 : functions of that instance.
105 :
106 : @see @ref parse, @ref parser, @ref parse_options.
107 : */
108 : class stream_parser
109 : {
110 : basic_parser<detail::handler> p_; // wrapped low-level parser; done() forwards to it
111 :
112 : public:
113 : /** Destructor.
114 :
115 : All dynamically allocated memory, including
116 : any incomplete parsing results, is freed.
117 :
118 : @par Complexity
119 : Linear in the size of partial results
120 :
121 : @par Exception Safety
122 : No-throw guarantee.
123 : */
124 75326 : ~stream_parser() = default;
125 :
126 : /** Constructors.
127 :
128 : Construct a new parser.
129 :
130 : The parser will only support standard JSON if overloads **(1)**
131 : or **(2)** are used. Otherwise the parser will support extensions
132 : specified by the parameter `opt`.
133 :
134 : The parsed value will use the \<\<default_memory_resource,default
135 : memory resource\>\> for storage. To use a different resource, call @ref
136 : reset after construction.
137 :
138 : The main difference between the overloads is in what the constructed
139 : parser will use for temporary storage:
140 :
141 : @li **(1)** the constructed parser uses the default memory resource for
142 : temporary storage.
143 :
144 : @li **(2)**, **(3)** the constructed parser uses the memory resource of
145 : `sp` for temporary storage.
146 :
147 : @li **(4)**, **(6)** the constructed parser first uses the caller-owned
148 : storage `[buffer, buffer + size)` for temporary storage, falling back
149 : to the memory resource of `sp` if needed.
150 :
151 : @li **(5)**, **(7)** the constructed parser first uses the caller-owned
152 : storage `[buffer, buffer + N)` for temporary storage, falling back to
153 : the memory resource of `sp` if needed.
154 :
155 : @note Ownership of `buffer` is not transferred. The caller is
156 : responsible for ensuring the lifetime of the storage pointed to by
157 : `buffer` extends until the parser is destroyed.
158 :
159 : Overload **(8)** is the copy constructor. The type is neither copyable
160 : nor movable, so the overload is deleted.
161 :
162 : @par Complexity
163 : Constant.
164 :
165 : @par Exception Safety
166 : No-throw guarantee.
167 :
168 : @{
169 : */
170 187 : stream_parser() noexcept
171 187 : : stream_parser({}, {})
172 : {
173 187 : }
174 :
175 :
176 : /** Overload
177 :
178 : @param sp The memory resource to use for temporary storage.
179 : */
180 : explicit
181 2 : stream_parser(storage_ptr sp) noexcept
182 2 : : stream_parser(std::move(sp), {})
183 : {
184 2 : }
185 :
186 : /** Overload
187 :
188 : @param opt The parsing options to use.
189 : @param sp
190 : */
191 : BOOST_JSON_DECL
192 : stream_parser(
193 : storage_ptr sp,
194 : parse_options const& opt) noexcept;
195 :
196 : /** Overload
197 : @param buffer A pointer to valid storage.
198 : @param size The number of valid bytes in `buffer`.
199 : @param sp
200 : @param opt
201 : */
202 : BOOST_JSON_DECL
203 : stream_parser(
204 : storage_ptr sp,
205 : parse_options const& opt,
206 : unsigned char* buffer,
207 : std::size_t size) noexcept;
208 :
209 : /** Overload
210 :
211 : @tparam N The number of valid bytes in `buffer`.
212 : @param sp
213 : @param opt
214 : @param buffer
215 : */
216 : template<std::size_t N>
217 20 : stream_parser(
218 : storage_ptr sp,
219 : parse_options const& opt,
220 : unsigned char(&buffer)[N]) noexcept
221 20 : : stream_parser(std::move(sp),
222 20 : opt, &buffer[0], N)
223 : {
224 20 : }
225 :
226 : #if defined(__cpp_lib_byte) || defined(BOOST_JSON_DOCS)
227 : /** Overload
228 :
229 : @param sp
230 : @param opt
231 : @param buffer
232 : @param size
233 : */
234 : stream_parser(
235 : storage_ptr sp,
236 : parse_options const& opt,
237 : std::byte* buffer,
238 : std::size_t size) noexcept
239 : : stream_parser(std::move(sp), opt, reinterpret_cast<
240 : unsigned char*>(buffer), size)
241 : {
242 : }
243 :
244 : /** Overload
245 :
246 : @tparam N
247 : @param sp
248 : @param opt
249 : @param buffer
250 : */
251 : template<std::size_t N>
252 : stream_parser(
253 : storage_ptr sp,
254 : parse_options const& opt,
255 : std::byte(&buffer)[N]) noexcept
256 : : stream_parser(std::move(sp),
257 : opt, &buffer[0], N)
258 : {
259 : }
260 : #endif
261 :
262 : #ifndef BOOST_JSON_DOCS
263 : // Safety net for accidental buffer overflows
264 : template<std::size_t N>
265 : stream_parser(
266 : storage_ptr sp,
267 : parse_options const& opt,
268 : unsigned char(&buffer)[N],
269 : std::size_t n) noexcept
270 : : stream_parser(std::move(sp),
271 : opt, &buffer[0], n)
272 : {
273 : // If this goes off, check your parameters
274 : // closely, chances are you passed an array
275 : // thinking it was a pointer.
276 : BOOST_ASSERT(n <= N);
277 : }
278 :
279 : #ifdef __cpp_lib_byte
280 : // Safety net for accidental buffer overflows
281 : template<std::size_t N>
282 : stream_parser(
283 : storage_ptr sp,
284 : parse_options const& opt,
285 : std::byte(&buffer)[N], std::size_t n) noexcept
286 : : stream_parser(std::move(sp),
287 : opt, &buffer[0], n)
288 : {
289 : // If this goes off, check your parameters
290 : // closely, chances are you passed an array
291 : // thinking it was a pointer.
292 : BOOST_ASSERT(n <= N);
293 : }
294 : #endif
295 : #endif
296 :
297 : /// Overload
298 : stream_parser(
299 : stream_parser const&) = delete;
300 : /// @}
301 :
302 : /** Assignment operator.
303 :
304 : This type is neither copyable nor movable, so copy assignment operator
305 : is deleted.
306 : */
307 : stream_parser& operator=(
308 : stream_parser const&) = delete;
309 :
310 : /** Reset the parser for a new JSON text.
311 :
312 : This function is used to reset the parser to prepare it for parsing
313 : a new complete JSON text. Any previous partial results are destroyed.
314 : The new value will use the memory resource of `sp`.
315 :
316 : @par Complexity
317 : Constant or linear in the size of any previous partial parsing results.
318 :
319 : @par Exception Safety
320 : No-throw guarantee.
321 :
322 : @param sp A pointer to the @ref boost::container::pmr::memory_resource.
323 : */
324 : BOOST_JSON_DECL
325 : void
326 : reset(storage_ptr sp = {}) noexcept;
327 :
328 : /** Check if a complete JSON text has been parsed.
329 :
330 : This function returns `true` when all of these conditions are met:
331 :
332 : @li A complete serialized JSON text has been presented to the parser,
333 : and
334 :
335 : @li No error has occurred since the parser was constructed, or since
336 : the last call to @ref reset.
337 :
338 : @par Complexity
339 : Constant.
340 :
341 : @par Exception Safety
342 : No-throw guarantee.
343 : */
344 : bool
345 30 : done() const noexcept
346 : {
347 30 : return p_.done();
348 : }
349 :
350 : /** Parse a buffer containing all or part of a complete JSON text.
351 :
352 : This function parses JSON text contained in the specified character
353 : buffer. If parsing completes, any additional characters past the end of
354 : the complete JSON text are ignored. The function returns the actual
355 : number of characters parsed, which may be less than the size of the
356 : input. This allows parsing of a buffer containing multiple individual
357 : JSON texts or containing different protocol data.
358 :
359 : Overloads **(1)**, **(2)**, **(4)**, and **(5)** report errors by
360 : setting `ec`. Overloads **(3)** and **(6)** report errors by throwing
361 : exceptions. Upon error or exception, subsequent calls will fail until
362 : @ref reset is called to parse a new JSON text.
363 :
364 : @note To indicate there are no more character buffers, such as when
365 : @ref done returns `false` after writing, call @ref finish.
366 :
367 : @par Example
368 : @code
369 : stream_parser p; // construct a parser
370 : std::size_t n; // number of characters used
371 : n = p.write_some( "[1,2" ); // parse the first part of the JSON text
372 : assert( n == 4 ); // all characters consumed
373 : n = p.write_some( "3,4] null" ); // parse the rest of the JSON text
374 : assert( n == 5 ); // only some characters consumed
375 : value jv = p.release(); // take ownership of the value
376 : @endcode
377 :
378 : @par Complexity
379 : @li **(1)**--**(3)** linear in `size`.
380 : @li **(4)**--**(6)** linear in `s.size()`.
381 :
382 : @par Exception Safety
383 : Basic guarantee. Calls to `memory_resource::allocate` may throw.
384 :
385 : @return The number of characters consumed from the buffer.
386 :
387 : @param data A pointer to a buffer of `size` characters to parse.
388 : @param size The number of characters pointed to by `data`.
389 : @param ec Set to the error, if any occurred.
390 :
391 : @{
392 : */
393 : BOOST_JSON_DECL
394 : std::size_t
395 : write_some(
396 : char const* data,
397 : std::size_t size,
398 : system::error_code& ec);
399 :
400 : BOOST_JSON_DECL
401 : std::size_t
402 : write_some(
403 : char const* data,
404 : std::size_t size,
405 : std::error_code& ec);
406 :
407 : /** Overload
408 :
409 : @param data
410 : @param size
411 :
412 : @throw boost::system::system_error Thrown on error.
413 : */
414 : BOOST_JSON_DECL
415 : std::size_t
416 : write_some(
417 : char const* data,
418 : std::size_t size);
419 :
420 : /** Overload
421 : @param s The character string to parse.
422 : @param ec
423 : */
424 : std::size_t
425 2 : write_some(
426 : string_view s,
427 : system::error_code& ec)
428 : {
429 2 : return write_some(
430 2 : s.data(), s.size(), ec);
431 : }
432 :
433 : /** Overload
434 : @param s
435 : @param ec
436 : */
437 : std::size_t
438 2 : write_some(
439 : string_view s,
440 : std::error_code& ec)
441 : {
442 2 : return write_some(
443 2 : s.data(), s.size(), ec);
444 : }
445 :
446 : /** Overload
447 : @param s
448 : */
449 : std::size_t
450 4 : write_some(
451 : string_view s)
452 : {
453 4 : return write_some(
454 3 : s.data(), s.size());
455 : }
456 : /// @}
457 :
458 : /** Parse a buffer containing all or part of a complete JSON text.
459 :
460 : This function parses all or part of a JSON text contained in the
461 : specified character buffer. The entire buffer must be consumed; if
462 : there are additional characters past the end of the complete JSON text,
463 : the parse fails and an error is returned.
464 :
465 : Overloads **(1)**, **(2)**, **(4)**, and **(5)** report errors by
466 : setting `ec`. Overloads **(3)** and **(6)** report errors by throwing
467 : exceptions. Upon error or exception, subsequent calls will fail until
468 : @ref reset is called to parse a new JSON text.
469 :
470 : @note To indicate there are no more character buffers, such as when
471 : @ref done returns `false` after writing, call @ref finish.
472 :
473 : @par Example
474 : @code
475 : stream_parser p; // construct a parser
476 : std::size_t n; // number of characters used
477 : n = p.write( "[1,2" ); // parse some of the JSON text
478 : assert( n == 4 ); // all characters consumed
479 : n = p.write( "3,4]" ); // parse the rest of the JSON text
480 : assert( n == 4 ); // all characters consumed
481 : value jv = p.release(); // take ownership of the value
482 : @endcode
483 :
484 : @par Complexity
485 : @li **(1)**--**(3)** linear in `size`.
486 : @li **(4)**--**(6)** linear in `s.size()`.
487 :
488 : @par Exception Safety
489 : Basic guarantee. Calls to `memory_resource::allocate` may throw.
490 : @return The number of characters consumed from the buffer.
491 :
492 : @param data A pointer to a buffer of `size` characters to parse.
493 :
494 : @param size The number of characters pointed to by `data`.
495 :
496 : @param ec Set to the error, if any occurred.
497 :
498 : @{
499 : */
500 : BOOST_JSON_DECL
501 : std::size_t
502 : write(
503 : char const* data,
504 : std::size_t size,
505 : system::error_code& ec);
506 :
507 : BOOST_JSON_DECL
508 : std::size_t
509 : write(
510 : char const* data,
511 : std::size_t size,
512 : std::error_code& ec);
513 :
514 : /** Overload
515 :
516 : @param data
517 : @param size
518 :
519 : @throw boost::system::system_error Thrown on error.
520 : */
521 : BOOST_JSON_DECL
522 : std::size_t
523 : write(
524 : char const* data,
525 : std::size_t size);
526 :
527 : /** Overload
528 :
529 : @param s The character string to parse.
530 : @param ec
531 : */
532 : std::size_t
533 3 : write(
534 : string_view s,
535 : system::error_code& ec)
536 : {
537 3 : return write(
538 3 : s.data(), s.size(), ec);
539 : }
540 :
541 : /** Overload
542 :
543 : @param s
544 : @param ec
545 : */
546 : std::size_t
547 2 : write(
548 : string_view s,
549 : std::error_code& ec)
550 : {
551 2 : return write(
552 2 : s.data(), s.size(), ec);
553 : }
554 :
555 : /** Overload
556 : @param s
557 : */
558 : std::size_t
559 9 : write(
560 : string_view s)
561 : {
562 9 : return write(
563 8 : s.data(), s.size());
564 : }
565 : /// @}
566 :
567 : /** Indicate the end of JSON input.
568 :
569 : This function is used to indicate that there are no more character
570 : buffers in the current JSON text being parsed. If the resulting JSON
571 : text is incomplete, **(1)** and **(2)** assign the relevant
572 : `error_code` to `ec`, while **(3)** throws an exception.
573 :
574 : Upon error or exception, subsequent calls will fail until @ref reset is
575 : called to parse a new JSON text.
576 :
577 : @par Example
578 : In the code below, @ref finish is called to
579 : indicate there are no more digits in the
580 : resulting number:
581 : @code
582 : stream_parser p; // construct a parser
583 : p.write( "3." ); // write the first part of the number
584 : p.write( "14" ); // write the second part of the number
585 : assert( ! p.done() ); // there could be more digits
586 : p.finish(); // indicate the end of the JSON input
587 : assert( p.done() ); // now we are finished
588 : value jv = p.release(); // take ownership of the value
589 : @endcode
590 :
591 : @par Complexity
592 : Constant.
593 :
594 : @par Exception Safety
595 : Basic guarantee. Calls to `memory_resource::allocate` may throw.
596 :
597 : @param ec Set to the error, if any occurred.
598 :
599 : @{
600 : */
601 : BOOST_JSON_DECL
602 : void
603 : finish(system::error_code& ec);
604 :
605 : BOOST_JSON_DECL
606 : void
607 : finish(std::error_code& ec);
608 :
609 : /** Overload
610 :
611 : @throw boost::system::system_error Parsing error.
612 : */
613 : BOOST_JSON_DECL
614 : void
615 : finish();
616 : /// @}
617 :
618 : /** Return the parsed JSON as a @ref value.
619 :
620 : This returns the parsed value, or throws an exception if the parsing is
621 : incomplete or failed. If `! this->done()`, calls @ref finish() first.
622 : It is necessary to call @ref reset after calling this function in order
623 : to parse another JSON text.
624 :
625 : @par Complexity
626 : Constant.
627 :
628 : @return The parsed value. Ownership of this value is transferred to the
629 : caller.
630 :
631 : @throw boost::system::system_error A complete JSON text hasn't been
632 : parsed, or parsing failed.
633 : */
634 : BOOST_JSON_DECL
635 : value
636 : release();
637 : };
638 :
639 : } // namespace json
640 : } // namespace boost
641 :
642 : #endif
|