Line data Source code
1 : //
2 : // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 : // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
4 : //
5 : // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 : // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 : //
8 : // Official repository: https://github.com/boostorg/json
9 : //
10 :
11 : #ifndef BOOST_JSON_BASIC_PARSER_HPP
12 : #define BOOST_JSON_BASIC_PARSER_HPP
13 :
14 : #include <boost/json/detail/config.hpp>
15 : #include <boost/json/detail/except.hpp>
16 : #include <boost/json/error.hpp>
17 : #include <boost/json/kind.hpp>
18 : #include <boost/json/parse_options.hpp>
19 : #include <boost/json/detail/stack.hpp>
20 : #include <boost/json/detail/stream.hpp>
21 : #include <boost/json/detail/utf8.hpp>
22 : #include <boost/json/detail/sbo_buffer.hpp>
23 :
24 : namespace boost {
25 : namespace json {
26 :
27 : /** An incremental SAX parser for serialized JSON.
28 :
29 : This implements a SAX-style parser, invoking a caller-supplied handler with
30 : each parsing event. To use, first declare a variable of type
31 : `basic_parser<T>` where `T` meets the handler requirements specified below.
32 : Then call @ref write_some one or more times with the input, setting
33 : `more = false` on the final buffer. The parsing events are realized through
34 : member function calls on the handler, which exists as a data member of the
35 : parser.
36 :
37 : The parser may dynamically allocate intermediate storage as needed to
38 : accommodate the nesting level of the input JSON. On subsequent invocations,
39 : the parser can cheaply re-use this memory, improving performance. This
40 : storage is freed when the parser is destroyed.
41 :
42 : @par Usage
43 : To get the declaration and function definitions for this class it is
44 : necessary to include this file instead:
45 : @code
46 : #include <boost/json/basic_parser_impl.hpp>
47 : @endcode
48 :
49 : Users who wish to parse JSON into the DOM container @ref value will not use
50 : this class directly; instead they will create an instance of @ref parser or
51 : @ref stream_parser and use that instead. Alternatively, they may call the
52 : function @ref parse. This class is designed for users who wish to perform
53 : custom actions instead of building a @ref value, for example to produce a
54 : DOM from an external library.
55 :
56 : @note
57 : By default, only conforming JSON using UTF-8 encoding is accepted. However,
58 : select non-compliant syntax can be allowed by construction using a
59 : @ref parse_options set to desired values.
60 :
61 : @par Handler
62 : The handler provided must be implemented as an object of class type which
63 : defines each of the required event member functions below. The event
64 : functions return a `bool` where `true` indicates success, and `false`
65 : indicates failure. If the member function returns `false`, it must set the
66 : error code to a suitable value. This error code will be returned by the
67 : write function to the caller.
68 :
69 : Handlers are required to declare the maximum limits on various elements. If
70 : these limits are exceeded during parsing, then parsing fails with an error.
71 :
72 : The following declaration meets the parser's handler requirements:
73 :
74 : @code
75 : struct handler
76 : {
77 : /// The maximum number of elements allowed in an array
78 : static constexpr std::size_t max_array_size = -1;
79 :
80 : /// The maximum number of elements allowed in an object
81 : static constexpr std::size_t max_object_size = -1;
82 :
83 : /// The maximum number of characters allowed in a string
84 : static constexpr std::size_t max_string_size = -1;
85 :
86 : /// The maximum number of characters allowed in a key
87 : static constexpr std::size_t max_key_size = -1;
88 :
89 : /// Called once when the JSON parsing begins.
90 : ///
91 : /// @return `true` on success.
92 : /// @param ec Set to the error, if any occurred.
93 : ///
94 : bool on_document_begin( error_code& ec );
95 :
96 : /// Called when the JSON parsing is done.
97 : ///
98 : /// @return `true` on success.
99 : /// @param ec Set to the error, if any occurred.
100 : ///
101 : bool on_document_end( error_code& ec );
102 :
103 : /// Called when the beginning of an array is encountered.
104 : ///
105 : /// @return `true` on success.
106 : /// @param ec Set to the error, if any occurred.
107 : ///
108 : bool on_array_begin( error_code& ec );
109 :
110 : /// Called when the end of the current array is encountered.
111 : ///
112 : /// @return `true` on success.
113 : /// @param n The number of elements in the array.
114 : /// @param ec Set to the error, if any occurred.
115 : ///
116 : bool on_array_end( std::size_t n, error_code& ec );
117 :
118 : /// Called when the beginning of an object is encountered.
119 : ///
120 : /// @return `true` on success.
121 : /// @param ec Set to the error, if any occurred.
122 : ///
123 : bool on_object_begin( error_code& ec );
124 :
125 : /// Called when the end of the current object is encountered.
126 : ///
127 : /// @return `true` on success.
128 : /// @param n The number of elements in the object.
129 : /// @param ec Set to the error, if any occurred.
130 : ///
131 : bool on_object_end( std::size_t n, error_code& ec );
132 :
133 : /// Called with characters corresponding to part of the current string.
134 : ///
135 : /// @return `true` on success.
136 : /// @param s The partial characters
137 : /// @param n The total size of the string thus far
138 : /// @param ec Set to the error, if any occurred.
139 : ///
140 : bool on_string_part( string_view s, std::size_t n, error_code& ec );
141 :
142 : /// Called with the last characters corresponding to the current string.
143 : ///
144 : /// @return `true` on success.
145 : /// @param s The remaining characters
146 : /// @param n The total size of the string
147 : /// @param ec Set to the error, if any occurred.
148 : ///
149 : bool on_string( string_view s, std::size_t n, error_code& ec );
150 :
151 : /// Called with characters corresponding to part of the current key.
152 : ///
153 : /// @return `true` on success.
154 : /// @param s The partial characters
155 : /// @param n The total size of the key thus far
156 : /// @param ec Set to the error, if any occurred.
157 : ///
158 : bool on_key_part( string_view s, std::size_t n, error_code& ec );
159 :
160 : /// Called with the last characters corresponding to the current key.
161 : ///
162 : /// @return `true` on success.
163 : /// @param s The remaining characters
164 : /// @param n The total size of the key
165 : /// @param ec Set to the error, if any occurred.
166 : ///
167 : bool on_key( string_view s, std::size_t n, error_code& ec );
168 :
169 : /// Called with the characters corresponding to part of the current number.
170 : ///
171 : /// @return `true` on success.
172 : /// @param s The partial characters
173 : /// @param ec Set to the error, if any occurred.
174 : ///
175 : bool on_number_part( string_view s, error_code& ec );
176 :
177 : /// Called when a signed integer is parsed.
178 : ///
179 : /// @return `true` on success.
180 : /// @param i The value
181 : /// @param s The remaining characters
182 : /// @param ec Set to the error, if any occurred.
183 : ///
184 : bool on_int64( int64_t i, string_view s, error_code& ec );
185 :
186 : /// Called when an unsigned integer is parsed.
187 : ///
188 : /// @return `true` on success.
189 : /// @param u The value
190 : /// @param s The remaining characters
191 : /// @param ec Set to the error, if any occurred.
192 : ///
193 : bool on_uint64( uint64_t u, string_view s, error_code& ec );
194 :
195 : /// Called when a double is parsed.
196 : ///
197 : /// @return `true` on success.
198 : /// @param d The value
199 : /// @param s The remaining characters
200 : /// @param ec Set to the error, if any occurred.
201 : ///
202 : bool on_double( double d, string_view s, error_code& ec );
203 :
204 : /// Called when a boolean is parsed.
205 : ///
206 : /// @return `true` on success.
207 : /// @param b The value
209 : /// @param ec Set to the error, if any occurred.
210 : ///
211 : bool on_bool( bool b, error_code& ec );
212 :
213 : /// Called when a null is parsed.
214 : ///
215 : /// @return `true` on success.
216 : /// @param ec Set to the error, if any occurred.
217 : ///
218 : bool on_null( error_code& ec );
219 :
220 : /// Called with characters corresponding to part of the current comment.
221 : ///
222 : /// @return `true` on success.
223 : /// @param s The partial characters.
224 : /// @param ec Set to the error, if any occurred.
225 : ///
226 : bool on_comment_part( string_view s, error_code& ec );
227 :
228 : /// Called with the last characters corresponding to the current comment.
229 : ///
230 : /// @return `true` on success.
231 : /// @param s The remaining characters
232 : /// @param ec Set to the error, if any occurred.
233 : ///
234 : bool on_comment( string_view s, error_code& ec );
235 : };
236 : @endcode
237 :
238 : @see
239 : @ref parse,
240 : @ref stream_parser,
241 : \<\<examples_validate, validating parser example\>\>.
242 : */
243 : template<class Handler>
244 : class basic_parser // incremental SAX parser; owns a Handler (h_) on which the parse events are invoked
245 : {
246 : enum class state : char // values of this type are taken as `st` by the suspend*/maybe_suspend helpers below
247 : {
248 : doc1, doc3, // NOTE(review): prefixes look like doc/comment/literal/string/surrogate/object/array/number/exponent/value - confirm in basic_parser_impl.hpp
249 : com1, com2, com3, com4,
250 : lit1,
251 : str1, str2, str3, str4,
252 : str5, str6, str7, str8,
253 : sur1, sur2, sur3,
254 : sur4, sur5, sur6,
255 : obj1, obj2, obj3, obj4,
256 : obj5, obj6, obj7, obj8,
257 : obj9, obj10, obj11,
258 : arr1, arr2, arr3,
259 : arr4, arr5, arr6,
260 : num1, num2, num3, num4,
261 : num5, num6, num7, num8,
262 : exp1, exp2, exp3,
263 : val1, val2, val3
264 : };
265 : // Aggregate stored in num_ and passed by const reference to the `number` overloads of suspend()/maybe_suspend().
266 : struct number // NOTE(review): fields presumably mean mantissa / bias / exponent / has-fraction / negative - inferred from names, confirm
267 : {
268 : uint64_t mant;
269 : int bias;
270 : int exp;
271 : bool frac;
272 : bool neg;
273 : };
274 : // Declared only; no definition of this helper appears in this header.
275 : template< bool StackEmpty_, char First_ >
276 : struct parse_number_helper;
277 :
278 : // optimization: must come first (h_ has to stay the first data member)
279 : Handler h_; // the instance returned by handler()
280 :
281 : number num_;
282 : system::error_code ec_; // returned by last_error()
283 : detail::stack st_;
284 : detail::utf8_sequence seq_;
285 : unsigned u1_;
286 : unsigned u2_;
287 : bool more_; // false for final buffer
288 : bool done_ = false; // true on complete parse; returned by done()
289 : bool clean_ = true; // write_some exited cleanly
290 : const char* end_;
291 : detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_; // template argument evaluates to 34; NOTE(review): presumably the small-buffer size, rationale for the terms not visible here - confirm
292 : parse_options opt_; // must stay declared before depth_, whose default initializer reads opt_.max_depth
293 : // how many levels deeper the parser can go
294 : std::size_t depth_ = opt_.max_depth;
295 : unsigned char cur_lit_ = 0;
296 : unsigned char lit_offset_ = 0;
297 :
298 : inline void reserve();
299 : inline const char* sentinel();
300 : inline bool incomplete(
301 : const detail::const_stream_wrapper& cs);
302 :
303 : #ifdef __INTEL_COMPILER // Intel compiler only: diagnostic 2196 is disabled around the BOOST_NOINLINE + inline declarations below
304 : #pragma warning push
305 : #pragma warning disable 2196
306 : #endif
307 : // Private helpers declared BOOST_NOINLINE; each one returns a const char*.
308 : BOOST_NOINLINE
309 : inline
310 : const char*
311 : suspend_or_fail(state st);
312 :
313 : BOOST_NOINLINE
314 : inline
315 : const char*
316 : suspend_or_fail(
317 : state st,
318 : std::size_t n);
319 :
320 : BOOST_NOINLINE
321 : inline
322 : const char*
323 : fail(const char* p) noexcept;
324 :
325 : BOOST_NOINLINE
326 : inline
327 : const char*
328 : fail(
329 : const char* p,
330 : error ev,
331 : source_location const* loc) noexcept;
332 :
333 : BOOST_NOINLINE
334 : inline
335 : const char*
336 : maybe_suspend(
337 : const char* p,
338 : state st);
339 :
340 : BOOST_NOINLINE
341 : inline
342 : const char*
343 : maybe_suspend(
344 : const char* p,
345 : state st,
346 : std::size_t n);
347 :
348 : BOOST_NOINLINE
349 : inline
350 : const char*
351 : maybe_suspend(
352 : const char* p,
353 : state st,
354 : const number& num);
355 :
356 : BOOST_NOINLINE
357 : inline
358 : const char*
359 : suspend(
360 : const char* p,
361 : state st);
362 :
363 : BOOST_NOINLINE
364 : inline
365 : const char*
366 : suspend(
367 : const char* p,
368 : state st,
369 : const number& num);
370 :
371 : #ifdef __INTEL_COMPILER // matches the `warning push` above
372 : #pragma warning pop
373 : #endif
374 : // Parse routines: options written as commented-out template parameters are passed as runtime bools instead.
375 : template<bool StackEmpty_/*, bool Terminal_*/>
376 : const char* parse_comment(const char* p,
377 : std::integral_constant<bool, StackEmpty_> stack_empty,
378 : /*std::integral_constant<bool, Terminal_>*/ bool terminal);
379 :
380 : template<bool StackEmpty_>
381 : const char* parse_document(const char* p,
382 : std::integral_constant<bool, StackEmpty_> stack_empty);
383 :
384 : template<bool StackEmpty_, bool AllowComments_/*,
385 : bool AllowTrailing_, bool AllowBadUTF8_*/>
386 : const char* parse_value(const char* p,
387 : std::integral_constant<bool, StackEmpty_> stack_empty,
388 : std::integral_constant<bool, AllowComments_> allow_comments,
389 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
390 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
391 : bool allow_bad_utf16);
392 :
393 : template<bool AllowComments_/*,
394 : bool AllowTrailing_, bool AllowBadUTF8_*/>
395 : const char* resume_value(const char* p,
396 : std::integral_constant<bool, AllowComments_> allow_comments,
397 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
398 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
399 : bool allow_bad_utf16);
400 :
401 : template<bool StackEmpty_, bool AllowComments_/*,
402 : bool AllowTrailing_, bool AllowBadUTF8_*/>
403 : const char* parse_object(const char* p,
404 : std::integral_constant<bool, StackEmpty_> stack_empty,
405 : std::integral_constant<bool, AllowComments_> allow_comments,
406 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
407 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
408 : bool allow_bad_utf16);
409 :
410 : template<bool StackEmpty_, bool AllowComments_/*,
411 : bool AllowTrailing_, bool AllowBadUTF8_*/>
412 : const char* parse_array(const char* p,
413 : std::integral_constant<bool, StackEmpty_> stack_empty,
414 : std::integral_constant<bool, AllowComments_> allow_comments,
415 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
416 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
417 : bool allow_bad_utf16);
418 :
419 : template<class Literal>
420 : const char* parse_literal(const char* p, Literal literal);
421 :
422 : template<bool StackEmpty_, bool IsKey_>
423 : const char* parse_string(const char* p,
424 : std::integral_constant<bool, StackEmpty_> stack_empty,
425 : std::integral_constant<bool, IsKey_> is_key,
426 : bool allow_bad_utf8,
427 : bool allow_bad_utf16);
428 :
429 : template<bool StackEmpty_>
430 : const char* parse_escaped(
431 : const char* p,
432 : std::size_t& total,
433 : std::integral_constant<bool, StackEmpty_> stack_empty,
434 : bool is_key,
435 : bool allow_bad_utf16);
436 :
437 : template<bool StackEmpty_, char First_, number_precision Numbers_>
438 : const char* parse_number(const char* p,
439 : std::integral_constant<bool, StackEmpty_> stack_empty,
440 : std::integral_constant<char, First_> first,
441 : std::integral_constant<number_precision, Numbers_> numbers);
442 :
443 : // intentionally private
444 : std::size_t
445 173075 : depth() const noexcept
446 : {
447 173075 : return opt_.max_depth - depth_; // configured limit minus the levels still available (see depth_)
448 : }
449 :
450 : public:
451 : /** Destructor.
452 :
453 : All dynamically allocated internal memory is freed.
454 :
455 : @par Effects
456 : @code
457 : handler().~Handler()
458 : @endcode
459 :
460 : @par Complexity
461 : Same as `~Handler()`.
462 :
463 : @par Exception Safety
464 : Same as `~Handler()`.
465 : */
466 2164604 : ~basic_parser() = default;
467 :
468 : /** Constructors.
469 :
470 : Overload **(1)** constructs the parser with the specified options, with
471 : any additional arguments forwarded to the handler's constructor.
472 :
473 : `basic_parser` is not copyable or movable, so the copy constructor is
474 : deleted.
475 :
476 : @par Complexity
477 : Same as `Handler( std::forward< Args >( args )... )`.
478 :
479 : @par Exception Safety
480 : Same as `Handler( std::forward< Args >( args )... )`.
481 :
482 : @param opt Configuration settings for the parser. If this structure is
483 : default constructed, the parser will accept only standard JSON.
484 : @param args Optional additional arguments forwarded to the handler's
485 : constructor.
486 :
487 : @{
488 : */
489 : template<class... Args>
490 : explicit
491 : basic_parser(
492 : parse_options const& opt,
493 : Args&&... args);
494 :
495 : /// Overload
496 : basic_parser(
497 : basic_parser const&) = delete;
498 : /// @}
499 :
500 : /** Assignment.
501 :
502 : This type cannot be copied or moved. The copy assignment is deleted.
503 : */
504 : basic_parser& operator=(
505 : basic_parser const&) = delete;
506 :
507 : /** Return a reference to the handler.
508 :
509 : This function provides access to the constructed
510 : instance of the handler owned by the parser.
511 :
512 : @par Complexity
513 : Constant.
514 :
515 : @par Exception Safety
516 : No-throw guarantee.
517 :
518 : @{
519 : */
520 : Handler&
521 6310634 : handler() noexcept
522 : {
523 6310634 : return h_;
524 : }
525 :
526 : Handler const&
527 24 : handler() const noexcept
528 : {
529 24 : return h_;
530 : }
531 : /// @}
532 :
533 : /** Return the last error.
534 :
535 : This returns the last error code which
536 : was generated in the most recent call
537 : to @ref write_some.
538 :
539 : @par Complexity
540 : Constant.
541 :
542 : @par Exception Safety
543 : No-throw guarantee.
544 : */
545 : system::error_code
546 8 : last_error() const noexcept
547 : {
548 8 : return ec_;
549 : }
550 :
551 : /** Check if a complete JSON text has been parsed.
552 :
553 : This function returns `true` when all of these conditions are met:
554 :
555 : @li A complete serialized JSON text has been presented to the parser,
556 : and
557 : @li No error or exception has occurred since the parser was
558 : constructed, or since the last call to @ref reset.
559 :
560 : @par Complexity
561 : Constant.
562 :
563 : @par Exception Safety
564 : No-throw guarantee.
565 : */
566 : bool
567 4078231 : done() const noexcept
568 : {
569 4078231 : return done_;
570 : }
571 :
572 : /** Reset the state, to parse a new document.
573 :
574 : This function discards the current parsing
575 : state, to prepare for parsing a new document.
576 : Dynamically allocated temporary memory used
577 : by the implementation is not deallocated.
578 :
579 : @par Complexity
580 : Constant.
581 :
582 : @par Exception Safety
583 : No-throw guarantee.
584 : */
585 : void
586 : reset() noexcept;
587 :
588 : /** Indicate a parsing failure.
589 :
590 : This changes the state of the parser to indicate that the parse has
591 : failed. A parser implementation can use this to fail the parser if
592 : needed due to external inputs.
593 :
594 : @attention
595 : If `! ec.failed()`, an implementation-defined error code that indicates
596 : failure will be stored instead.
597 :
598 : @par Complexity
599 : Constant.
600 :
601 : @par Exception Safety
602 : No-throw guarantee.
603 :
604 : @param ec The error code to set.
605 : */
606 : void
607 : fail(system::error_code ec) noexcept;
608 :
609 : /** Parse some of the input characters as JSON, incrementally.
610 :
611 : This function parses the JSON text in the specified buffer, calling the
612 : handler to emit each SAX parsing event. The parse proceeds from the
613 : current state, which is at the beginning of a new JSON or in the middle
614 : of the current JSON if any characters were already parsed.
615 :
616 : The characters in the buffer are processed starting from the beginning,
617 : until one of the following conditions is met:
618 :
619 : @li All of the characters in the buffer have been parsed, or
620 : @li Some of the characters in the buffer have been parsed and the JSON
621 : is complete, or
622 : @li A parsing error occurs.
623 :
624 : The supplied buffer does not need to contain the entire JSON.
625 : Subsequent calls can provide more serialized data, allowing JSON to be
626 : processed incrementally. The end of the serialized JSON can be
627 : indicated by passing `more = false`.
628 :
629 : @par Complexity
630 : Linear in `size`.
631 :
632 : @par Exception Safety
633 : Basic guarantee. Calls to the handler may throw.
634 :
635 : Upon error or exception, subsequent calls will fail until @ref reset
636 : is called to parse a new JSON.
637 :
638 : @return The number of characters successfully
639 : parsed, which may be smaller than `size`.
640 :
641 : @param more `true` if there are possibly more buffers in the current
642 : JSON, otherwise `false`.
643 :
644 : @param data A pointer to a buffer of `size` characters to parse.
645 :
646 : @param size The number of characters pointed to by `data`.
647 :
648 : @param ec Set to the error, if any occurred.
649 :
650 : @{
651 : */
652 : std::size_t
653 : write_some(
654 : bool more,
655 : char const* data,
656 : std::size_t size,
657 : system::error_code& ec);
658 :
659 : std::size_t
660 : write_some(
661 : bool more,
662 : char const* data,
663 : std::size_t size,
664 : std::error_code& ec);
665 : /// @}
666 : };
667 :
668 : } // namespace json
669 : } // namespace boost
670 :
671 : #endif
|