16 #include <nlohmann/detail/exceptions.hpp>
17 #include <nlohmann/detail/input/input_adapters.hpp>
18 #include <nlohmann/detail/input/json_sax.hpp>
19 #include <nlohmann/detail/macro_scope.hpp>
20 #include <nlohmann/detail/meta/is_sax.hpp>
21 #include <nlohmann/detail/value_t.hpp>
// Template header: BasicJsonType supplies the number/string types aliased below;
// SAX is the handler type and defaults to json_sax_dom_parser<BasicJsonType>.
// NOTE(review): the declaration this header belongs to is not part of this excerpt.
34 template<
typename BasicJsonType,
typename SAX = json_sax_dom_parser<BasicJsonType>>
// Shorthand aliases for the value types defined by BasicJsonType.
37 using number_integer_t =
typename BasicJsonType::number_integer_t;
38 using number_unsigned_t =
typename BasicJsonType::number_unsigned_t;
39 using number_float_t =
typename BasicJsonType::number_float_t;
40 using string_t =
typename BasicJsonType::string_t;
// The handler type; the `sax` member is declared as a pointer to it.
41 using json_sax_t = SAX;
// Top-level dispatch fragment (its declaration line is not part of this excerpt).
// Picks a format-specific parser based on an input_format_t value and stores its
// outcome in `result`; the `strict` parameter defaults to true.
69 JSON_HEDLEY_NON_NULL(3)
72 const
bool strict = true)
// One parse_*_internal() routine per supported binary format.
79 case input_format_t::bson:
80 result = parse_bson_internal();
83 case input_format_t::cbor:
84 result = parse_cbor_internal();
87 case input_format_t::msgpack:
88 result = parse_msgpack_internal();
91 case input_format_t::ubjson:
92 result = parse_ubjson_internal();
// Extra checks run only after a successful parse with `strict` set.
100 if (result and strict)
// NOTE(review): UBJSON is special-cased here; the statements guarded by this
// condition are not visible in this excerpt.
102 if (format == input_format_t::ubjson)
// A non-EOF `current` is reported to the SAX handler as parse error 110
// ("expected end of input"), including the last byte as a hex token.
111 if (JSON_HEDLEY_UNLIKELY(current != std::char_traits<char>::eof()))
113 return sax->parse_error(chars_read, get_token_string(),
114 parse_error::create(110, chars_read, exception_message(format,
"expected end of input; last byte: 0x" + get_token_string(),
"value")));
// Reads the first byte of `num` through a char pointer and compares it with 1.
// NOTE(review): presumably a host byte-order probe where `num` holds the value 1;
// the declaration of `num` and the enclosing signature are not shown — confirm.
130 return *
reinterpret_cast<char*
>(&num) == 1;
// Parses one BSON document and reports it to the SAX handler as an object.
// On the path shown, the return value is whatever sax->end_object() returns.
142 bool parse_bson_internal()
// Reads a 32-bit value into document_size; `true` selects little-endian input.
// The return value of get_number is not checked on this line.
145 get_number<std::int32_t, true>(input_format_t::bson, document_size);
// The object is announced with std::size_t(-1) instead of a concrete element count.
147 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
// is_array == false: each element's key is forwarded via sax->key().
152 if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_list(
false)))
157 return sax->end_object();
// Reads a BSON cstring and appends its bytes to `result`.
// @param result  string that receives the characters (appended via back_inserter)
// NOTE(review): the read loop and its termination test are on lines not shown here.
167 bool get_bson_cstr(string_t& result)
169 auto out = std::back_inserter(result);
// Hitting EOF is reported with the context "cstring".
173 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson,
"cstring")))
// The current byte is narrowed to char and appended.
181 *out++ =
static_cast<char>(current);
// Reads a BSON string whose length prefix has already been read by the caller.
// @tparam NumberType  type of the length prefix
// @param len     length taken from the input; values below 1 are rejected
// @param result  receives len - 1 bytes
// @return false via sax->parse_error (error 112) for len < 1; otherwise true only
//         if get_string succeeds and the byte that follows is not EOF.
198 template<
typename NumberType>
199 bool get_bson_string(
const NumberType len, string_t& result)
201 if (JSON_HEDLEY_UNLIKELY(len < 1))
203 auto last_token = get_token_string();
204 return sax->parse_error(chars_read, last_token,
parse_error::create(112, chars_read, exception_message(input_format_t::bson,
"string length must be at least 1, is " + std::to_string(len),
"string")));
// Payload is len - 1 bytes; one more byte is then consumed with get()
// (presumably the trailing terminator — its value is not checked here).
207 return get_string(input_format_t::bson, len -
static_cast<NumberType
>(1), result) and get() != std::char_traits<char>::eof();
// Parses the value of a single BSON element and emits the matching SAX event.
// @param element_type                 type byte of the element; selects the branch
// @param element_type_parse_position  input position used when reporting an
//                                     unsupported type
// NOTE(review): the case labels are on lines not shown, so the mapping from type
// codes to the branches below cannot be read off this excerpt.
220 bool parse_bson_element_internal(
const int element_type,
221 const std::size_t element_type_parse_position)
223 switch (element_type)
// Little-endian double -> number_float (with an empty string argument).
228 return get_number<double, true>(input_format_t::bson, number) and sax->number_float(
static_cast<number_float_t
>(number),
"");
// String: 32-bit little-endian length, then the payload, then sax->string().
235 return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value);
// Nested document.
240 return parse_bson_internal();
// Nested array.
245 return parse_bson_array();
// Boolean: one byte, any non-zero value is reported as true.
250 return sax->boolean(get() != 0);
// 32-bit and 64-bit little-endian integers -> number_integer.
261 return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value);
267 return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value);
// Unsupported type: format element_type as two hex digits (2 chars + NUL fit the
// 3-byte buffer) and report parse error 114 at the recorded position.
272 std::array<char, 3> cr{{}};
273 (std::snprintf)(cr.data(), cr.size(),
"%.2hhX",
static_cast<unsigned char>(element_type));
274 return sax->parse_error(element_type_parse_position, std::string(cr.data()),
parse_error::create(114, element_type_parse_position,
"Unsupported BSON record type 0x" + std::string(cr.data())));
// Reads consecutive BSON elements and forwards each to the SAX handler.
// @param is_array  when true, element keys are read but not passed to sax->key()
291 bool parse_bson_element_list(
const bool is_array)
// The loop ends when get() yields 0, since the byte read is the loop condition.
294 while (
int element_type = get())
296 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::bson,
"element list")))
// Remember where the type byte was read, for error reporting further down.
301 const std::size_t element_type_parse_position = chars_read;
// The key is always consumed from the input, even for arrays.
302 if (JSON_HEDLEY_UNLIKELY(not get_bson_cstr(key)))
307 if (not is_array and not sax->key(key))
312 if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position)))
// Parses a BSON array and reports it to the SAX handler.
// On the path shown, the return value is whatever sax->end_array() returns.
328 bool parse_bson_array()
// Reads a 32-bit little-endian value into document_size; the return value of
// get_number is not checked on this line.
331 get_number<std::int32_t, true>(input_format_t::bson, document_size);
// The array is announced with std::size_t(-1) instead of a concrete element count.
333 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
// is_array == true: element keys are consumed but not forwarded.
338 if (JSON_HEDLEY_UNLIKELY(not parse_bson_element_list(
true)))
343 return sax->end_array();
// Parses one CBOR data item and emits the matching SAX event(s).
// @param get_char  when true a new byte is read with get(); when false the byte
//                  already stored in `current` is used as the initial byte
// NOTE(review): case labels and the declarations of `number` / `len` are on lines
// not shown, so which initial bytes reach which branch is not visible here.
357 bool parse_cbor_internal(
const bool get_char =
true)
359 switch (get_char ? get() : current)
362 case std::char_traits<char>::eof():
// Unsigned integer stored directly in the initial byte.
390 return sax->number_unsigned(
static_cast<number_unsigned_t
>(current));
// Unsigned integers read from following bytes via get_number (four branches).
395 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
401 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
407 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
413 return get_number(input_format_t::cbor, number) and sax->number_unsigned(number);
// Negative integer derived from the initial byte: 0x20 - 1 - current.
441 return sax->number_integer(
static_cast<std::int8_t>(0x20 - 1 - current));
// Negative integers read via get_number; the reported value is -1 - number.
446 return get_number(input_format_t::cbor, number) and sax->number_integer(
static_cast<number_integer_t
>(-1) - number);
452 return get_number(input_format_t::cbor, number) and sax->number_integer(
static_cast<number_integer_t
>(-1) - number);
458 return get_number(input_format_t::cbor, number) and sax->number_integer(
static_cast<number_integer_t
>(-1) - number);
// This branch additionally casts `number` to number_integer_t before subtracting.
464 return get_number(input_format_t::cbor, number) and sax->number_integer(
static_cast<number_integer_t
>(-1)
465 -
static_cast<number_integer_t
>(number));
// String value.
500 return get_cbor_string(s) and sax->string(s);
// Array whose length is the low 5 bits of the initial byte.
528 return get_cbor_array(
static_cast<std::size_t
>(
static_cast<unsigned int>(current) & 0x1Fu));
// Arrays whose length is read from following bytes.
533 return get_number(input_format_t::cbor, len) and get_cbor_array(
static_cast<std::size_t
>(len));
539 return get_number(input_format_t::cbor, len) and get_cbor_array(
static_cast<std::size_t
>(len));
545 return get_number(input_format_t::cbor, len) and get_cbor_array(
static_cast<std::size_t
>(len));
551 return get_number(input_format_t::cbor, len) and get_cbor_array(
static_cast<std::size_t
>(len));
// std::size_t(-1) makes get_cbor_array read items until a 0xFF byte.
555 return get_cbor_array(std::size_t(-1));
// Object whose pair count is the low 5 bits of the initial byte.
582 return get_cbor_object(
static_cast<std::size_t
>(
static_cast<unsigned int>(current) & 0x1Fu));
// Objects whose pair count is read from following bytes.
587 return get_number(input_format_t::cbor, len) and get_cbor_object(
static_cast<std::size_t
>(len));
593 return get_number(input_format_t::cbor, len) and get_cbor_object(
static_cast<std::size_t
>(len));
599 return get_number(input_format_t::cbor, len) and get_cbor_object(
static_cast<std::size_t
>(len));
605 return get_number(input_format_t::cbor, len) and get_cbor_object(
static_cast<std::size_t
>(len));
// std::size_t(-1) makes get_cbor_object read pairs until a 0xFF byte.
609 return get_cbor_object(std::size_t(-1));
// Boolean values.
612 return sax->boolean(
false);
615 return sax->boolean(
true);
// 16-bit float: two bytes are read, each followed by an EOF check.
622 const int byte1_raw = get();
623 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor,
"number")))
627 const int byte2_raw = get();
628 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor,
"number")))
633 const auto byte1 =
static_cast<unsigned char>(byte1_raw);
634 const auto byte2 =
static_cast<unsigned char>(byte2_raw);
// byte1 is the high byte. Within `half`: bits 10..14 are the exponent, the low
// 10 bits the mantissa, and 0x8000 the sign (applied at the return below).
// NOTE(review): the conditions selecting between the three results (ldexp with
// -24, infinity/NaN, ldexp with exp - 25) are on lines not shown.
644 const auto half =
static_cast<unsigned int>((byte1 << 8u) + byte2);
645 const double val = [&half]
647 const int exp = (half >> 10u) & 0x1Fu;
648 const unsigned int mant = half & 0x3FFu;
649 assert(0 <= exp and exp <= 32);
650 assert(mant <= 1024);
654 return std::ldexp(mant, -24);
657 ? std::numeric_limits<double>::infinity()
658 : std::numeric_limits<double>::quiet_NaN();
660 return std::ldexp(mant + 1024, exp - 25);
663 return sax->number_float((half & 0x8000u) != 0
664 ?
static_cast<number_float_t
>(-val)
665 :
static_cast<number_float_t
>(val),
"");
// Floating-point values read via get_number (two branches).
671 return get_number(input_format_t::cbor, number) and sax->number_float(
static_cast<number_float_t
>(number),
"");
677 return get_number(input_format_t::cbor, number) and sax->number_float(
static_cast<number_float_t
>(number),
"");
// Unrecognized initial byte: parse error 112 with the byte as a hex token.
682 auto last_token = get_token_string();
683 return sax->parse_error(chars_read, last_token,
parse_error::create(112, chars_read, exception_message(input_format_t::cbor,
"invalid byte: 0x" + last_token,
"value")));
// Reads a CBOR string into `result`. The string's initial byte is expected to be
// in `current` already: the function starts with an EOF check, not with get().
// @param result  receives the string contents
699 bool get_cbor_string(string_t& result)
701 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::cbor,
"string")))
// Length is the low 5 bits of the initial byte.
734 return get_string(input_format_t::cbor,
static_cast<unsigned int>(current) & 0x1Fu, result);
// Length is read from following bytes via get_number (four branches).
740 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
746 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
752 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
758 return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result);
// Chunked form: chunks are read recursively and appended until a 0xFF byte.
763 while (get() != 0xFF)
766 if (not get_cbor_string(chunk))
770 result.append(chunk);
// Any other initial byte: parse error 113.
777 auto last_token = get_token_string();
778 return sax->parse_error(chars_read, last_token,
parse_error::create(113, chars_read, exception_message(input_format_t::cbor,
"expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token,
"string")));
// Reads a CBOR array and reports it between sax->start_array / sax->end_array.
// @param len  number of items, or std::size_t(-1) to read until a 0xFF byte
788 bool get_cbor_array(
const std::size_t len)
790 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(len)))
795 if (len != std::size_t(-1))
// Known length: each item reads its own initial byte.
797 for (std::size_t i = 0; i < len; ++i)
799 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal()))
// Unknown length: the loop condition already consumed the item's initial byte,
// so the item is parsed with get_char == false.
807 while (get() != 0xFF)
809 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal(
false)))
816 return sax->end_array();
// Reads a CBOR object and reports it between sax->start_object / sax->end_object.
// @param len  number of key/value pairs, or std::size_t(-1) to read until 0xFF
824 bool get_cbor_object(
const std::size_t len)
826 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(len)))
832 if (len != std::size_t(-1))
// Known length: a key string followed by a value, `len` times.
// NOTE(review): get_cbor_string inspects `current`; the get() that loads the
// key's initial byte is presumably on a line not shown here.
834 for (std::size_t i = 0; i < len; ++i)
837 if (JSON_HEDLEY_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
842 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal()))
// Unknown length: the loop condition reads the key's initial byte and stops on 0xFF.
851 while (get() != 0xFF)
853 if (JSON_HEDLEY_UNLIKELY(not get_cbor_string(key) or not sax->key(key)))
858 if (JSON_HEDLEY_UNLIKELY(not parse_cbor_internal()))
866 return sax->end_object();
// Parses one MessagePack value and emits the matching SAX event(s).
// NOTE(review): the switch header, case labels and the declarations of `number` /
// `len` are on lines not shown, so the byte-to-branch mapping is not visible here.
876 bool parse_msgpack_internal()
881 case std::char_traits<char>::eof():
// Unsigned integer stored directly in the initial byte.
1013 return sax->number_unsigned(
static_cast<number_unsigned_t
>(current));
// Object whose pair count is the low 4 bits of the initial byte.
1032 return get_msgpack_object(
static_cast<std::size_t
>(
static_cast<unsigned int>(current) & 0x0Fu));
// Array whose item count is the low 4 bits of the initial byte.
1051 return get_msgpack_array(
static_cast<std::size_t
>(
static_cast<unsigned int>(current) & 0x0Fu));
// String value.
1091 return get_msgpack_string(s) and sax->string(s);
// Boolean values.
1098 return sax->boolean(
false);
1101 return sax->boolean(
true);
// Floating-point values read via get_number (two branches).
1106 return get_number(input_format_t::msgpack, number) and sax->number_float(
static_cast<number_float_t
>(number),
"");
1112 return get_number(input_format_t::msgpack, number) and sax->number_float(
static_cast<number_float_t
>(number),
"");
// Unsigned integers read via get_number (four branches).
1118 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1124 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1130 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
1136 return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number);
// Signed integers read via get_number (four branches).
1142 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1148 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1154 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
1160 return get_number(input_format_t::msgpack, number) and sax->number_integer(number);
// Arrays with an explicit length field.
1166 return get_number(input_format_t::msgpack, len) and get_msgpack_array(
static_cast<std::size_t
>(len));
1172 return get_number(input_format_t::msgpack, len) and get_msgpack_array(
static_cast<std::size_t
>(len));
// Objects with an explicit length field.
1178 return get_number(input_format_t::msgpack, len) and get_msgpack_object(
static_cast<std::size_t
>(len));
1184 return get_number(input_format_t::msgpack, len) and get_msgpack_object(
static_cast<std::size_t
>(len));
// Signed integer stored in the initial byte, converted to std::int8_t.
1220 return sax->number_integer(
static_cast<std::int8_t>(current));
// Unrecognized initial byte: parse error 112 with the byte as a hex token.
1224 auto last_token = get_token_string();
1225 return sax->parse_error(chars_read, last_token,
parse_error::create(112, chars_read, exception_message(input_format_t::msgpack,
"invalid byte: 0x" + last_token,
"value")));
// Reads a MessagePack string into `result`.
// @param result  receives the string contents
// NOTE(review): the statement that loads the initial byte and the switch's case
// labels are on lines not shown here.
1240 bool get_msgpack_string(string_t& result)
1242 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::msgpack,
"string")))
// Length is the low 5 bits of the initial byte.
1283 return get_string(input_format_t::msgpack,
static_cast<unsigned int>(current) & 0x1Fu, result);
// Length is read from following bytes via get_number (three branches).
1289 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1295 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
1301 return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result);
// Any other initial byte: parse error 113.
1306 auto last_token = get_token_string();
1307 return sax->parse_error(chars_read, last_token,
parse_error::create(113, chars_read, exception_message(input_format_t::msgpack,
"expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token,
"string")));
// Reads `len` MessagePack values and reports them as one array.
// @param len  number of items to parse
1316 bool get_msgpack_array(
const std::size_t len)
1318 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(len)))
// Each item is parsed as a complete MessagePack value.
1323 for (std::size_t i = 0; i < len; ++i)
1325 if (JSON_HEDLEY_UNLIKELY(not parse_msgpack_internal()))
1331 return sax->end_array();
// Reads `len` key/value pairs and reports them as one object.
// @param len  number of pairs to parse
1338 bool get_msgpack_object(
const std::size_t len)
1340 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(len)))
// Keys are read with get_msgpack_string and passed to sax->key(); the value is
// then parsed as a complete MessagePack value.
1346 for (std::size_t i = 0; i < len; ++i)
1349 if (JSON_HEDLEY_UNLIKELY(not get_msgpack_string(key) or not sax->key(key)))
1354 if (JSON_HEDLEY_UNLIKELY(not parse_msgpack_internal()))
1361 return sax->end_object();
// Parses one UBJSON value by delegating to get_ubjson_value.
// @param get_char  when true the type marker is read with get_ignore_noop();
//                  when false the byte already in `current` is used as the marker
1375 bool parse_ubjson_internal(
const bool get_char =
true)
1377 return get_ubjson_value(get_char ? get_ignore_noop() : current);
// Reads a UBJSON string: a length of one of several integer types, then the bytes.
// @param result    receives the string contents
// @param get_char  defaults to true. NOTE(review): its use is on lines not shown —
//                  presumably it controls whether a new byte is read first; confirm.
1394 bool get_ubjson_string(string_t& result,
const bool get_char =
true)
1401 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson,
"value")))
// One branch per length type; the error text below lists them as U, i, I, l, L.
1411 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1417 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1423 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1429 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
1435 return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result);
// Any other marker: parse error 113.
1439 auto last_token = get_token_string();
1440 return sax->parse_error(chars_read, last_token,
parse_error::create(113, chars_read, exception_message(input_format_t::ubjson,
"expected length type specification (U, i, I, l, L); last byte: 0x" + last_token,
"string")));
// Reads a UBJSON container size: a type marker followed by an integer.
// @param result  receives the size converted to std::size_t
// NOTE(review): the declarations of `number` are on lines not shown. If any of them
// is a signed type, a negative value would be converted by the static_cast below
// into a very large std::size_t; no sign check is visible in this excerpt — confirm.
1448 bool get_ubjson_size_value(std::size_t& result)
// The marker is read with get_ignore_noop(), which loops while the byte is 'N'.
1450 switch (get_ignore_noop())
// Five branches of the same shape: read the number, then convert it to size_t.
1455 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1459 result =
static_cast<std::size_t
>(number);
1466 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1470 result =
static_cast<std::size_t
>(number);
1477 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1481 result =
static_cast<std::size_t
>(number);
1488 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1492 result =
static_cast<std::size_t
>(number);
1499 if (JSON_HEDLEY_UNLIKELY(not get_number(input_format_t::ubjson, number)))
1503 result =
static_cast<std::size_t
>(number);
// Any other marker: parse error 113.
1509 auto last_token = get_token_string();
1510 return sax->parse_error(chars_read, last_token,
parse_error::create(113, chars_read, exception_message(input_format_t::ubjson,
"expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token,
"size")));
// Reads the optional size and type information of a UBJSON container.
// @param result  .first receives the size and starts as string_t::npos ("no size
//                given" — the callers compare against npos); .second receives the
//                type byte when one is read
// NOTE(review): the conditions selecting between the branches below are on lines
// not shown here.
1525 bool get_ubjson_size_type(std::pair<std::size_t, int>& result)
1527 result.first = string_t::npos;
// Type byte, followed by an EOF check.
1534 result.second = get();
1535 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson,
"type")))
// After the type information a '#' is required; anything else is EOF (reported
// through unexpect_eof) or parse error 112.
1541 if (JSON_HEDLEY_UNLIKELY(current !=
'#'))
1543 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson,
"value")))
1547 auto last_token = get_token_string();
1548 return sax->parse_error(chars_read, last_token,
parse_error::create(112, chars_read, exception_message(input_format_t::ubjson,
"expected '#' after type information; last byte: 0x" + last_token,
"size")));
// Both remaining paths read the size itself into result.first.
1551 return get_ubjson_size_value(result.first);
1556 return get_ubjson_size_value(result.first);
// Parses one UBJSON value whose type marker is supplied by the caller.
// @param prefix  the type marker byte
// NOTE(review): the switch header and case labels are on lines not shown, so the
// marker-to-branch mapping is not visible in this excerpt.
1566 bool get_ubjson_value(
const int prefix)
1570 case std::char_traits<char>::eof():
// Boolean values.
1574 return sax->boolean(
true);
1576 return sax->boolean(
false);
// One unsigned and four signed integer branches, each read via get_number.
1584 return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number);
1590 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1596 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1602 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
1608 return get_number(input_format_t::ubjson, number) and sax->number_integer(number);
// Floating-point values (two branches).
1614 return get_number(input_format_t::ubjson, number) and sax->number_float(
static_cast<number_float_t
>(number),
"");
1620 return get_number(input_format_t::ubjson, number) and sax->number_float(
static_cast<number_float_t
>(number),
"");
// Single character (marker 'C' per the error text): the byte must not exceed 127
// and is reported as a one-character string.
1626 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(input_format_t::ubjson,
"char")))
1630 if (JSON_HEDLEY_UNLIKELY(current > 127))
1632 auto last_token = get_token_string();
1633 return sax->parse_error(chars_read, last_token,
parse_error::create(113, chars_read, exception_message(input_format_t::ubjson,
"byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token,
"char")));
1635 string_t s(1,
static_cast<char>(current));
1636 return sax->string(s);
// String value.
1642 return get_ubjson_string(s) and sax->string(s);
// Containers.
1646 return get_ubjson_array();
1649 return get_ubjson_object();
// Unrecognized marker: parse error 112 with the byte as a hex token.
1653 auto last_token = get_token_string();
1654 return sax->parse_error(chars_read, last_token,
parse_error::create(112, chars_read, exception_message(input_format_t::ubjson,
"invalid byte: 0x" + last_token,
"value")));
// Reads a UBJSON array and reports it between sax->start_array / sax->end_array.
1662 bool get_ubjson_array()
// .first = size (string_t::npos when none was given), .second = element type.
1664 std::pair<std::size_t, int> size_and_type;
1665 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_size_type(size_and_type)))
// Sized array.
1670 if (size_and_type.first != string_t::npos)
1672 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(size_and_type.first)))
// A non-zero type means every item shares that type and carries no marker of its
// own. With type 'N' the item loop is skipped, so no values are read or emitted.
1677 if (size_and_type.second != 0)
1679 if (size_and_type.second !=
'N')
1681 for (std::size_t i = 0; i < size_and_type.first; ++i)
1683 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_value(size_and_type.second)))
// Sized but untyped: each item starts with its own marker.
1692 for (std::size_t i = 0; i < size_and_type.first; ++i)
1694 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal()))
// Unsized array: announced with std::size_t(-1) and read until `current` is ']'.
// Items are parsed with get_char == false, i.e. starting from `current`.
1703 if (JSON_HEDLEY_UNLIKELY(not sax->start_array(std::size_t(-1))))
1708 while (current !=
']')
1710 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal(
false)))
1718 return sax->end_array();
// Reads a UBJSON object and reports it between sax->start_object / sax->end_object.
1724 bool get_ubjson_object()
// .first = size (string_t::npos when none was given), .second = value type.
1726 std::pair<std::size_t, int> size_and_type;
1727 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_size_type(size_and_type)))
// Sized object.
1733 if (size_and_type.first != string_t::npos)
1735 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(size_and_type.first)))
// Typed: every value shares the given type and carries no marker of its own.
1740 if (size_and_type.second != 0)
1742 for (std::size_t i = 0; i < size_and_type.first; ++i)
1744 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key) or not sax->key(key)))
1748 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_value(size_and_type.second)))
// Sized but untyped: each value starts with its own marker.
1757 for (std::size_t i = 0; i < size_and_type.first; ++i)
1759 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key) or not sax->key(key)))
1763 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal()))
// Unsized object: announced with std::size_t(-1) and read until `current` is '}'.
// Keys are read with get_char == false here, unlike in the sized loops above.
1773 if (JSON_HEDLEY_UNLIKELY(not sax->start_object(std::size_t(-1))))
1778 while (current !=
'}')
1780 if (JSON_HEDLEY_UNLIKELY(not get_ubjson_string(key,
false) or not sax->key(key)))
1784 if (JSON_HEDLEY_UNLIKELY(not parse_ubjson_internal()))
1793 return sax->end_object();
// Fetches the next character from the input adapter `ia`, stores it in `current`
// and returns it. (The enclosing declaration is not part of this excerpt.)
1812 return current = ia->get_character();
// Returns an int; the visible loop condition repeats while `current` is 'N'.
// NOTE(review): the loop body and the return statement are on lines not shown —
// presumably it calls get() until a byte other than 'N' is read; confirm.
1818 int get_ignore_noop()
1824 while (current ==
'N');
// Reads sizeof(NumberType) bytes and assembles them into `result`.
// (The declaration line is not part of this excerpt; presumably this is get_number.)
// @tparam NumberType           type of the number to read
// @tparam InputIsLittleEndian  byte order of the input; defaults to false
1842 template<
typename NumberType,
bool InputIsLittleEndian = false>
1847 for (std::size_t i = 0; i <
sizeof(NumberType); ++i)
// Running out of input is reported with the context "number".
1850 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format,
"number")))
// When is_little_endian differs from the input byte order, the bytes are stored
// back to front so the buffer ends up in the other order.
1856 if (is_little_endian != InputIsLittleEndian)
1858 vec[
sizeof(NumberType) - i - 1] =
static_cast<std::uint8_t>(current);
// The byte buffer is copied into the result with memcpy.
1867 std::memcpy(&result, vec.data(),
sizeof(NumberType));
// Appends `len` characters from the input to `result`.
// (The declaration line is not part of this excerpt; presumably this is get_string.)
// @tparam NumberType  type of the length argument
// NOTE(review): std::generate_n invokes the lambda `len` times regardless of
// `success`, so once EOF is hit the remaining iterations still run and still
// append a character each — confirm this is acceptable for large declared lengths.
1885 template<
typename NumberType>
1887 const NumberType len,
1890 bool success =
true;
1891 std::generate_n(std::back_inserter(result), len, [
this, &success, &format]()
// Running out of input is reported with the context "string".
1894 if (JSON_HEDLEY_UNLIKELY(not unexpect_eof(format,
"string")))
1898 return static_cast<char>(current);
// Checks whether `current` is EOF and, if so, reports parse error 110
// ("unexpected end of input") to the SAX handler.
// @param format   binary format being parsed, used in the error message
// @param context  what was being parsed, used in the error message
// @return the result of sax->parse_error() when `current` is EOF; the non-EOF
//         return is on a line not shown (callers treat a false result as failure)
1908 JSON_HEDLEY_NON_NULL(3)
1909 bool unexpect_eof(const
input_format_t format, const
char* context)
const
1911 if (JSON_HEDLEY_UNLIKELY(current == std::char_traits<char>::eof()))
1913 return sax->parse_error(chars_read,
"<end of file>",
1914 parse_error::create(110, chars_read, exception_message(format,
"unexpected end of input", context)));
// Formats `current`, converted to unsigned char, as uppercase hex with at least
// two digits.
// @return the hex text as a std::string
1922 std::string get_token_string()
const
// Three bytes hold two hex digits plus the terminating NUL; snprintf is given the
// buffer size, so it cannot write past the array.
1924 std::array<char, 3> cr{{}};
1925 (std::snprintf)(cr.data(), cr.size(),
"%.2hhX",
static_cast<unsigned char>(current));
1926 return std::string{cr.data()};
// Builds an error message of the form
// "syntax error while parsing <FORMAT> <context>: <detail>".
// (The declaration line is not part of this excerpt; presumably this is
// exception_message.)
// @param detail   description of the error
// @param context  what was being parsed when the error occurred
1936 const std::string& detail,
1937 const std::string& context)
const
1939 std::string error_msg =
"syntax error while parsing ";
// The format's display name is appended next.
1943 case input_format_t::cbor:
1944 error_msg +=
"CBOR";
1947 case input_format_t::msgpack:
1948 error_msg +=
"MessagePack";
1951 case input_format_t::ubjson:
1952 error_msg +=
"UBJSON";
1955 case input_format_t::bson:
1956 error_msg +=
"BSON";
1963 return error_msg +
" " + context +
": " + detail;
// The most recently read character; starts out as EOF.
1971 int current = std::char_traits<char>::eof();
// Position passed to sax->parse_error / parse_error::create when reporting
// errors; starts at 0. NOTE(review): where it is incremented is not shown here.
1974 std::size_t chars_read = 0;
// The SAX handler that receives parse events; null until assigned.
1980 json_sax_t* sax =
nullptr;