Horizon
input_adapters.hpp
1 #pragma once
2 
3 #include <array> // array
4 #include <cassert> // assert
5 #include <cstddef> // size_t
6 #include <cstdio> //FILE *
7 #include <cstring> // strlen
8 #include <istream> // istream
9 #include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next
10 #include <memory> // shared_ptr, make_shared, addressof
11 #include <numeric> // accumulate
12 #include <string> // string, char_traits
13 #include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer
14 #include <utility> // pair, declval
15 
16 #include <nlohmann/detail/iterators/iterator_traits.hpp>
17 #include <nlohmann/detail/macro_scope.hpp>
18 
19 namespace nlohmann
20 {
21 namespace detail
22 {
/// the supported input formats
enum class input_format_t { json, cbor, msgpack, ubjson, bson };
25 
27 // input adapters //
29 
/*!
@brief abstract input adapter interface

Every concrete adapter implements get_character(), which hands out the input
one value at a time. The destructor is virtual so that adapters can be
destroyed through a pointer to this interface.
*/
struct input_adapter_protocol
{
    /// get a character [0,255] or std::char_traits<char>::eof().
    virtual std::char_traits<char>::int_type get_character() = 0;
    virtual ~input_adapter_protocol() = default;
};
47 
49 using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
50 
56 {
57  public:
58  JSON_HEDLEY_NON_NULL(2)
59  explicit file_input_adapter(std::FILE* f) noexcept
60  : m_file(f)
61  {}
62 
63  // make class move-only
64  file_input_adapter(const file_input_adapter&) = delete;
66  file_input_adapter& operator=(const file_input_adapter&) = delete;
67  file_input_adapter& operator=(file_input_adapter&&) = default;
68  ~file_input_adapter() override = default;
69 
70  std::char_traits<char>::int_type get_character() noexcept override
71  {
72  return std::fgetc(m_file);
73  }
74 
75  private:
77  std::FILE* m_file;
78 };
79 
80 
91 {
92  public:
93  ~input_stream_adapter() override
94  {
95  // clear stream flags; we use underlying streambuf I/O, do not
96  // maintain ifstream flags, except eof
97  is.clear(is.rdstate() & std::ios::eofbit);
98  }
99 
100  explicit input_stream_adapter(std::istream& i)
101  : is(i), sb(*i.rdbuf())
102  {}
103 
104  // delete because of pointer members
106  input_stream_adapter& operator=(input_stream_adapter&) = delete;
108  input_stream_adapter& operator=(input_stream_adapter&&) = delete;
109 
110  // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
111  // ensure that std::char_traits<char>::eof() and the character 0xFF do not
112  // end up as the same value, eg. 0xFFFFFFFF.
113  std::char_traits<char>::int_type get_character() override
114  {
115  auto res = sb.sbumpc();
116  // set eof manually, as we don't use the istream interface.
117  if (res == EOF)
118  {
119  is.clear(is.rdstate() | std::ios::eofbit);
120  }
121  return res;
122  }
123 
124  private:
126  std::istream& is;
127  std::streambuf& sb;
128 };
129 
132 {
133  public:
134  input_buffer_adapter(const char* b, const std::size_t l) noexcept
135  : cursor(b), limit(b == nullptr ? nullptr : (b + l))
136  {}
137 
138  // delete because of pointer members
140  input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
142  input_buffer_adapter& operator=(input_buffer_adapter&&) = delete;
143  ~input_buffer_adapter() override = default;
144 
145  std::char_traits<char>::int_type get_character() noexcept override
146  {
147  if (JSON_HEDLEY_LIKELY(cursor < limit))
148  {
149  assert(cursor != nullptr and limit != nullptr);
150  return std::char_traits<char>::to_int_type(*(cursor++));
151  }
152 
153  return std::char_traits<char>::eof();
154  }
155 
156  private:
158  const char* cursor;
160  const char* const limit;
161 };
162 
/*!
@brief converts one code point of a wide string into UTF-8 bytes

The primary template treats every element of the string as one UTF-32 code
point; the specialization for T == 2 below decodes UTF-16 instead.

@tparam WideStringType  string type providing size() and operator[]
@tparam T               size in bytes of one element of the string
*/
template<typename WideStringType, std::size_t T>
struct wide_string_input_helper
{
    /*!
    @brief refill @a utf8_bytes from the next element of @a str (UTF-32)

    @param[in]     str                the wide string to read from
    @param[in,out] current_wchar      index of the next unread element
    @param[out]    utf8_bytes         receives 1-4 values; a single eof()
                                      once the input is exhausted
    @param[out]    utf8_bytes_index   always reset to 0
    @param[out]    utf8_bytes_filled  number of valid entries in @a utf8_bytes
    */
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        utf8_bytes_index = 0;

        // ">=" rather than "==": never index the string once the position is
        // at or beyond its end
        if (current_wchar >= str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-32 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u) & 0x1Fu));
            utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (wc <= 0xFFFF)
        {
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u) & 0x0Fu));
            utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (wc <= 0x10FFFF)
        {
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | ((wc >> 18u) & 0x07u));
            utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: pass the raw value through unchanged
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};

/// specialization for strings whose elements are two bytes wide (UTF-16)
template<typename WideStringType>
struct wide_string_input_helper<WideStringType, 2>
{
    /*!
    @brief refill @a utf8_bytes from the next code point of @a str (UTF-16)

    Consumes one code unit, or two when the unit lies in the surrogate range
    and another unit follows. A surrogate that is the last unit of the string
    is passed through as a single raw value.

    @param[in]     str                the wide string to read from
    @param[in,out] current_wchar      index of the next unread code unit;
                                      never advanced beyond str.size()
    @param[out]    utf8_bytes         receives 1-4 values; a single eof()
                                      once the input is exhausted
    @param[out]    utf8_bytes_index   always reset to 0
    @param[out]    utf8_bytes_filled  number of valid entries in @a utf8_bytes
    */
    static void fill_buffer(const WideStringType& str,
                            std::size_t& current_wchar,
                            std::array<std::char_traits<char>::int_type, 4>& utf8_bytes,
                            std::size_t& utf8_bytes_index,
                            std::size_t& utf8_bytes_filled)
    {
        utf8_bytes_index = 0;

        // ">=" rather than "==": never index the string once the position is
        // at or beyond its end
        if (current_wchar >= str.size())
        {
            utf8_bytes[0] = std::char_traits<char>::eof();
            utf8_bytes_filled = 1;
            return;
        }

        // get the current character
        const auto wc = static_cast<unsigned int>(str[current_wchar++]);

        // UTF-16 to UTF-8 encoding
        if (wc < 0x80)
        {
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
            utf8_bytes_filled = 1;
        }
        else if (wc <= 0x7FF)
        {
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xC0u | ((wc >> 6u)));
            utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 2;
        }
        else if (0xD800 > wc or wc >= 0xE000)
        {
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xE0u | ((wc >> 12u)));
            utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((wc >> 6u) & 0x3Fu));
            utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | (wc & 0x3Fu));
            utf8_bytes_filled = 3;
        }
        else if (current_wchar < str.size())
        {
            // surrogate pair: combine both units into one code point
            const auto wc2 = static_cast<unsigned int>(str[current_wchar++]);
            const auto charcode = 0x10000u + (((wc & 0x3FFu) << 10u) | (wc2 & 0x3FFu));
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(0xF0u | (charcode >> 18u));
            utf8_bytes[1] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 12u) & 0x3Fu));
            utf8_bytes[2] = static_cast<std::char_traits<char>::int_type>(0x80u | ((charcode >> 6u) & 0x3Fu));
            utf8_bytes[3] = static_cast<std::char_traits<char>::int_type>(0x80u | (charcode & 0x3Fu));
            utf8_bytes_filled = 4;
        }
        else
        {
            // unknown character: a surrogate with nothing after it. The unit
            // was already consumed above, so the position must NOT be
            // advanced again - doing so would step past str.size(), the end
            // check would never match and later calls would read out of
            // bounds.
            utf8_bytes[0] = static_cast<std::char_traits<char>::int_type>(wc);
            utf8_bytes_filled = 1;
        }
    }
};
286 
287 template<typename WideStringType>
289 {
290  public:
291  explicit wide_string_input_adapter(const WideStringType& w) noexcept
292  : str(w)
293  {}
294 
295  std::char_traits<char>::int_type get_character() noexcept override
296  {
297  // check if buffer needs to be filled
298  if (utf8_bytes_index == utf8_bytes_filled)
299  {
300  fill_buffer<sizeof(typename WideStringType::value_type)>();
301 
302  assert(utf8_bytes_filled > 0);
303  assert(utf8_bytes_index == 0);
304  }
305 
306  // use buffer
307  assert(utf8_bytes_filled > 0);
308  assert(utf8_bytes_index < utf8_bytes_filled);
309  return utf8_bytes[utf8_bytes_index++];
310  }
311 
312  private:
313  template<size_t T>
314  void fill_buffer()
315  {
316  wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled);
317  }
318 
320  const WideStringType& str;
321 
323  std::size_t current_wchar = 0;
324 
326  std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}};
327 
329  std::size_t utf8_bytes_index = 0;
331  std::size_t utf8_bytes_filled = 0;
332 };
333 
335 {
336  public:
337  // native support
338  JSON_HEDLEY_NON_NULL(2)
339  input_adapter(std::FILE* file)
340  : ia(std::make_shared<file_input_adapter>(file)) {}
342  input_adapter(std::istream& i)
343  : ia(std::make_shared<input_stream_adapter>(i)) {}
344 
346  input_adapter(std::istream&& i)
347  : ia(std::make_shared<input_stream_adapter>(i)) {}
348 
349  input_adapter(const std::wstring& ws)
350  : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {}
351 
352  input_adapter(const std::u16string& ws)
353  : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {}
354 
355  input_adapter(const std::u32string& ws)
356  : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {}
357 
359  template<typename CharT,
360  typename std::enable_if<
361  std::is_pointer<CharT>::value and
362  std::is_integral<typename std::remove_pointer<CharT>::type>::value and
363  sizeof(typename std::remove_pointer<CharT>::type) == 1,
364  int>::type = 0>
365  input_adapter(CharT b, std::size_t l)
366  : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {}
367 
368  // derived support
369 
371  template<typename CharT,
372  typename std::enable_if<
373  std::is_pointer<CharT>::value and
374  std::is_integral<typename std::remove_pointer<CharT>::type>::value and
375  sizeof(typename std::remove_pointer<CharT>::type) == 1,
376  int>::type = 0>
377  input_adapter(CharT b)
378  : input_adapter(reinterpret_cast<const char*>(b),
379  std::strlen(reinterpret_cast<const char*>(b))) {}
380 
382  template<class IteratorType,
383  typename std::enable_if<
384  std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value,
385  int>::type = 0>
386  input_adapter(IteratorType first, IteratorType last)
387  {
388 #ifndef NDEBUG
389  // assertion to check that the iterator range is indeed contiguous,
390  // see http://stackoverflow.com/a/35008842/266378 for more discussion
391  const auto is_contiguous = std::accumulate(
392  first, last, std::pair<bool, int>(true, 0),
393  [&first](std::pair<bool, int> res, decltype(*first) val)
394  {
395  res.first &= (val == *(std::next(std::addressof(*first), res.second++)));
396  return res;
397  }).first;
398  assert(is_contiguous);
399 #endif
400 
401  // assertion to check that each element is 1 byte long
402  static_assert(
403  sizeof(typename iterator_traits<IteratorType>::value_type) == 1,
404  "each element in the iterator range must have the size of 1 byte");
405 
406  const auto len = static_cast<size_t>(std::distance(first, last));
407  if (JSON_HEDLEY_LIKELY(len > 0))
408  {
409  // there is at least one element: use the address of first
410  ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len);
411  }
412  else
413  {
414  // the address of first cannot be used: use nullptr
415  ia = std::make_shared<input_buffer_adapter>(nullptr, len);
416  }
417  }
418 
420  template<class T, std::size_t N>
421  input_adapter(T (&array)[N])
422  : input_adapter(std::begin(array), std::end(array)) {}
423 
425  template<class ContiguousContainer, typename
426  std::enable_if<not std::is_pointer<ContiguousContainer>::value and
427  std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value,
428  int>::type = 0>
429  input_adapter(const ContiguousContainer& c)
430  : input_adapter(std::begin(c), std::end(c)) {}
431 
432  operator input_adapter_t()
433  {
434  return ia;
435  }
436 
437  private:
439  input_adapter_t ia = nullptr;
440 };
441 } // namespace detail
442 } // namespace nlohmann
nlohmann::detail::input_buffer_adapter::get_character
std::char_traits< char >::int_type get_character() noexcept override
get a character [0,255] or std::char_traits<char>::eof().
Definition: input_adapters.hpp:145
nlohmann::detail::input_stream_adapter::get_character
std::char_traits< char >::int_type get_character() override
get a character [0,255] or std::char_traits<char>::eof().
Definition: input_adapters.hpp:113
nlohmann::detail::wide_string_input_adapter::get_character
std::char_traits< char >::int_type get_character() noexcept override
get a character [0,255] or std::char_traits<char>::eof().
Definition: input_adapters.hpp:295
nlohmann::detail::input_stream_adapter
Definition: input_adapters.hpp:91
nlohmann::detail::file_input_adapter::get_character
std::char_traits< char >::int_type get_character() noexcept override
get a character [0,255] or std::char_traits<char>::eof().
Definition: input_adapters.hpp:70
nlohmann
namespace for Niels Lohmann
Definition: adl_serializer.hpp:9
nlohmann::detail::input_buffer_adapter
input adapter for buffer input
Definition: input_adapters.hpp:132
nlohmann::detail::input_adapter::input_adapter
input_adapter(T(&array)[N])
input adapter for array
Definition: input_adapters.hpp:421
nlohmann::detail::wide_string_input_adapter
Definition: input_adapters.hpp:289
nlohmann::detail::wide_string_input_helper
Definition: input_adapters.hpp:165
nlohmann::detail::input_format_t
input_format_t
the supported input formats
Definition: input_adapters.hpp:24
nlohmann::detail::iterator_traits
Definition: iterator_traits.hpp:32
nlohmann::detail::input_adapter_protocol
abstract input adapter interface
Definition: input_adapters.hpp:42
nlohmann::detail::input_adapter::input_adapter
input_adapter(IteratorType first, IteratorType last)
input adapter for iterator range with contiguous storage
Definition: input_adapters.hpp:386
nlohmann::detail::input_adapter::input_adapter
input_adapter(CharT b, std::size_t l)
input adapter for buffer
Definition: input_adapters.hpp:365
nlohmann::json
basic_json<> json
default JSON class
Definition: json_fwd.hpp:61
nlohmann::detail::input_adapter
Definition: input_adapters.hpp:335
nlohmann::detail::input_adapter::input_adapter
input_adapter(std::istream &i)
input adapter for input stream
Definition: input_adapters.hpp:342
nlohmann::detail::input_adapter::input_adapter
input_adapter(const ContiguousContainer &c)
input adapter for contiguous container
Definition: input_adapters.hpp:429
nlohmann::detail::input_adapter_t
std::shared_ptr< input_adapter_protocol > input_adapter_t
a type to simplify interfaces
Definition: input_adapters.hpp:49
nlohmann::detail::input_adapter_protocol::get_character
virtual std::char_traits< char >::int_type get_character()=0
get a character [0,255] or std::char_traits<char>::eof().
nlohmann::detail::file_input_adapter
Definition: input_adapters.hpp:56
nlohmann::detail::input_adapter::input_adapter
input_adapter(CharT b)
input adapter for string literal
Definition: input_adapters.hpp:377
nlohmann::detail::input_adapter::input_adapter
input_adapter(std::istream &&i)
input adapter for input stream
Definition: input_adapters.hpp:346