Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===---------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file supports working with JSON data.
  11. ///
  12. /// It comprises:
  13. ///
  14. /// - classes which hold dynamically-typed parsed JSON structures
  15. ///   These are value types that can be composed, inspected, and modified.
  16. ///   See json::Value, and the related types json::Object and json::Array.
  17. ///
  18. /// - functions to parse JSON text into Values, and to serialize Values to text.
  19. ///   See parse(), operator<<, and format_provider.
  20. ///
  21. /// - a convention and helpers for mapping between json::Value and user-defined
  22. ///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
  23. ///
  24. /// - an output API json::OStream which can emit JSON without materializing
  25. ///   all structures as json::Value.
  26. ///
  27. /// Typically, JSON data would be read from an external source, parsed into
  28. /// a Value, and then converted into some native data structure before doing
  29. /// real work on it. (And vice versa when writing).
  30. ///
  31. /// Other serialization mechanisms you may consider:
  32. ///
  33. /// - YAML is also text-based, and more human-readable than JSON. It's a more
  34. ///   complex format and data model, and YAML parsers aren't ubiquitous.
  35. ///   YAMLParser.h is a streaming parser suitable for parsing large documents
  36. ///   (including JSON, as YAML is a superset). It can be awkward to use
  37. ///   directly. YAML I/O (YAMLTraits.h) provides data mapping that is more
  38. ///   declarative than the toJSON/fromJSON conventions here.
  39. ///
  40. /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
  41. ///   encodes LLVM IR ("bitcode"), but it can be a container for other data.
  42. ///   Low-level reader/writer libraries are in Bitstream/Bitstream*.h
  43. ///
  44. //===---------------------------------------------------------------------===//
  45.  
  46. #ifndef LLVM_SUPPORT_JSON_H
  47. #define LLVM_SUPPORT_JSON_H
  48.  
  49. #include "llvm/ADT/DenseMap.h"
  50. #include "llvm/ADT/SmallVector.h"
  51. #include "llvm/ADT/StringRef.h"
  52. #include "llvm/ADT/STLFunctionalExtras.h"
  53. #include "llvm/Support/Error.h"
  54. #include "llvm/Support/FormatVariadic.h"
  55. #include "llvm/Support/raw_ostream.h"
  56. #include <cmath>
  57. #include <map>
  58.  
  59. namespace llvm {
  60. namespace json {
  61.  
  62. // === String encodings ===
  63. //
  64. // JSON strings are character sequences (not byte sequences like std::string).
  65. // We need to know the encoding, and for simplicity only support UTF-8.
  66. //
  67. //   - When parsing, invalid UTF-8 is a syntax error like any other
  68. //
  69. //   - When creating Values from strings, callers must ensure they are UTF-8.
  70. //        with asserts on, invalid UTF-8 will crash the program
  71. //        with asserts off, we'll substitute the replacement character (U+FFFD)
  72. //     Callers can use json::isUTF8() and json::fixUTF8() for validation.
  73. //
  74. //   - When retrieving strings from Values (e.g. asString()), the result will
  75. //     always be valid UTF-8.
  76.  
  77. /// Returns true if \p S is valid UTF-8, which is required for use as JSON.
  78. /// If it returns false, \p ErrOffset is set to a byte offset near the first error.
  79. bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
  80. /// Replaces invalid UTF-8 sequences in \p S with the replacement character
  81. /// (U+FFFD). The returned string is valid UTF-8.
  82. /// This is much slower than isUTF8, so test that first.
  83. std::string fixUTF8(llvm::StringRef S);
  84.  
  85. class Array;
  86. class ObjectKey;
  87. class Value;
  88. template <typename T> Value toJSON(const std::optional<T> &Opt);
  89.  
  90. /// An Object is a JSON object, which maps strings to heterogenous JSON values.
  91. /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string.
  92. class Object {
  93.   using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>;
  94.   Storage M;
  95.  
  96. public:
  97.   using key_type = ObjectKey;
  98.   using mapped_type = Value;
  99.   using value_type = Storage::value_type;
  100.   using iterator = Storage::iterator;
  101.   using const_iterator = Storage::const_iterator;
  102.  
  103.   Object() = default;
  104.   // KV is a trivial key-value struct for list-initialization.
  105.   // (using std::pair forces extra copies).
  106.   struct KV;
  107.   explicit Object(std::initializer_list<KV> Properties);
  108.  
  109.   iterator begin() { return M.begin(); }
  110.   const_iterator begin() const { return M.begin(); }
  111.   iterator end() { return M.end(); }
  112.   const_iterator end() const { return M.end(); }
  113.  
  114.   bool empty() const { return M.empty(); }
  115.   size_t size() const { return M.size(); }
  116.  
  117.   void clear() { M.clear(); }
  118.   std::pair<iterator, bool> insert(KV E);
  119.   template <typename... Ts>
  120.   std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) {
  121.     return M.try_emplace(K, std::forward<Ts>(Args)...);
  122.   }
  123.   template <typename... Ts>
  124.   std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) {
  125.     return M.try_emplace(std::move(K), std::forward<Ts>(Args)...);
  126.   }
  127.   bool erase(StringRef K);
  128.   void erase(iterator I) { M.erase(I); }
  129.  
  130.   iterator find(StringRef K) { return M.find_as(K); }
  131.   const_iterator find(StringRef K) const { return M.find_as(K); }
  132.   // operator[] acts as if Value was default-constructible as null.
  133.   Value &operator[](const ObjectKey &K);
  134.   Value &operator[](ObjectKey &&K);
  135.   // Look up a property, returning nullptr if it doesn't exist.
  136.   Value *get(StringRef K);
  137.   const Value *get(StringRef K) const;
  138.   // Typed accessors return std::nullopt/nullptr if
  139.   //   - the property doesn't exist
  140.   //   - or it has the wrong type
  141.   std::optional<std::nullptr_t> getNull(StringRef K) const;
  142.   std::optional<bool> getBoolean(StringRef K) const;
  143.   std::optional<double> getNumber(StringRef K) const;
  144.   std::optional<int64_t> getInteger(StringRef K) const;
  145.   std::optional<llvm::StringRef> getString(StringRef K) const;
  146.   const json::Object *getObject(StringRef K) const;
  147.   json::Object *getObject(StringRef K);
  148.   const json::Array *getArray(StringRef K) const;
  149.   json::Array *getArray(StringRef K);
  150. };
  151. bool operator==(const Object &LHS, const Object &RHS);
  152. inline bool operator!=(const Object &LHS, const Object &RHS) {
  153.   return !(LHS == RHS);
  154. }
  155.  
  156. /// An Array is a JSON array, which contains heterogeneous JSON values.
  157. /// It simulates std::vector<Value>.
  158. class Array {
  159.   std::vector<Value> V;
  160.  
  161. public:
  162.   using value_type = Value;
  163.   using iterator = std::vector<Value>::iterator;
  164.   using const_iterator = std::vector<Value>::const_iterator;
  165.  
  166.   Array() = default;
  167.   explicit Array(std::initializer_list<Value> Elements);
  168.   template <typename Collection> explicit Array(const Collection &C) {
  169.     for (const auto &V : C)
  170.       emplace_back(V);
  171.   }
  172.  
  173.   Value &operator[](size_t I);
  174.   const Value &operator[](size_t I) const;
  175.   Value &front();
  176.   const Value &front() const;
  177.   Value &back();
  178.   const Value &back() const;
  179.   Value *data();
  180.   const Value *data() const;
  181.  
  182.   iterator begin();
  183.   const_iterator begin() const;
  184.   iterator end();
  185.   const_iterator end() const;
  186.  
  187.   bool empty() const;
  188.   size_t size() const;
  189.   void reserve(size_t S);
  190.  
  191.   void clear();
  192.   void push_back(const Value &E);
  193.   void push_back(Value &&E);
  194.   template <typename... Args> void emplace_back(Args &&...A);
  195.   void pop_back();
  196.   iterator insert(const_iterator P, const Value &E);
  197.   iterator insert(const_iterator P, Value &&E);
  198.   template <typename It> iterator insert(const_iterator P, It A, It Z);
  199.   template <typename... Args> iterator emplace(const_iterator P, Args &&...A);
  200.  
  201.   friend bool operator==(const Array &L, const Array &R);
  202. };
  203. inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
  204.  
  205. /// A Value is a JSON value of unknown type.
  206. /// They can be copied, but should generally be moved.
  207. ///
  208. /// === Composing values ===
  209. ///
  210. /// You can implicitly construct Values from:
  211. ///   - strings: std::string, SmallString, formatv, StringRef, char*
  212. ///              (char*, and StringRef are references, not copies!)
  213. ///   - numbers
  214. ///   - booleans
  215. ///   - null: nullptr
  216. ///   - arrays: {"foo", 42.0, false}
  217. ///   - serializable things: types with toJSON(const T&)->Value, found by ADL
  218. ///
  219. /// They can also be constructed from object/array helpers:
  220. ///   - json::Object is a type like map<ObjectKey, Value>
  221. ///   - json::Array is a type like vector<Value>
  222. /// These can be list-initialized, or used to build up collections in a loop.
  223. /// json::ary(Collection) converts all items in a collection to Values.
  224. ///
  225. /// === Inspecting values ===
  226. ///
  227. /// Each Value is one of the JSON kinds:
  228. ///   null    (nullptr_t)
  229. ///   boolean (bool)
  230. ///   number  (double, int64 or uint64)
  231. ///   string  (StringRef)
  232. ///   array   (json::Array)
  233. ///   object  (json::Object)
  234. ///
  235. /// The kind can be queried directly, or implicitly via the typed accessors:
  236. ///   if (std::optional<StringRef> S = E.getAsString())
  237. ///     assert(E.kind() == Value::String);
  238. ///
  239. /// Array and Object also have typed indexing accessors for easy traversal:
  240. ///   Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )");
  241. ///   if (Object* O = E->getAsObject())
  242. ///     if (Object* Opts = O->getObject("options"))
  243. ///       if (std::optional<StringRef> Font = Opts->getString("font"))
  244. ///         assert(Opts->get("font")->kind() == Value::String);
  245. ///
  246. /// === Converting JSON values to C++ types ===
  247. ///
  248. /// The convention is to have a deserializer function findable via ADL:
  249. ///     fromJSON(const json::Value&, T&, Path) -> bool
  250. ///
  251. /// The return value indicates overall success, and Path is used for precise
  252. /// error reporting. (The Path::Root passed in at the top level fromJSON call
  253. /// captures any nested error and can render it in context).
  254. /// If conversion fails, fromJSON calls Path::report() and immediately returns.
  255. /// This ensures that the first fatal error survives.
  256. ///
  257. /// Deserializers are provided for:
  258. ///   - bool
  259. ///   - int and int64_t
  260. ///   - double
  261. ///   - std::string
  262. ///   - vector<T>, where T is deserializable
  263. ///   - map<string, T>, where T is deserializable
  264. ///   - std::optional<T>, where T is deserializable
  265. /// ObjectMapper can help writing fromJSON() functions for object types.
  266. ///
  267. /// For conversion in the other direction, the serializer function is:
  268. ///    toJSON(const T&) -> json::Value
  269. /// If this exists, then it also allows constructing Value from T, and can
  270. /// be used to serialize vector<T>, map<string, T>, and std::optional<T>.
  271. ///
  272. /// === Serialization ===
  273. ///
  274. /// Values can be serialized to JSON:
  275. ///   1) raw_ostream << Value                    // Basic formatting.
  276. ///   2) raw_ostream << formatv("{0}", Value)    // Basic formatting.
  277. ///   3) raw_ostream << formatv("{0:2}", Value)  // Pretty-print with indent 2.
  278. ///
  279. /// And parsed:
  280. ///   Expected<Value> E = json::parse("[1, 2, null]");
  281. ///   assert(E && E->kind() == Value::Array);
  282. class Value {
  283. public:
  284.   enum Kind {
  285.     Null,
  286.     Boolean,
  287.     /// Number values can store both int64s and doubles at full precision,
  288.     /// depending on what they were constructed/parsed from.
  289.     Number,
  290.     String,
  291.     Array,
  292.     Object,
  293.   };
  294.  
  295.   // It would be nice to have Value() be null. But that would make {} null too.
  296.   Value(const Value &M) { copyFrom(M); }
  297.   Value(Value &&M) { moveFrom(std::move(M)); }
  298.   Value(std::initializer_list<Value> Elements);
  299.   Value(json::Array &&Elements) : Type(T_Array) {
  300.     create<json::Array>(std::move(Elements));
  301.   }
  302.   template <typename Elt>
  303.   Value(const std::vector<Elt> &C) : Value(json::Array(C)) {}
  304.   Value(json::Object &&Properties) : Type(T_Object) {
  305.     create<json::Object>(std::move(Properties));
  306.   }
  307.   template <typename Elt>
  308.   Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {}
  309.   // Strings: types with value semantics. Must be valid UTF-8.
  310.   Value(std::string V) : Type(T_String) {
  311.     if (LLVM_UNLIKELY(!isUTF8(V))) {
  312.       assert(false && "Invalid UTF-8 in value used as JSON");
  313.       V = fixUTF8(std::move(V));
  314.     }
  315.     create<std::string>(std::move(V));
  316.   }
  317.   Value(const llvm::SmallVectorImpl<char> &V)
  318.       : Value(std::string(V.begin(), V.end())) {}
  319.   Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
  320.   // Strings: types with reference semantics. Must be valid UTF-8.
  321.   Value(StringRef V) : Type(T_StringRef) {
  322.     create<llvm::StringRef>(V);
  323.     if (LLVM_UNLIKELY(!isUTF8(V))) {
  324.       assert(false && "Invalid UTF-8 in value used as JSON");
  325.       *this = Value(fixUTF8(V));
  326.     }
  327.   }
  328.   Value(const char *V) : Value(StringRef(V)) {}
  329.   Value(std::nullptr_t) : Type(T_Null) {}
  330.   // Boolean (disallow implicit conversions).
  331.   // (The last template parameter is a dummy to keep templates distinct.)
  332.   template <typename T,
  333.             typename = std::enable_if_t<std::is_same<T, bool>::value>,
  334.             bool = false>
  335.   Value(T B) : Type(T_Boolean) {
  336.     create<bool>(B);
  337.   }
  338.  
  339.   // Unsigned 64-bit long integers.
  340.   template <typename T,
  341.             typename = std::enable_if_t<std::is_same<T, uint64_t>::value>,
  342.             bool = false, bool = false>
  343.   Value(T V) : Type(T_UINT64) {
  344.     create<uint64_t>(uint64_t{V});
  345.   }
  346.  
  347.   // Integers (except boolean and uint64_t).
  348.   // Must be non-narrowing convertible to int64_t.
  349.   template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
  350.             typename = std::enable_if_t<!std::is_same<T, bool>::value>,
  351.             typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>>
  352.   Value(T I) : Type(T_Integer) {
  353.     create<int64_t>(int64_t{I});
  354.   }
  355.   // Floating point. Must be non-narrowing convertible to double.
  356.   template <typename T,
  357.             typename = std::enable_if_t<std::is_floating_point<T>::value>,
  358.             double * = nullptr>
  359.   Value(T D) : Type(T_Double) {
  360.     create<double>(double{D});
  361.   }
  362.   // Serializable types: with a toJSON(const T&)->Value function, found by ADL.
  363.   template <typename T,
  364.             typename = std::enable_if_t<std::is_same<
  365.                 Value, decltype(toJSON(*(const T *)nullptr))>::value>,
  366.             Value * = nullptr>
  367.   Value(const T &V) : Value(toJSON(V)) {}
  368.  
  369.   Value &operator=(const Value &M) {
  370.     destroy();
  371.     copyFrom(M);
  372.     return *this;
  373.   }
  374.   Value &operator=(Value &&M) {
  375.     destroy();
  376.     moveFrom(std::move(M));
  377.     return *this;
  378.   }
  379.   ~Value() { destroy(); }
  380.  
  381.   Kind kind() const {
  382.     switch (Type) {
  383.     case T_Null:
  384.       return Null;
  385.     case T_Boolean:
  386.       return Boolean;
  387.     case T_Double:
  388.     case T_Integer:
  389.     case T_UINT64:
  390.       return Number;
  391.     case T_String:
  392.     case T_StringRef:
  393.       return String;
  394.     case T_Object:
  395.       return Object;
  396.     case T_Array:
  397.       return Array;
  398.     }
  399.     llvm_unreachable("Unknown kind");
  400.   }
  401.  
  402.   // Typed accessors return std::nullopt/nullptr if the Value is not of this
  403.   // type.
  404.   std::optional<std::nullptr_t> getAsNull() const {
  405.     if (LLVM_LIKELY(Type == T_Null))
  406.       return nullptr;
  407.     return std::nullopt;
  408.   }
  409.   std::optional<bool> getAsBoolean() const {
  410.     if (LLVM_LIKELY(Type == T_Boolean))
  411.       return as<bool>();
  412.     return std::nullopt;
  413.   }
  414.   std::optional<double> getAsNumber() const {
  415.     if (LLVM_LIKELY(Type == T_Double))
  416.       return as<double>();
  417.     if (LLVM_LIKELY(Type == T_Integer))
  418.       return as<int64_t>();
  419.     if (LLVM_LIKELY(Type == T_UINT64))
  420.       return as<uint64_t>();
  421.     return std::nullopt;
  422.   }
  423.   // Succeeds if the Value is a Number, and exactly representable as int64_t.
  424.   std::optional<int64_t> getAsInteger() const {
  425.     if (LLVM_LIKELY(Type == T_Integer))
  426.       return as<int64_t>();
  427.     if (LLVM_LIKELY(Type == T_Double)) {
  428.       double D = as<double>();
  429.       if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
  430.                       D >= double(std::numeric_limits<int64_t>::min()) &&
  431.                       D <= double(std::numeric_limits<int64_t>::max())))
  432.         return D;
  433.     }
  434.     return std::nullopt;
  435.   }
  436.   std::optional<uint64_t> getAsUINT64() const {
  437.     if (Type == T_UINT64)
  438.       return as<uint64_t>();
  439.     else if (Type == T_Integer) {
  440.       int64_t N = as<int64_t>();
  441.       if (N >= 0)
  442.         return as<uint64_t>();
  443.     }
  444.     return std::nullopt;
  445.   }
  446.   std::optional<llvm::StringRef> getAsString() const {
  447.     if (Type == T_String)
  448.       return llvm::StringRef(as<std::string>());
  449.     if (LLVM_LIKELY(Type == T_StringRef))
  450.       return as<llvm::StringRef>();
  451.     return std::nullopt;
  452.   }
  453.   const json::Object *getAsObject() const {
  454.     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
  455.   }
  456.   json::Object *getAsObject() {
  457.     return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr;
  458.   }
  459.   const json::Array *getAsArray() const {
  460.     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
  461.   }
  462.   json::Array *getAsArray() {
  463.     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
  464.   }
  465.  
  466. private:
  467.   void destroy();
  468.   void copyFrom(const Value &M);
  469.   // We allow moving from *const* Values, by marking all members as mutable!
  470.   // This hack is needed to support initializer-list syntax efficiently.
  471.   // (std::initializer_list<T> is a container of const T).
  472.   void moveFrom(const Value &&M);
  473.   friend class Array;
  474.   friend class Object;
  475.  
  476.   template <typename T, typename... U> void create(U &&... V) {
  477.     new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...);
  478.   }
  479.   template <typename T> T &as() const {
  480.     // Using this two-step static_cast via void * instead of reinterpret_cast
  481.     // silences a -Wstrict-aliasing false positive from GCC6 and earlier.
  482.     void *Storage = static_cast<void *>(&Union);
  483.     return *static_cast<T *>(Storage);
  484.   }
  485.  
  486.   friend class OStream;
  487.  
  488.   enum ValueType : char16_t {
  489.     T_Null,
  490.     T_Boolean,
  491.     T_Double,
  492.     T_Integer,
  493.     T_UINT64,
  494.     T_StringRef,
  495.     T_String,
  496.     T_Object,
  497.     T_Array,
  498.   };
  499.   // All members mutable, see moveFrom().
  500.   mutable ValueType Type;
  501.   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
  502.                                       llvm::StringRef, std::string, json::Array,
  503.                                       json::Object>
  504.       Union;
  505.   friend bool operator==(const Value &, const Value &);
  506. };
  507.  
  508. bool operator==(const Value &, const Value &);
  509. inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
  510.  
  511. // Array Methods
  512. inline Value &Array::operator[](size_t I) { return V[I]; }
  513. inline const Value &Array::operator[](size_t I) const { return V[I]; }
  514. inline Value &Array::front() { return V.front(); }
  515. inline const Value &Array::front() const { return V.front(); }
  516. inline Value &Array::back() { return V.back(); }
  517. inline const Value &Array::back() const { return V.back(); }
  518. inline Value *Array::data() { return V.data(); }
  519. inline const Value *Array::data() const { return V.data(); }
  520.  
  521. inline typename Array::iterator Array::begin() { return V.begin(); }
  522. inline typename Array::const_iterator Array::begin() const { return V.begin(); }
  523. inline typename Array::iterator Array::end() { return V.end(); }
  524. inline typename Array::const_iterator Array::end() const { return V.end(); }
  525.  
  526. inline bool Array::empty() const { return V.empty(); }
  527. inline size_t Array::size() const { return V.size(); }
  528. inline void Array::reserve(size_t S) { V.reserve(S); }
  529.  
  530. inline void Array::clear() { V.clear(); }
  531. inline void Array::push_back(const Value &E) { V.push_back(E); }
  532. inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); }
  533. template <typename... Args> inline void Array::emplace_back(Args &&...A) {
  534.   V.emplace_back(std::forward<Args>(A)...);
  535. }
  536. inline void Array::pop_back() { V.pop_back(); }
  537. inline typename Array::iterator Array::insert(const_iterator P, const Value &E) {
  538.   return V.insert(P, E);
  539. }
  540. inline typename Array::iterator Array::insert(const_iterator P, Value &&E) {
  541.   return V.insert(P, std::move(E));
  542. }
  543. template <typename It>
  544. inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) {
  545.   return V.insert(P, A, Z);
  546. }
  547. template <typename... Args>
  548. inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) {
  549.   return V.emplace(P, std::forward<Args>(A)...);
  550. }
  551. inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; }
  552.  
  553. /// ObjectKey is a used to capture keys in Object. Like Value but:
  554. ///   - only strings are allowed
  555. ///   - it's optimized for the string literal case (Owned == nullptr)
  556. /// Like Value, strings must be UTF-8. See isUTF8 documentation for details.
  557. class ObjectKey {
  558. public:
  559.   ObjectKey(const char *S) : ObjectKey(StringRef(S)) {}
  560.   ObjectKey(std::string S) : Owned(new std::string(std::move(S))) {
  561.     if (LLVM_UNLIKELY(!isUTF8(*Owned))) {
  562.       assert(false && "Invalid UTF-8 in value used as JSON");
  563.       *Owned = fixUTF8(std::move(*Owned));
  564.     }
  565.     Data = *Owned;
  566.   }
  567.   ObjectKey(llvm::StringRef S) : Data(S) {
  568.     if (LLVM_UNLIKELY(!isUTF8(Data))) {
  569.       assert(false && "Invalid UTF-8 in value used as JSON");
  570.       *this = ObjectKey(fixUTF8(S));
  571.     }
  572.   }
  573.   ObjectKey(const llvm::SmallVectorImpl<char> &V)
  574.       : ObjectKey(std::string(V.begin(), V.end())) {}
  575.   ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {}
  576.  
  577.   ObjectKey(const ObjectKey &C) { *this = C; }
  578.   ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {}
  579.   ObjectKey &operator=(const ObjectKey &C) {
  580.     if (C.Owned) {
  581.       Owned.reset(new std::string(*C.Owned));
  582.       Data = *Owned;
  583.     } else {
  584.       Data = C.Data;
  585.     }
  586.     return *this;
  587.   }
  588.   ObjectKey &operator=(ObjectKey &&) = default;
  589.  
  590.   operator llvm::StringRef() const { return Data; }
  591.   std::string str() const { return Data.str(); }
  592.  
  593. private:
  594.   // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned
  595.   // could be 2 pointers at most.
  596.   std::unique_ptr<std::string> Owned;
  597.   llvm::StringRef Data;
  598. };
  599.  
  600. inline bool operator==(const ObjectKey &L, const ObjectKey &R) {
  601.   return llvm::StringRef(L) == llvm::StringRef(R);
  602. }
  603. inline bool operator!=(const ObjectKey &L, const ObjectKey &R) {
  604.   return !(L == R);
  605. }
  606. inline bool operator<(const ObjectKey &L, const ObjectKey &R) {
  607.   return StringRef(L) < StringRef(R);
  608. }
  609.  
  610. struct Object::KV {
  611.   ObjectKey K;
  612.   Value V;
  613. };
  614.  
  615. inline Object::Object(std::initializer_list<KV> Properties) {
  616.   for (const auto &P : Properties) {
  617.     auto R = try_emplace(P.K, nullptr);
  618.     if (R.second)
  619.       R.first->getSecond().moveFrom(std::move(P.V));
  620.   }
  621. }
  622. inline std::pair<Object::iterator, bool> Object::insert(KV E) {
  623.   return try_emplace(std::move(E.K), std::move(E.V));
  624. }
  625. inline bool Object::erase(StringRef K) {
  626.   return M.erase(ObjectKey(K));
  627. }
  628.  
  629. /// A "cursor" marking a position within a Value.
  630. /// The Value is a tree, and this is the path from the root to the current node.
  631. /// This is used to associate errors with particular subobjects.
  632. class Path {
  633. public:
  634.   class Root;
  635.  
  636.   /// Records that the value at the current path is invalid.
  637.   /// Message is e.g. "expected number" and becomes part of the final error.
  638.   /// This overwrites any previously written error message in the root.
  639.   void report(llvm::StringLiteral Message);
  640.  
  641.   /// The root may be treated as a Path.
  642.   Path(Root &R) : Parent(nullptr), Seg(&R) {}
  643.   /// Derives a path for an array element: this[Index]
  644.   Path index(unsigned Index) const { return Path(this, Segment(Index)); }
  645.   /// Derives a path for an object field: this.Field
  646.   Path field(StringRef Field) const { return Path(this, Segment(Field)); }
  647.  
  648. private:
  649.   /// One element in a JSON path: an object field (.foo) or array index [27].
  650.   /// Exception: the root Path encodes a pointer to the Path::Root.
  651.   class Segment {
  652.     uintptr_t Pointer;
  653.     unsigned Offset;
  654.  
  655.   public:
  656.     Segment() = default;
  657.     Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
  658.     Segment(llvm::StringRef Field)
  659.         : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
  660.           Offset(static_cast<unsigned>(Field.size())) {}
  661.     Segment(unsigned Index) : Pointer(0), Offset(Index) {}
  662.  
  663.     bool isField() const { return Pointer != 0; }
  664.     StringRef field() const {
  665.       return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
  666.     }
  667.     unsigned index() const { return Offset; }
  668.     Root *root() const { return reinterpret_cast<Root *>(Pointer); }
  669.   };
  670.  
  671.   const Path *Parent;
  672.   Segment Seg;
  673.  
  674.   Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
  675. };
  676.  
  677. /// The root is the trivial Path to the root value.
  678. /// It also stores the latest reported error and the path where it occurred.
  679. class Path::Root {
  680.   llvm::StringRef Name;
  681.   llvm::StringLiteral ErrorMessage;
  682.   std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
  683.  
  684.   friend void Path::report(llvm::StringLiteral Message);
  685.  
  686. public:
  687.   Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
  688.   // No copy/move allowed as there are incoming pointers.
  689.   Root(Root &&) = delete;
  690.   Root &operator=(Root &&) = delete;
  691.   Root(const Root &) = delete;
  692.   Root &operator=(const Root &) = delete;
  693.  
  694.   /// Returns the last error reported, or else a generic error.
  695.   Error getError() const;
  696.   /// Print the root value with the error shown inline as a comment.
  697.   /// Unrelated parts of the value are elided for brevity, e.g.
  698.   ///   {
  699.   ///      "id": 42,
  700.   ///      "name": /* expected string */ null,
  701.   ///      "properties": { ... }
  702.   ///   }
  703.   void printErrorContext(const Value &, llvm::raw_ostream &) const;
  704. };
  705.  
  706. // Standard deserializers are provided for primitive types.
  707. // See comments on Value.
  708. inline bool fromJSON(const Value &E, std::string &Out, Path P) {
  709.   if (auto S = E.getAsString()) {
  710.     Out = std::string(*S);
  711.     return true;
  712.   }
  713.   P.report("expected string");
  714.   return false;
  715. }
  716. inline bool fromJSON(const Value &E, int &Out, Path P) {
  717.   if (auto S = E.getAsInteger()) {
  718.     Out = *S;
  719.     return true;
  720.   }
  721.   P.report("expected integer");
  722.   return false;
  723. }
  724. inline bool fromJSON(const Value &E, int64_t &Out, Path P) {
  725.   if (auto S = E.getAsInteger()) {
  726.     Out = *S;
  727.     return true;
  728.   }
  729.   P.report("expected integer");
  730.   return false;
  731. }
  732. inline bool fromJSON(const Value &E, double &Out, Path P) {
  733.   if (auto S = E.getAsNumber()) {
  734.     Out = *S;
  735.     return true;
  736.   }
  737.   P.report("expected number");
  738.   return false;
  739. }
  740. inline bool fromJSON(const Value &E, bool &Out, Path P) {
  741.   if (auto S = E.getAsBoolean()) {
  742.     Out = *S;
  743.     return true;
  744.   }
  745.   P.report("expected boolean");
  746.   return false;
  747. }
  748. inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
  749.   if (auto S = E.getAsUINT64()) {
  750.     Out = *S;
  751.     return true;
  752.   }
  753.   P.report("expected uint64_t");
  754.   return false;
  755. }
  756. inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
  757.   if (auto S = E.getAsNull()) {
  758.     Out = *S;
  759.     return true;
  760.   }
  761.   P.report("expected null");
  762.   return false;
  763. }
  764. template <typename T>
  765. bool fromJSON(const Value &E, std::optional<T> &Out, Path P) {
  766.   if (!E.getAsNull()) {
  767.     T Decoded; // Scratch value: Out stays untouched if decoding fails.
  768.     const bool Ok = fromJSON(E, Decoded, P);
  769.     if (Ok)
  770.       Out = std::move(Decoded);
  771.     return Ok;
  772.   }
  773.   Out = std::nullopt; // JSON null decodes to an empty optional.
  774.   return true;
  775. }
  776. template <typename T>
  777. bool fromJSON(const Value &E, std::vector<T> &Out, Path P) {
  778.   if (const auto *Elems = E.getAsArray()) {
  779.     Out.clear(); // Start empty so resize() value-initializes every slot.
  780.     Out.resize(Elems->size());
  781.     size_t Pos = 0; // Index of the next element to decode.
  782.     while (Pos != Out.size() && fromJSON((*Elems)[Pos], Out[Pos], P.index(Pos)))
  783.       ++Pos;
  784.     return Pos == Out.size(); // False when decoding stopped at a bad element.
  785.   }
  786.   P.report("expected array");
  787.   return false;
  788. }
  789. template <typename T>
  790. bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) {
  791.   if (const auto *Fields = E.getAsObject()) {
  792.     Out.clear(); // Entries from an earlier decode must not survive.
  793.     for (const auto &Field : *Fields)
  794.       if (!fromJSON(Field.second, Out[std::string(llvm::StringRef(Field.first))],
  795.                     P.field(Field.first)))
  796.         return false; // The failing field already reported its error.
  797.     return true;
  798.   }
  799.   P.report("expected object");
  800.   return false;
  801. }
  802.  
  803. // Serializes std::optional<T> for supported T: empty becomes JSON null.
  804. template <typename T> Value toJSON(const std::optional<T> &Opt) {
  805.   return !Opt.has_value() ? Value(nullptr) : Value(*Opt);
  806. }
  807.  
  808. /// Helper for mapping JSON objects onto protocol structs.
  809. ///
  810. /// Example:
  811. /// \code
  812. ///   bool fromJSON(const Value &E, MyStruct &R, Path P) {
  813. ///     ObjectMapper O(E, P);
  814. ///     // When returning false, error details were already reported.
  815. ///     return O && O.map("mandatory_field", R.MandatoryField) &&
  816. ///         O.mapOptional("optional_field", R.OptionalField);
  817. ///   }
  818. /// \endcode
  819. class ObjectMapper {
  820. public:
  821.   /// If O is not an object, this mapper is invalid and an error is reported.
  822.   ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) {
  823.     if (!O)
  824.       P.report("expected object");
  825.   }
  826.  
  827.   /// True if the expression is an object.
  828.   /// Must be checked before calling map().
  829.   operator bool() const { return O; }
  830.  
  831.   /// Maps a property to a field.
  832.   /// If the property is missing or invalid, reports an error.
  833.   template <typename T> bool map(StringLiteral Prop, T &Out) {
  834.     assert(*this && "Must check this is an object before calling map()");
  835.     if (const Value *E = O->get(Prop))
  836.       return fromJSON(*E, Out, P.field(Prop));
  837.     P.field(Prop).report("missing value");
  838.     return false;
  839.   }
  840.  
  841.   /// Maps a property to a field, if it exists.
  842.   /// If the property exists and is invalid, reports an error.
  843.   /// (Optional requires special handling, because missing keys are OK).
  844.   template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) {
  845.     assert(*this && "Must check this is an object before calling map()");
  846.     if (const Value *E = O->get(Prop))
  847.       return fromJSON(*E, Out, P.field(Prop));
  848.     Out = std::nullopt;
  849.     return true;
  850.   }
  851.  
  852.   /// Maps a property to a field, if it exists.
  853.   /// If the property exists and is invalid, reports an error.
  854.   /// If the property does not exist, Out is unchanged.
  855.   template <typename T> bool mapOptional(StringLiteral Prop, T &Out) {
  856.     assert(*this && "Must check this is an object before calling map()");
  857.     if (const Value *E = O->get(Prop))
  858.       return fromJSON(*E, Out, P.field(Prop));
  859.     return true;
  860.   }
  861.  
  862. private:
  863.   const Object *O;
  864.   Path P;
  865. };
  866.  
  867. /// Parses the provided JSON source, or returns a ParseError.
  868. /// The returned Value is self-contained and owns its strings (they do not refer
  869. /// to the original source).
  870. llvm::Expected<Value> parse(llvm::StringRef JSON);
  871.  
  872. class ParseError : public llvm::ErrorInfo<ParseError> {
  873.   const char *Msg; // Stored as passed in; the pointer is not copied or owned.
  874.   unsigned Line, Column, Offset; // Position fields printed by log().
  875.  
  876. public:
  877.   static char ID; // Declared only; the definition lives out of line.
  878.   ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset)
  879.       : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {}
  880.   void log(llvm::raw_ostream &OS) const override { // "[line:col, byte=N]: msg"
  881.     OS << llvm::formatv("[{0}:{1}, byte={2}]: {3}", Line, Column, Offset, Msg);
  882.   }
  883.   std::error_code convertToErrorCode() const override {
  884.     return llvm::inconvertibleErrorCode(); // No std::error_code equivalent.
  885.   }
  886. };
  887.  
  888. /// Parses JSON text, then converts the parsed Value to T with fromJSON().
  889. /// RootName describes the root object and is used in error messages.
  890. template <typename T>
  891. Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") {
  892.   llvm::Expected<Value> Parsed = parse(JSON);
  893.   if (!Parsed)
  894.     return Parsed.takeError(); // Text was not valid JSON.
  895.   Path::Root Root(RootName);
  896.   T Converted;
  897.   if (!fromJSON(*Parsed, Converted, Root))
  898.     return Root.getError(); // Conversion failed; Root supplies the error.
  899.   return std::move(Converted);
  900. }
  901.  
  902. /// json::OStream allows writing well-formed JSON without materializing
  903. /// all structures as json::Value ahead of time.
  904. /// It's faster, lower-level, and less safe than OS << json::Value.
  905. /// It also allows emitting more constructs, such as comments.
  906. ///
  907. /// Only one "top-level" object can be written to a stream.
  908. /// Simplest usage involves passing lambdas (Blocks) to fill in containers:
  909. ///
  910. ///   json::OStream J(OS);
  911. ///   J.array([&]{
  912. ///     for (const Event &E : Events)
  913. ///       J.object([&] {
  914. ///         J.attribute("timestamp", int64_t(E.Time));
  915. ///         J.attributeArray("participants", [&] {
  916. ///           for (const Participant &P : E.Participants)
  917. ///             J.value(P.toString());
  918. ///         });
  919. ///       });
  920. ///   });
  921. ///
  922. /// This would produce JSON like:
  923. ///
  924. ///   [
  925. ///     {
  926. ///       "timestamp": 19287398741,
  927. ///       "participants": [
  928. ///         "King Kong",
  929. ///         "Miley Cyrus",
  930. ///         "Cleopatra"
  931. ///       ]
  932. ///     },
  933. ///     ...
  934. ///   ]
  935. ///
  936. /// The lower level begin/end methods (arrayBegin()) are more flexible but
  937. /// care must be taken to pair them correctly:
  938. ///
  939. ///   json::OStream J(OS);
  940. ///   J.arrayBegin();
  941. ///   for (const Event &E : Events) {
  942. ///     J.objectBegin();
  943. ///     J.attribute("timestamp", int64_t(E.Time));
  944. ///     J.attributeBegin("participants");
  945. ///     for (const Participant &P : E.Participants)
  946. ///       J.value(P.toString());
  947. ///     J.attributeEnd();
  948. ///     J.objectEnd();
  949. ///   }
  950. ///   J.arrayEnd();
  951. ///
  952. /// If the call sequence isn't valid JSON, asserts will fire in debug mode.
  953. /// This can be mismatched begin()/end() pairs, trying to emit attributes inside
  954. /// an array, and so on.
  955. /// With asserts disabled, this is undefined behavior.
  956. class OStream {
  957.  public:
  958.   using Block = llvm::function_ref<void()>; // Callback that emits contents.
  959.   /// Writes JSON to OS. If IndentSize is nonzero, output is pretty-printed.
  960.   explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
  961.       : OS(OS), IndentSize(IndentSize) {
  962.     Stack.emplace_back(); // Initial state: Singleton context, no value yet.
  963.   }
  964.   ~OStream() { // Asserts that exactly one complete top-level value was written.
  965.     assert(Stack.size() == 1 && "Unmatched begin()/end()");
  966.     assert(Stack.back().Ctx == Singleton);
  967.     assert(Stack.back().HasValue && "Did not write top-level value");
  968.   }
  969.  
  970.   /// Flushes the underlying ostream. OStream does not buffer internally.
  971.   void flush() { OS.flush(); }
  972.  
  973.   // High level functions to output a value.
  974.   // Valid at top-level (exactly once), in an attribute value (exactly once),
  975.   // or in an array (any number of times).
  976.  
  977.   /// Emit a self-contained value (number, string, vector<string> etc).
  978.   void value(const Value &V);
  979.   /// Emit an array whose elements are emitted in the provided Block.
  980.   void array(Block Contents) {
  981.     arrayBegin();
  982.     Contents(); // The caller emits the elements here.
  983.     arrayEnd();
  984.   }
  985.   /// Emit an object whose elements are emitted in the provided Block.
  986.   void object(Block Contents) {
  987.     objectBegin();
  988.     Contents(); // The caller emits the attributes here.
  989.     objectEnd();
  990.   }
  991.   /// Emit an externally-serialized value.
  992.   /// The caller must write exactly one valid JSON value to the provided stream.
  993.   /// No validation or formatting of this value occurs.
  994.   void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) {
  995.     rawValueBegin();
  996.     Contents(OS); // The callback writes straight to the underlying stream.
  997.     rawValueEnd();
  998.   }
  999.   void rawValue(llvm::StringRef Contents) { // Contents is written verbatim.
  1000.     rawValue([&](raw_ostream &OS) { OS << Contents; });
  1001.   }
  1002.   /// Emit a JavaScript comment associated with the next printed value.
  1003.   /// The string must be valid until the next attribute or value is emitted.
  1004.   /// Comments are not part of standard JSON, and many parsers reject them!
  1005.   void comment(llvm::StringRef);
  1006.  
  1007.   // High level functions to output object attributes.
  1008.   // Valid only within an object (any number of times).
  1009.  
  1010.   /// Emit an attribute whose value is self-contained (number, vector<int> etc).
  1011.   void attribute(llvm::StringRef Key, const Value& Contents) {
  1012.     attributeImpl(Key, [&] { value(Contents); });
  1013.   }
  1014.   /// Emit an attribute whose value is an array with elements from the Block.
  1015.   void attributeArray(llvm::StringRef Key, Block Contents) {
  1016.     attributeImpl(Key, [&] { array(Contents); });
  1017.   }
  1018.   /// Emit an attribute whose value is an object with attributes from the Block.
  1019.   void attributeObject(llvm::StringRef Key, Block Contents) {
  1020.     attributeImpl(Key, [&] { object(Contents); });
  1021.   }
  1022.  
  1023.   // Low-level begin/end functions to output arrays, objects, and attributes.
  1024.   // Must be correctly paired. Allowed contexts are as above.
  1025.  
  1026.   void arrayBegin();
  1027.   void arrayEnd();
  1028.   void objectBegin();
  1029.   void objectEnd();
  1030.   void attributeBegin(llvm::StringRef Key);
  1031.   void attributeEnd();
  1032.   raw_ostream &rawValueBegin(); // Presumably returns the stream to write to.
  1033.   void rawValueEnd();
  1034.  
  1035. private: // Shared helpers first, then the writer's state.
  1036.   void attributeImpl(llvm::StringRef Key, Block Contents) {
  1037.     attributeBegin(Key);
  1038.     Contents(); // Emits the attribute's single value.
  1039.     attributeEnd();
  1040.   }
  1041.  
  1042.   void valueBegin();   // Defined out of line.
  1043.   void flushComment(); // Defined out of line.
  1044.   void newline();      // Defined out of line.
  1045.  
  1046.   enum Context {
  1047.     Singleton, // Top level, or object attribute.
  1048.     Array,
  1049.     Object,
  1050.     RawValue, // External code writing a value to OS directly.
  1051.   };
  1052.   struct State {
  1053.     Context Ctx = Singleton;
  1054.     bool HasValue = false; // The destructor asserts this for the top level.
  1055.   };
  1056.   llvm::SmallVector<State, 16> Stack; // Never empty.
  1057.   llvm::StringRef PendingComment; // Non-owning; see lifetime note on comment().
  1058.   llvm::raw_ostream &OS;          // Destination stream; referenced, not owned.
  1059.   unsigned IndentSize;            // Zero means compact output.
  1060.   unsigned Indent = 0; // Presumably the current indent; updated out of line.
  1061. };
  1062.  
  1063. /// Writes V to OS as JSON and returns OS so that calls can be chained.
  1064. /// The formatting is compact (no extra whitespace) and deterministic.
  1065. /// For pretty-printing, use the formatv() format_provider below.
  1066. inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
  1067.   OStream Compact(OS); Compact.value(V); // Default IndentSize of 0: compact.
  1068.   return OS;
  1069. }
  1070. } // namespace json
  1071.  
  1072. /// Lets formatv() print a json::Value.
  1073. /// The default style is basic/compact formatting, like operator<<.
  1074. /// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
  1075. template <> struct format_provider<llvm::json::Value> {
  1076.   static void format(const llvm::json::Value &, raw_ostream &, StringRef);
  1077. }; // format() is declared here only; its definition is out of line.
  1078. } // namespace llvm
  1079.  
  1080. #endif
  1081.