Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. //  This file defines a DataflowAnalysisContext class that owns objects that
  10. //  encompass the state of a program and stores context that is used during
  11. //  dataflow analysis.
  12. //
  13. //===----------------------------------------------------------------------===//
  14.  
  15. #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
  16. #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
  17.  
  18. #include "clang/AST/Decl.h"
  19. #include "clang/AST/Expr.h"
  20. #include "clang/AST/TypeOrdering.h"
  21. #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
  22. #include "clang/Analysis/FlowSensitive/Solver.h"
  23. #include "clang/Analysis/FlowSensitive/StorageLocation.h"
  24. #include "clang/Analysis/FlowSensitive/Value.h"
  25. #include "llvm/ADT/DenseMap.h"
  26. #include "llvm/ADT/DenseSet.h"
  27. #include "llvm/Support/Compiler.h"
  28. #include <cassert>
  29. #include <memory>
  30. #include <optional>
  31. #include <type_traits>
  32. #include <utility>
  33. #include <vector>
  34.  
  35. namespace clang {
  36. namespace dataflow {
  37.  
  38. /// Skip past nodes that the CFG does not emit. These nodes are invisible to
  39. /// flow-sensitive analysis, and should be ignored as they will effectively not
  40. /// exist.
  41. ///
  42. ///   * `ParenExpr` - The CFG takes the operator precedence into account, but
  43. ///   otherwise omits the node afterwards.
  44. ///
  45. ///   * `ExprWithCleanups` - The CFG will generate the appropriate calls to
  46. ///   destructors and then omit the node.
  47. ///
  48. const Expr &ignoreCFGOmittedNodes(const Expr &E);
  49. const Stmt &ignoreCFGOmittedNodes(const Stmt &S);
  50.  
  51. /// Returns the set of all fields in the type.
  52. llvm::DenseSet<const FieldDecl *> getObjectFields(QualType Type);
  53.  
  54. struct ContextSensitiveOptions {
  55.   /// The maximum depth to analyze. A value of zero is equivalent to disabling
  56.   /// context-sensitive analysis entirely.
  57.   unsigned Depth = 2;
  58. };
  59.  
  60. /// Owns objects that encompass the state of a program and stores context that
  61. /// is used during dataflow analysis.
  62. class DataflowAnalysisContext {
  63. public:
  64.   struct Options {
  65.     /// Options for analyzing function bodies when present in the translation
  66.     /// unit, or empty to disable context-sensitive analysis. Note that this is
  67.     /// fundamentally limited: some constructs, such as recursion, are
  68.     /// explicitly unsupported.
  69.     std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
  70.   };
  71.  
  72.   /// Constructs a dataflow analysis context.
  73.   ///
  74.   /// Requirements:
  75.   ///
  76.   ///  `S` must not be null.
  77.   DataflowAnalysisContext(std::unique_ptr<Solver> S,
  78.                           Options Opts = Options{
  79.                               /*ContextSensitiveOpts=*/std::nullopt})
  80.       : S(std::move(S)), TrueVal(createAtomicBoolValue()),
  81.         FalseVal(createAtomicBoolValue()), Opts(Opts) {
  82.     assert(this->S != nullptr);
  83.   }
  84.  
  85.   /// Takes ownership of `Loc` and returns a reference to it.
  86.   ///
  87.   /// Requirements:
  88.   ///
  89.   ///  `Loc` must not be null.
  90.   template <typename T>
  91.   std::enable_if_t<std::is_base_of<StorageLocation, T>::value, T &>
  92.   takeOwnership(std::unique_ptr<T> Loc) {
  93.     assert(Loc != nullptr);
  94.     Locs.push_back(std::move(Loc));
  95.     return *cast<T>(Locs.back().get());
  96.   }
  97.  
  98.   /// Takes ownership of `Val` and returns a reference to it.
  99.   ///
  100.   /// Requirements:
  101.   ///
  102.   ///  `Val` must not be null.
  103.   template <typename T>
  104.   std::enable_if_t<std::is_base_of<Value, T>::value, T &>
  105.   takeOwnership(std::unique_ptr<T> Val) {
  106.     assert(Val != nullptr);
  107.     Vals.push_back(std::move(Val));
  108.     return *cast<T>(Vals.back().get());
  109.   }
  110.  
  111.   /// Returns a new storage location appropriate for `Type`.
  112.   ///
  113.   /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
  114.   StorageLocation &createStorageLocation(QualType Type);
  115.  
  116.   /// Returns a stable storage location for `D`.
  117.   StorageLocation &getStableStorageLocation(const VarDecl &D);
  118.  
  119.   /// Returns a stable storage location for `E`.
  120.   StorageLocation &getStableStorageLocation(const Expr &E);
  121.  
  122.   /// Assigns `Loc` as the storage location of `D`.
  123.   ///
  124.   /// Requirements:
  125.   ///
  126.   ///  `D` must not be assigned a storage location.
  127.   void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
  128.     assert(DeclToLoc.find(&D) == DeclToLoc.end());
  129.     DeclToLoc[&D] = &Loc;
  130.   }
  131.  
  132.   /// Returns the storage location assigned to `D` or null if `D` has no
  133.   /// assigned storage location.
  134.   StorageLocation *getStorageLocation(const ValueDecl &D) const {
  135.     auto It = DeclToLoc.find(&D);
  136.     return It == DeclToLoc.end() ? nullptr : It->second;
  137.   }
  138.  
  139.   /// Assigns `Loc` as the storage location of `E`.
  140.   ///
  141.   /// Requirements:
  142.   ///
  143.   ///  `E` must not be assigned a storage location.
  144.   void setStorageLocation(const Expr &E, StorageLocation &Loc) {
  145.     const Expr &CanonE = ignoreCFGOmittedNodes(E);
  146.     assert(ExprToLoc.find(&CanonE) == ExprToLoc.end());
  147.     ExprToLoc[&CanonE] = &Loc;
  148.   }
  149.  
  150.   /// Returns the storage location assigned to `E` or null if `E` has no
  151.   /// assigned storage location.
  152.   StorageLocation *getStorageLocation(const Expr &E) const {
  153.     auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
  154.     return It == ExprToLoc.end() ? nullptr : It->second;
  155.   }
  156.  
  157.   /// Returns a pointer value that represents a null pointer. Calls with
  158.   /// `PointeeType` that are canonically equivalent will return the same result.
  159.   /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
  160.   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
  161.  
  162.   /// Returns a symbolic boolean value that models a boolean literal equal to
  163.   /// `Value`.
  164.   AtomicBoolValue &getBoolLiteralValue(bool Value) const {
  165.     return Value ? TrueVal : FalseVal;
  166.   }
  167.  
  168.   /// Creates an atomic boolean value.
  169.   AtomicBoolValue &createAtomicBoolValue() {
  170.     return takeOwnership(std::make_unique<AtomicBoolValue>());
  171.   }
  172.  
  173.   /// Creates a Top value for booleans. Each instance is unique and can be
  174.   /// assigned a distinct truth value during solving.
  175.   ///
  176.   /// FIXME: `Top iff Top` is true when both Tops are identical (by pointer
  177.   /// equality), but not when they are distinct values. We should improve the
  178.   /// implementation so that `Top iff Top` has a consistent meaning, regardless
  179.   /// of the identity of `Top`. Moreover, I think the meaning should be
  180.   /// `false`.
  181.   TopBoolValue &createTopBoolValue() {
  182.     return takeOwnership(std::make_unique<TopBoolValue>());
  183.   }
  184.  
  185.   /// Returns a boolean value that represents the conjunction of `LHS` and
  186.   /// `RHS`. Subsequent calls with the same arguments, regardless of their
  187.   /// order, will return the same result. If the given boolean values represent
  188.   /// the same value, the result will be the value itself.
  189.   BoolValue &getOrCreateConjunction(BoolValue &LHS, BoolValue &RHS);
  190.  
  191.   /// Returns a boolean value that represents the disjunction of `LHS` and
  192.   /// `RHS`. Subsequent calls with the same arguments, regardless of their
  193.   /// order, will return the same result. If the given boolean values represent
  194.   /// the same value, the result will be the value itself.
  195.   BoolValue &getOrCreateDisjunction(BoolValue &LHS, BoolValue &RHS);
  196.  
  197.   /// Returns a boolean value that represents the negation of `Val`. Subsequent
  198.   /// calls with the same argument will return the same result.
  199.   BoolValue &getOrCreateNegation(BoolValue &Val);
  200.  
  201.   /// Returns a boolean value that represents `LHS => RHS`. Subsequent calls
  202.   /// with the same arguments, will return the same result. If the given boolean
  203.   /// values represent the same value, the result will be a value that
  204.   /// represents the true boolean literal.
  205.   BoolValue &getOrCreateImplication(BoolValue &LHS, BoolValue &RHS);
  206.  
  207.   /// Returns a boolean value that represents `LHS <=> RHS`. Subsequent calls
  208.   /// with the same arguments, regardless of their order, will return the same
  209.   /// result. If the given boolean values represent the same value, the result
  210.   /// will be a value that represents the true boolean literal.
  211.   BoolValue &getOrCreateIff(BoolValue &LHS, BoolValue &RHS);
  212.  
  213.   /// Creates a fresh flow condition and returns a token that identifies it. The
  214.   /// token can be used to perform various operations on the flow condition such
  215.   /// as adding constraints to it, forking it, joining it with another flow
  216.   /// condition, or checking implications.
  217.   AtomicBoolValue &makeFlowConditionToken();
  218.  
  219.   /// Adds `Constraint` to the flow condition identified by `Token`.
  220.   void addFlowConditionConstraint(AtomicBoolValue &Token,
  221.                                   BoolValue &Constraint);
  222.  
  223.   /// Creates a new flow condition with the same constraints as the flow
  224.   /// condition identified by `Token` and returns its token.
  225.   AtomicBoolValue &forkFlowCondition(AtomicBoolValue &Token);
  226.  
  227.   /// Creates a new flow condition that represents the disjunction of the flow
  228.   /// conditions identified by `FirstToken` and `SecondToken`, and returns its
  229.   /// token.
  230.   AtomicBoolValue &joinFlowConditions(AtomicBoolValue &FirstToken,
  231.                                       AtomicBoolValue &SecondToken);
  232.  
  233.   // FIXME: This function returns the flow condition expressed directly as its
  234.   // constraints: (C1 AND C2 AND ...). This differs from the general approach in
  235.   // the framework where a flow condition is represented as a token (an atomic
  236.   // boolean) with dependencies and constraints tracked in `FlowConditionDeps`
  237.   // and `FlowConditionConstraints`: (FC <=> C1 AND C2 AND ...).
  238.   // Consider if we should make the representation of flow condition consistent,
  239.   // returning an atomic boolean token with separate constraints instead.
  240.   //
  241.   /// Builds and returns the logical formula defining the flow condition
  242.   /// identified by `Token`. If a value in the formula is present as a key in
  243.   /// `Substitutions`, it will be substituted with the value it maps to.
  244.   /// As an example, say we have flow condition tokens FC1, FC2, FC3 and
  245.   /// FlowConditionConstraints: { FC1: C1,
  246.   ///                             FC2: C2,
  247.   ///                             FC3: (FC1 v FC2) ^ C3 }
  248.   /// buildAndSubstituteFlowCondition(FC3, {{C1 -> C1'}}) will return a value
  249.   /// corresponding to (C1' v C2) ^ C3.
  250.   BoolValue &buildAndSubstituteFlowCondition(
  251.       AtomicBoolValue &Token,
  252.       llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions);
  253.  
  254.   /// Returns true if and only if the constraints of the flow condition
  255.   /// identified by `Token` imply that `Val` is true.
  256.   bool flowConditionImplies(AtomicBoolValue &Token, BoolValue &Val);
  257.  
  258.   /// Returns true if and only if the constraints of the flow condition
  259.   /// identified by `Token` are always true.
  260.   bool flowConditionIsTautology(AtomicBoolValue &Token);
  261.  
  262.   /// Returns true if `Val1` is equivalent to `Val2`.
  263.   /// Note: This function doesn't take into account constraints on `Val1` and
  264.   /// `Val2` imposed by the flow condition.
  265.   bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2);
  266.  
  267.   LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token);
  268.  
  269.   /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise,
  270.   /// returns null.
  271.   const ControlFlowContext *getControlFlowContext(const FunctionDecl *F);
  272.  
  273.   const Options &getOptions() { return Opts; }
  274.  
  275. private:
  276.   friend class Environment;
  277.  
  278.   struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
  279.     static QualType getEmptyKey() {
  280.       // Allow a NULL `QualType` by using a different value as the empty key.
  281.       return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
  282.     }
  283.  
  284.     using DenseMapInfo::getHashValue;
  285.     using DenseMapInfo::getTombstoneKey;
  286.     using DenseMapInfo::isEqual;
  287.   };
  288.  
  289.   // Extends the set of modeled field declarations.
  290.   void addModeledFields(const llvm::DenseSet<const FieldDecl *> &Fields);
  291.  
  292.   /// Returns the fields of `Type`, limited to the set of fields modeled by this
  293.   /// context.
  294.   llvm::DenseSet<const FieldDecl *> getReferencedFields(QualType Type);
  295.  
  296.   /// Adds all constraints of the flow condition identified by `Token` and all
  297.   /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used
  298.   /// to track tokens of flow conditions that were already visited by recursive
  299.   /// calls.
  300.   void addTransitiveFlowConditionConstraints(
  301.       AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints,
  302.       llvm::DenseSet<AtomicBoolValue *> &VisitedTokens);
  303.  
  304.   /// Returns the outcome of satisfiability checking on `Constraints`.
  305.   /// Possible outcomes are:
  306.   /// - `Satisfiable`: A satisfying assignment exists and is returned.
  307.   /// - `Unsatisfiable`: A satisfying assignment does not exist.
  308.   /// - `TimedOut`: The search for a satisfying assignment was not completed.
  309.   Solver::Result querySolver(llvm::DenseSet<BoolValue *> Constraints);
  310.  
  311.   /// Returns true if the solver is able to prove that there is no satisfying
  312.   /// assignment for `Constraints`
  313.   bool isUnsatisfiable(llvm::DenseSet<BoolValue *> Constraints) {
  314.     return querySolver(std::move(Constraints)).getStatus() ==
  315.            Solver::Result::Status::Unsatisfiable;
  316.   }
  317.  
  318.   /// Returns a boolean value as a result of substituting `Val` and its sub
  319.   /// values based on entries in `SubstitutionsCache`. Intermediate results are
  320.   /// stored in `SubstitutionsCache` to avoid reprocessing values that have
  321.   /// already been visited.
  322.   BoolValue &substituteBoolValue(
  323.       BoolValue &Val,
  324.       llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache);
  325.  
  326.   /// Builds and returns the logical formula defining the flow condition
  327.   /// identified by `Token`, sub values may be substituted based on entries in
  328.   /// `SubstitutionsCache`. Intermediate results are stored in
  329.   /// `SubstitutionsCache` to avoid reprocessing values that have already been
  330.   /// visited.
  331.   BoolValue &buildAndSubstituteFlowConditionWithCache(
  332.       AtomicBoolValue &Token,
  333.       llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache);
  334.  
  335.   std::unique_ptr<Solver> S;
  336.  
  337.   // Storage for the state of a program.
  338.   std::vector<std::unique_ptr<StorageLocation>> Locs;
  339.   std::vector<std::unique_ptr<Value>> Vals;
  340.  
  341.   // Maps from program declarations and statements to storage locations that are
  342.   // assigned to them. These assignments are global (aggregated across all basic
  343.   // blocks) and are used to produce stable storage locations when the same
  344.   // basic blocks are evaluated multiple times. The storage locations that are
  345.   // in scope for a particular basic block are stored in `Environment`.
  346.   llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
  347.   llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
  348.  
  349.   // Null pointer values, keyed by the canonical pointee type.
  350.   //
  351.   // FIXME: The pointer values are indexed by the pointee types which are
  352.   // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
  353.   // creating a type-independent `NullPointerValue` without a `PointeeLoc`
  354.   // field.
  355.   llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
  356.       NullPointerVals;
  357.  
  358.   AtomicBoolValue &TrueVal;
  359.   AtomicBoolValue &FalseVal;
  360.  
  361.   Options Opts;
  362.  
  363.   // Indices that are used to avoid recreating the same composite boolean
  364.   // values.
  365.   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ConjunctionValue *>
  366.       ConjunctionVals;
  367.   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, DisjunctionValue *>
  368.       DisjunctionVals;
  369.   llvm::DenseMap<BoolValue *, NegationValue *> NegationVals;
  370.   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ImplicationValue *>
  371.       ImplicationVals;
  372.   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, BiconditionalValue *>
  373.       BiconditionalVals;
  374.  
  375.   // Flow conditions are tracked symbolically: each unique flow condition is
  376.   // associated with a fresh symbolic variable (token), bound to the clause that
  377.   // defines the flow condition. Conceptually, each binding corresponds to an
  378.   // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
  379.   // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
  380.   // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
  381.   // the `FlowConditionConstraints` map, keyed by the token of the flow
  382.   // condition.
  383.   //
  384.   // Flow conditions depend on other flow conditions if they are created using
  385.   // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
  386.   // dependencies is stored in the `FlowConditionDeps` map.
  387.   llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<AtomicBoolValue *>>
  388.       FlowConditionDeps;
  389.   llvm::DenseMap<AtomicBoolValue *, BoolValue *> FlowConditionConstraints;
  390.  
  391.   llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts;
  392.  
  393.   // Fields modeled by environments covered by this context.
  394.   llvm::DenseSet<const FieldDecl *> ModeledFields;
  395. };
  396.  
  397. } // namespace dataflow
  398. } // namespace clang
  399.  
  400. #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
  401.