Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // This file defines a DataflowAnalysisContext class that owns objects that |
||
| 10 | // encompass the state of a program and stores context that is used during |
||
| 11 | // dataflow analysis. |
||
| 12 | // |
||
| 13 | //===----------------------------------------------------------------------===// |
||
| 14 | |||
| 15 | #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H |
||
| 16 | #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H |
||
| 17 | |||
| 18 | #include "clang/AST/Decl.h" |
||
| 19 | #include "clang/AST/Expr.h" |
||
| 20 | #include "clang/AST/TypeOrdering.h" |
||
| 21 | #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" |
||
| 22 | #include "clang/Analysis/FlowSensitive/Solver.h" |
||
| 23 | #include "clang/Analysis/FlowSensitive/StorageLocation.h" |
||
| 24 | #include "clang/Analysis/FlowSensitive/Value.h" |
||
| 25 | #include "llvm/ADT/DenseMap.h" |
||
| 26 | #include "llvm/ADT/DenseSet.h" |
||
| 27 | #include "llvm/Support/Compiler.h" |
||
| 28 | #include <cassert> |
||
| 29 | #include <memory> |
||
| 30 | #include <optional> |
||
| 31 | #include <type_traits> |
||
| 32 | #include <utility> |
||
| 33 | #include <vector> |
||
| 34 | |||
| 35 | namespace clang { |
||
| 36 | namespace dataflow { |
||
| 37 | |||
| 38 | /// Skip past nodes that the CFG does not emit. These nodes are invisible to |
||
| 39 | /// flow-sensitive analysis, and should be ignored as they will effectively not |
||
| 40 | /// exist. |
||
| 41 | /// |
||
| 42 | /// * `ParenExpr` - The CFG takes the operator precedence into account, but |
||
| 43 | /// otherwise omits the node afterwards. |
||
| 44 | /// |
||
| 45 | /// * `ExprWithCleanups` - The CFG will generate the appropriate calls to |
||
| 46 | /// destructors and then omit the node. |
||
| 47 | /// |
||
| 48 | const Expr &ignoreCFGOmittedNodes(const Expr &E); |
||
| 49 | const Stmt &ignoreCFGOmittedNodes(const Stmt &S); |
||
| 50 | |||
| 51 | /// Returns the set of all fields in the type. |
||
| 52 | llvm::DenseSet<const FieldDecl *> getObjectFields(QualType Type); |
||
| 53 | |||
| 54 | struct ContextSensitiveOptions { |
||
| 55 | /// The maximum depth to analyze; defaults to 2. A value of zero is equivalent |
||
| 56 | /// to disabling context-sensitive analysis entirely. |
||
| 57 | unsigned Depth = 2; |
||
| 58 | }; |
||
| 59 | |||
| 60 | /// Owns objects that encompass the state of a program and stores context that |
||
| 61 | /// is used during dataflow analysis. |
||
| 62 | class DataflowAnalysisContext { |
||
| 63 | public: |
||
| 64 | struct Options { |
||
| 65 | /// Options for analyzing function bodies when present in the translation |
||
| 66 | /// unit, or `std::nullopt` to disable context-sensitive analysis. Note that |
||
| 67 | /// this is fundamentally limited: some constructs, such as recursion, are |
||
| 68 | /// explicitly unsupported. |
||
| 69 | std::optional<ContextSensitiveOptions> ContextSensitiveOpts; |
||
| 70 | }; |
||
| 71 | |||
| 72 | /// Constructs a dataflow analysis context that takes ownership of the solver |
||
| 73 | /// `S`. When `Opts` is omitted, no context-sensitive options are set. |
||
| 74 | /// |
||
| 75 | /// Requirements: |
||
| 76 | /// `S` must not be null. |
||
| 77 | DataflowAnalysisContext(std::unique_ptr<Solver> S, |
||
| 78 | Options Opts = Options{ |
||
| 79 | /*ContextSensitiveOpts=*/std::nullopt}) |
||
| 80 | : S(std::move(S)), TrueVal(createAtomicBoolValue()), |
||
| 81 | FalseVal(createAtomicBoolValue()), Opts(Opts) { |
||
| 82 | assert(this->S != nullptr); |
||
| 83 | } |
||
| 84 | |||
| 85 | /// Stores `Loc` in this context, which becomes its owner, and returns a |
||
| 86 | /// reference to the stored storage location. |
||
| 87 | /// |
||
| 88 | /// Requirements: |
||
| 89 | /// `Loc` must not be null. |
||
| 90 | template <typename T> |
||
| 91 | std::enable_if_t<std::is_base_of<StorageLocation, T>::value, T &> |
||
| 92 | takeOwnership(std::unique_ptr<T> Loc) { |
||
| 93 | assert(Loc != nullptr); |
||
| 94 | auto &Owned = Locs.emplace_back(std::move(Loc)); |
||
| 95 | return *cast<T>(Owned.get()); |
||
| 96 | } |
||
| 97 | |||
| 98 | /// Stores `Val` in this context, which becomes its owner, and returns a |
||
| 99 | /// reference to the stored value. |
||
| 100 | /// |
||
| 101 | /// Requirements: |
||
| 102 | /// `Val` must not be null. |
||
| 103 | template <typename T> |
||
| 104 | std::enable_if_t<std::is_base_of<Value, T>::value, T &> |
||
| 105 | takeOwnership(std::unique_ptr<T> Val) { |
||
| 106 | assert(Val != nullptr); |
||
| 107 | auto &Owned = Vals.emplace_back(std::move(Val)); |
||
| 108 | return *cast<T>(Owned.get()); |
||
| 109 | } |
||
| 110 | |||
| 111 | /// Returns a new storage location appropriate for `Type`. |
||
| 112 | /// |
||
| 113 | /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. |
||
| 114 | StorageLocation &createStorageLocation(QualType Type); |
||
| 115 | |||
| 116 | /// Returns a stable storage location for `D`. |
||
| 117 | StorageLocation &getStableStorageLocation(const VarDecl &D); |
||
| 118 | |||
| 119 | /// Returns a stable storage location for `E`. |
||
| 120 | StorageLocation &getStableStorageLocation(const Expr &E); |
||
| 121 | |||
| 122 | /// Records `Loc` as the storage location assigned to `D`. |
||
| 123 | /// |
||
| 124 | /// Requirements: |
||
| 125 | /// |
||
| 126 | /// `D` must not already have an assigned storage location. |
||
| 127 | void setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { |
||
| 128 | assert(DeclToLoc.count(&D) == 0); |
||
| 129 | DeclToLoc[&D] = &Loc; |
||
| 130 | } |
||
| 131 | |||
| 132 | /// Looks up the storage location that was assigned to `D`. |
||
| 133 | /// |
||
| 134 | /// Returns null if `D` has no assigned storage location. |
||
| 135 | StorageLocation *getStorageLocation(const ValueDecl &D) const { |
||
| 136 | return DeclToLoc.lookup(&D); |
||
| 137 | } |
||
| 138 | |||
| 139 | /// Records `Loc` as the storage location assigned to `E`. The entry is keyed |
||
| 140 | /// by the expression that `ignoreCFGOmittedNodes(E)` returns. |
||
| 141 | /// |
||
| 142 | /// Requirements: |
||
| 143 | /// `E` must not already have an assigned storage location. |
||
| 144 | void setStorageLocation(const Expr &E, StorageLocation &Loc) { |
||
| 145 | const Expr *Key = &ignoreCFGOmittedNodes(E); |
||
| 146 | assert(ExprToLoc.count(Key) == 0); |
||
| 147 | ExprToLoc[Key] = &Loc; |
||
| 148 | } |
||
| 149 | |||
| 150 | /// Looks up the storage location assigned to `E`, keyed by the result of |
||
| 151 | /// `ignoreCFGOmittedNodes(E)`. Returns null if there is none. |
||
| 152 | StorageLocation *getStorageLocation(const Expr &E) const { |
||
| 153 | const Expr *Key = &ignoreCFGOmittedNodes(E); |
||
| 154 | return ExprToLoc.lookup(Key); |
||
| 155 | } |
||
| 156 | |||
| 157 | /// Returns a pointer value that represents a null pointer. Calls with |
||
| 158 | /// `PointeeType` that are canonically equivalent will return the same result. |
||
| 159 | /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. |
||
| 160 | PointerValue &getOrCreateNullPointerValue(QualType PointeeType); |
||
| 161 | |||
| 162 | /// Returns the symbolic boolean value that models the boolean literal `Value`: |
||
| 163 | /// `TrueVal` for true and `FalseVal` for false, both created at construction. |
||
| 164 | AtomicBoolValue &getBoolLiteralValue(bool Value) const { |
||
| 165 | return Value ? TrueVal : FalseVal; |
||
| 166 | } |
||
| 167 | |||
| 168 | /// Creates a new atomic boolean value owned by this context and returns it. |
||
| 169 | AtomicBoolValue &createAtomicBoolValue() { |
||
| 170 | return takeOwnership(std::make_unique<AtomicBoolValue>()); |
||
| 171 | } |
||
| 172 | |||
| 173 | /// Creates a Top value for booleans, owned by this context. Each instance is |
||
| 174 | /// unique and can be assigned a distinct truth value during solving. |
||
| 175 | /// |
||
| 176 | /// FIXME: `Top iff Top` is true when both Tops are identical (by pointer |
||
| 177 | /// equality), but not when they are distinct values. We should improve the |
||
| 178 | /// implementation so that `Top iff Top` has a consistent meaning, regardless |
||
| 179 | /// of the identity of `Top`. Moreover, I think the meaning should be |
||
| 180 | /// `false`. |
||
| 181 | TopBoolValue &createTopBoolValue() { |
||
| 182 | return takeOwnership(std::make_unique<TopBoolValue>()); |
||
| 183 | } |
||
| 184 | |||
| 185 | /// Returns a boolean value that represents the conjunction of `LHS` and |
||
| 186 | /// `RHS`. Subsequent calls with the same arguments, regardless of their |
||
| 187 | /// order, will return the same result. If the given boolean values represent |
||
| 188 | /// the same value, the result will be the value itself. |
||
| 189 | BoolValue &getOrCreateConjunction(BoolValue &LHS, BoolValue &RHS); |
||
| 190 | |||
| 191 | /// Returns a boolean value that represents the disjunction of `LHS` and |
||
| 192 | /// `RHS`. Subsequent calls with the same arguments, regardless of their |
||
| 193 | /// order, will return the same result. If the given boolean values represent |
||
| 194 | /// the same value, the result will be the value itself. |
||
| 195 | BoolValue &getOrCreateDisjunction(BoolValue &LHS, BoolValue &RHS); |
||
| 196 | |||
| 197 | /// Returns a boolean value that represents the negation of `Val`. Subsequent |
||
| 198 | /// calls with the same argument will return the same result. |
||
| 199 | BoolValue &getOrCreateNegation(BoolValue &Val); |
||
| 200 | |||
| 201 | /// Returns a boolean value that represents `LHS => RHS`. Subsequent calls |
||
| 202 | /// with the same arguments will return the same result. If the given boolean |
||
| 203 | /// values represent the same value, the result will be a value that |
||
| 204 | /// represents the true boolean literal. |
||
| 205 | BoolValue &getOrCreateImplication(BoolValue &LHS, BoolValue &RHS); |
||
| 206 | |||
| 207 | /// Returns a boolean value that represents `LHS <=> RHS`. Subsequent calls |
||
| 208 | /// with the same arguments, regardless of their order, will return the same |
||
| 209 | /// result. If the given boolean values represent the same value, the result |
||
| 210 | /// will be a value that represents the true boolean literal. |
||
| 211 | BoolValue &getOrCreateIff(BoolValue &LHS, BoolValue &RHS); |
||
| 212 | |||
| 213 | /// Creates a fresh flow condition and returns a token that identifies it. The |
||
| 214 | /// token can be used to perform various operations on the flow condition such |
||
| 215 | /// as adding constraints to it, forking it, joining it with another flow |
||
| 216 | /// condition, or checking implications. |
||
| 217 | AtomicBoolValue &makeFlowConditionToken(); |
||
| 218 | |||
| 219 | /// Adds `Constraint` to the flow condition identified by `Token`. |
||
| 220 | void addFlowConditionConstraint(AtomicBoolValue &Token, |
||
| 221 | BoolValue &Constraint); |
||
| 222 | |||
| 223 | /// Creates a new flow condition with the same constraints as the flow |
||
| 224 | /// condition identified by `Token` and returns its token. |
||
| 225 | AtomicBoolValue &forkFlowCondition(AtomicBoolValue &Token); |
||
| 226 | |||
| 227 | /// Creates a new flow condition that represents the disjunction of the flow |
||
| 228 | /// conditions identified by `FirstToken` and `SecondToken`, and returns its |
||
| 229 | /// token. |
||
| 230 | AtomicBoolValue &joinFlowConditions(AtomicBoolValue &FirstToken, |
||
| 231 | AtomicBoolValue &SecondToken); |
||
| 232 | |||
| 233 | // FIXME: This function returns the flow condition expressed directly as its |
||
| 234 | // constraints: (C1 AND C2 AND ...). This differs from the general approach in |
||
| 235 | // the framework where a flow condition is represented as a token (an atomic |
||
| 236 | // boolean) with dependencies and constraints tracked in `FlowConditionDeps` |
||
| 237 | // and `FlowConditionConstraints`: (FC <=> C1 AND C2 AND ...). |
||
| 238 | // Consider if we should make the representation of flow condition consistent, |
||
| 239 | // returning an atomic boolean token with separate constraints instead. |
||
| 240 | // |
||
| 241 | /// Builds and returns the logical formula defining the flow condition |
||
| 242 | /// identified by `Token`. If a value in the formula is present as a key in |
||
| 243 | /// `Substitutions`, it will be substituted with the value it maps to. |
||
| 244 | /// As an example, say we have flow condition tokens FC1, FC2, FC3 and |
||
| 245 | /// FlowConditionConstraints: { FC1: C1, |
||
| 246 | /// FC2: C2, |
||
| 247 | /// FC3: (FC1 v FC2) ^ C3 } |
||
| 248 | /// buildAndSubstituteFlowCondition(FC3, {{C1 -> C1'}}) will return a value |
||
| 249 | /// corresponding to (C1' v C2) ^ C3. |
||
| 250 | BoolValue &buildAndSubstituteFlowCondition( |
||
| 251 | AtomicBoolValue &Token, |
||
| 252 | llvm::DenseMap<AtomicBoolValue *, BoolValue *> Substitutions); |
||
| 253 | |||
| 254 | /// Returns true if and only if the constraints of the flow condition |
||
| 255 | /// identified by `Token` imply that `Val` is true. |
||
| 256 | bool flowConditionImplies(AtomicBoolValue &Token, BoolValue &Val); |
||
| 257 | |||
| 258 | /// Returns true if and only if the constraints of the flow condition |
||
| 259 | /// identified by `Token` are always true. |
||
| 260 | bool flowConditionIsTautology(AtomicBoolValue &Token); |
||
| 261 | |||
| 262 | /// Returns true if `Val1` is equivalent to `Val2`. |
||
| 263 | /// Note: This function doesn't take into account constraints on `Val1` and |
||
| 264 | /// `Val2` imposed by the flow condition. |
||
| 265 | bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2); |
||
| 266 | |||
| 267 | LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token); |
||
| 268 | |||
| 269 | /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise, |
||
| 270 | /// returns null. |
||
| 271 | const ControlFlowContext *getControlFlowContext(const FunctionDecl *F); |
||
| 272 | /// Returns the options stored in this context (set at construction). |
||
| 273 | const Options &getOptions() const { return Opts; } |
||
| 274 | |||
| 275 | private: |
||
| 276 | friend class Environment; |
||
| 277 | |||
| 278 | struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { |
||
| 279 | static QualType getEmptyKey() { |
||
| 280 | // Empty key is opaque pointer 1, so a NULL `QualType` remains a usable key. |
||
| 281 | return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); |
||
| 282 | } |
||
| 283 | |||
| 284 | using DenseMapInfo::getHashValue; |
||
| 285 | using DenseMapInfo::getTombstoneKey; |
||
| 286 | using DenseMapInfo::isEqual; |
||
| 287 | }; |
||
| 288 | |||
| 289 | // Extends the set of modeled field declarations. |
||
| 290 | void addModeledFields(const llvm::DenseSet<const FieldDecl *> &Fields); |
||
| 291 | |||
| 292 | /// Returns the fields of `Type`, limited to the set of fields modeled by this |
||
| 293 | /// context. |
||
| 294 | llvm::DenseSet<const FieldDecl *> getReferencedFields(QualType Type); |
||
| 295 | |||
| 296 | /// Adds all constraints of the flow condition identified by `Token` and all |
||
| 297 | /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used |
||
| 298 | /// to track tokens of flow conditions that were already visited by recursive |
||
| 299 | /// calls. |
||
| 300 | void addTransitiveFlowConditionConstraints( |
||
| 301 | AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints, |
||
| 302 | llvm::DenseSet<AtomicBoolValue *> &VisitedTokens); |
||
| 303 | |||
| 304 | /// Returns the outcome of satisfiability checking on `Constraints`. |
||
| 305 | /// Possible outcomes are: |
||
| 306 | /// - `Satisfiable`: A satisfying assignment exists and is returned. |
||
| 307 | /// - `Unsatisfiable`: A satisfying assignment does not exist. |
||
| 308 | /// - `TimedOut`: The search for a satisfying assignment was not completed. |
||
| 309 | Solver::Result querySolver(llvm::DenseSet<BoolValue *> Constraints); |
||
| 310 | |||
| 311 | /// Returns true if and only if the solver reports `Constraints` as |
||
| 312 | /// unsatisfiable, i.e. it proved that no satisfying assignment exists. |
||
| 313 | bool isUnsatisfiable(llvm::DenseSet<BoolValue *> Constraints) { |
||
| 314 | Solver::Result Outcome = querySolver(std::move(Constraints)); |
||
| 315 | return Outcome.getStatus() == Solver::Result::Status::Unsatisfiable; |
||
| 316 | } |
||
| 317 | |||
| 318 | /// Returns a boolean value as a result of substituting `Val` and its sub |
||
| 319 | /// values based on entries in `SubstitutionsCache`. Intermediate results are |
||
| 320 | /// stored in `SubstitutionsCache` to avoid reprocessing values that have |
||
| 321 | /// already been visited. |
||
| 322 | BoolValue &substituteBoolValue( |
||
| 323 | BoolValue &Val, |
||
| 324 | llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache); |
||
| 325 | |||
| 326 | /// Builds and returns the logical formula defining the flow condition |
||
| 327 | /// identified by `Token`, sub values may be substituted based on entries in |
||
| 328 | /// `SubstitutionsCache`. Intermediate results are stored in |
||
| 329 | /// `SubstitutionsCache` to avoid reprocessing values that have already been |
||
| 330 | /// visited. |
||
| 331 | BoolValue &buildAndSubstituteFlowConditionWithCache( |
||
| 332 | AtomicBoolValue &Token, |
||
| 333 | llvm::DenseMap<BoolValue *, BoolValue *> &SubstitutionsCache); |
||
| 334 | |||
| 335 | std::unique_ptr<Solver> S; |
||
| 336 | |||
| 337 | // Storage for the state of a program. |
||
| 338 | std::vector<std::unique_ptr<StorageLocation>> Locs; |
||
| 339 | std::vector<std::unique_ptr<Value>> Vals; |
||
| 340 | |||
| 341 | // Maps from program declarations and statements to storage locations that are |
||
| 342 | // assigned to them. These assignments are global (aggregated across all basic |
||
| 343 | // blocks) and are used to produce stable storage locations when the same |
||
| 344 | // basic blocks are evaluated multiple times. The storage locations that are |
||
| 345 | // in scope for a particular basic block are stored in `Environment`. |
||
| 346 | llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; |
||
| 347 | llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; |
||
| 348 | |||
| 349 | // Null pointer values, keyed by the canonical pointee type. |
||
| 350 | // |
||
| 351 | // FIXME: The pointer values are indexed by the pointee types which are |
||
| 352 | // required to initialize the `PointeeLoc` field in `PointerValue`. Consider |
||
| 353 | // creating a type-independent `NullPointerValue` without a `PointeeLoc` |
||
| 354 | // field. |
||
| 355 | llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> |
||
| 356 | NullPointerVals; |
||
| 357 | |||
| 358 | AtomicBoolValue &TrueVal; |
||
| 359 | AtomicBoolValue &FalseVal; |
||
| 360 | |||
| 361 | Options Opts; |
||
| 362 | |||
| 363 | // Indices that are used to avoid recreating the same composite boolean |
||
| 364 | // values. |
||
| 365 | llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ConjunctionValue *> |
||
| 366 | ConjunctionVals; |
||
| 367 | llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, DisjunctionValue *> |
||
| 368 | DisjunctionVals; |
||
| 369 | llvm::DenseMap<BoolValue *, NegationValue *> NegationVals; |
||
| 370 | llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, ImplicationValue *> |
||
| 371 | ImplicationVals; |
||
| 372 | llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, BiconditionalValue *> |
||
| 373 | BiconditionalVals; |
||
| 374 | |||
| 375 | // Flow conditions are tracked symbolically: each unique flow condition is |
||
| 376 | // associated with a fresh symbolic variable (token), bound to the clause that |
||
| 377 | // defines the flow condition. Conceptually, each binding corresponds to an |
||
| 378 | // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition |
||
| 379 | // token (an atomic boolean) and `Ci`s are the set of constraints in the |
||
| 380 | // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) is stored in |
||
| 381 | // the `FlowConditionConstraints` map, keyed by the token of the flow |
||
| 382 | // condition. |
||
| 383 | // |
||
| 384 | // Flow conditions depend on other flow conditions if they are created using |
||
| 385 | // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition |
||
| 386 | // dependencies is stored in the `FlowConditionDeps` map. |
||
| 387 | llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<AtomicBoolValue *>> |
||
| 388 | FlowConditionDeps; |
||
| 389 | llvm::DenseMap<AtomicBoolValue *, BoolValue *> FlowConditionConstraints; |
||
| 390 | |||
| 391 | llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts; |
||
| 392 | |||
| 393 | // Fields modeled by environments covered by this context. |
||
| 394 | llvm::DenseSet<const FieldDecl *> ModeledFields; |
||
| 395 | }; |
||
| 396 | |||
| 397 | } // namespace dataflow |
||
| 398 | } // namespace clang |
||
| 399 | |||
| 400 | #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H |