Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | |||
| 9 | #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |
||
| 10 | #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |
||
| 11 | |||
| 12 | #include "clang/Basic/LLVM.h" |
||
| 13 | #include "clang/Lex/DependencyDirectivesScanner.h" |
||
| 14 | #include "llvm/ADT/DenseSet.h" |
||
| 15 | #include "llvm/ADT/StringMap.h" |
||
| 16 | #include "llvm/Support/Allocator.h" |
||
| 17 | #include "llvm/Support/ErrorOr.h" |
||
| 18 | #include "llvm/Support/VirtualFileSystem.h" |
||
| 19 | #include <mutex> |
||
| 20 | #include <optional> |
||
| 21 | |||
| 22 | namespace clang { |
||
| 23 | namespace tooling { |
||
| 24 | namespace dependencies { |
||
| 25 | |||
| 26 | using DependencyDirectivesTy = |
||
| 27 | SmallVector<dependency_directives_scan::Directive, 20>; |
||
| 28 | |||
| 29 | /// Contents and directive tokens of a cached file entry. Single instance can |
||
| 30 | /// be shared between multiple entries. |
||
| 31 | struct CachedFileContents { |
||
| 32 | CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents) |
||
| 33 | : Original(std::move(Contents)), DepDirectives(nullptr) {} |
||
| 34 | |||
| 35 | /// Owning storage for the original contents. |
||
| 36 | std::unique_ptr<llvm::MemoryBuffer> Original; |
||
| 37 | |||
| 38 | /// The mutex that must be locked before mutating directive tokens. |
||
| 39 | std::mutex ValueLock; |
||
| 40 | SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens; |
||
| 41 | /// Accessor to the directive tokens that's atomic to avoid data races. |
||
| 42 | /// \p CachedFileContents has ownership of the pointer. |
||
| 43 | std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives; |
||
| 44 | |||
| 45 | ~CachedFileContents() { delete DepDirectives.load(); } |
||
| 46 | }; |
||
| 47 | |||
| 48 | /// An in-memory representation of a file system entity that is of interest to |
||
| 49 | /// the dependency scanning filesystem. |
||
| 50 | /// |
||
| 51 | /// It represents one of the following: |
||
| 52 | /// - opened file with contents and a stat value, |
||
| 53 | /// - opened file with contents, directive tokens and a stat value, |
||
| 54 | /// - directory entry with its stat value, |
||
| 55 | /// - filesystem error. |
||
| 56 | /// |
||
| 57 | /// Single instance of this class can be shared across different filenames (e.g. |
||
| 58 | /// a regular file and a symlink). For this reason the status filename is empty |
||
| 59 | /// and is only materialized by \c EntryRef that knows the requested filename. |
||
| 60 | class CachedFileSystemEntry { |
||
| 61 | public: |
||
| 62 | /// Creates an entry without contents: either a filesystem error or |
||
| 63 | /// a directory with stat value. |
||
| 64 | CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat) |
||
| 65 | : MaybeStat(std::move(Stat)), Contents(nullptr) { |
||
| 66 | clearStatName(); |
||
| 67 | } |
||
| 68 | |||
| 69 | /// Creates an entry representing a file with contents. |
||
| 70 | CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat, |
||
| 71 | CachedFileContents *Contents) |
||
| 72 | : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) { |
||
| 73 | clearStatName(); |
||
| 74 | } |
||
| 75 | |||
| 76 | /// \returns True if the entry is a filesystem error. |
||
| 77 | bool isError() const { return !MaybeStat; } |
||
| 78 | |||
| 79 | /// \returns True if the current entry represents a directory. |
||
| 80 | bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } |
||
| 81 | |||
| 82 | /// \returns Original contents of the file. |
||
| 83 | StringRef getOriginalContents() const { |
||
| 84 | assert(!isError() && "error"); |
||
| 85 | assert(!MaybeStat->isDirectory() && "not a file"); |
||
| 86 | assert(Contents && "contents not initialized"); |
||
| 87 | return Contents->Original->getBuffer(); |
||
| 88 | } |
||
| 89 | |||
| 90 | /// \returns The scanned preprocessor directive tokens of the file that are |
||
| 91 | /// used to speed up preprocessing, if available. |
||
| 92 | std::optional<ArrayRef<dependency_directives_scan::Directive>> |
||
| 93 | getDirectiveTokens() const { |
||
| 94 | assert(!isError() && "error"); |
||
| 95 | assert(!isDirectory() && "not a file"); |
||
| 96 | assert(Contents && "contents not initialized"); |
||
| 97 | if (auto *Directives = Contents->DepDirectives.load()) { |
||
| 98 | if (Directives->has_value()) |
||
| 99 | return ArrayRef<dependency_directives_scan::Directive>(**Directives); |
||
| 100 | } |
||
| 101 | return std::nullopt; |
||
| 102 | } |
||
| 103 | |||
| 104 | /// \returns The error. |
||
| 105 | std::error_code getError() const { return MaybeStat.getError(); } |
||
| 106 | |||
| 107 | /// \returns The entry status with empty filename. |
||
| 108 | llvm::vfs::Status getStatus() const { |
||
| 109 | assert(!isError() && "error"); |
||
| 110 | assert(MaybeStat->getName().empty() && "stat name must be empty"); |
||
| 111 | return *MaybeStat; |
||
| 112 | } |
||
| 113 | |||
| 114 | /// \returns The unique ID of the entry. |
||
| 115 | llvm::sys::fs::UniqueID getUniqueID() const { |
||
| 116 | assert(!isError() && "error"); |
||
| 117 | return MaybeStat->getUniqueID(); |
||
| 118 | } |
||
| 119 | |||
| 120 | /// \returns The data structure holding both contents and directive tokens. |
||
| 121 | CachedFileContents *getCachedContents() const { |
||
| 122 | assert(!isError() && "error"); |
||
| 123 | assert(!isDirectory() && "not a file"); |
||
| 124 | return Contents; |
||
| 125 | } |
||
| 126 | |||
| 127 | private: |
||
| 128 | void clearStatName() { |
||
| 129 | if (MaybeStat) |
||
| 130 | MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, ""); |
||
| 131 | } |
||
| 132 | |||
| 133 | /// Either the filesystem error or status of the entry. |
||
| 134 | /// The filename is empty and only materialized by \c EntryRef. |
||
| 135 | llvm::ErrorOr<llvm::vfs::Status> MaybeStat; |
||
| 136 | |||
| 137 | /// Non-owning pointer to the file contents. |
||
| 138 | /// |
||
| 139 | /// We're using pointer here to keep the size of this class small. Instances |
||
| 140 | /// representing directories and filesystem errors don't hold any contents |
||
| 141 | /// anyway. |
||
| 142 | CachedFileContents *Contents; |
||
| 143 | }; |
||
| 144 | |||
| 145 | /// This class is a shared cache, that caches the 'stat' and 'open' calls to the |
||
| 146 | /// underlying real file system, and the scanned preprocessor directives of |
||
| 147 | /// files. |
||
| 148 | /// |
||
| 149 | /// It is sharded based on the hash of the key to reduce the lock contention for |
||
| 150 | /// the worker threads. |
||
| 151 | class DependencyScanningFilesystemSharedCache { |
||
| 152 | public: |
||
| 153 | struct CacheShard { |
||
| 154 | /// The mutex that needs to be locked before mutation of any member. |
||
| 155 | mutable std::mutex CacheLock; |
||
| 156 | |||
| 157 | /// Map from filenames to cached entries. |
||
| 158 | llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> |
||
| 159 | EntriesByFilename; |
||
| 160 | |||
| 161 | /// Map from unique IDs to cached entries. |
||
| 162 | llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *> |
||
| 163 | EntriesByUID; |
||
| 164 | |||
| 165 | /// The backing storage for cached entries. |
||
| 166 | llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage; |
||
| 167 | |||
| 168 | /// The backing storage for cached contents. |
||
| 169 | llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage; |
||
| 170 | |||
| 171 | /// Returns entry associated with the filename or nullptr if none is found. |
||
| 172 | const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; |
||
| 173 | |||
| 174 | /// Returns entry associated with the unique ID or nullptr if none is found. |
||
| 175 | const CachedFileSystemEntry * |
||
| 176 | findEntryByUID(llvm::sys::fs::UniqueID UID) const; |
||
| 177 | |||
| 178 | /// Returns entry associated with the filename if there is some. Otherwise, |
||
| 179 | /// constructs new one with the given status, associates it with the |
||
| 180 | /// filename and returns the result. |
||
| 181 | const CachedFileSystemEntry & |
||
| 182 | getOrEmplaceEntryForFilename(StringRef Filename, |
||
| 183 | llvm::ErrorOr<llvm::vfs::Status> Stat); |
||
| 184 | |||
| 185 | /// Returns entry associated with the unique ID if there is some. Otherwise, |
||
| 186 | /// constructs new one with the given status and contents, associates it |
||
| 187 | /// with the unique ID and returns the result. |
||
| 188 | const CachedFileSystemEntry & |
||
| 189 | getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, |
||
| 190 | std::unique_ptr<llvm::MemoryBuffer> Contents); |
||
| 191 | |||
| 192 | /// Returns entry associated with the filename if there is some. Otherwise, |
||
| 193 | /// associates the given entry with the filename and returns it. |
||
| 194 | const CachedFileSystemEntry & |
||
| 195 | getOrInsertEntryForFilename(StringRef Filename, |
||
| 196 | const CachedFileSystemEntry &Entry); |
||
| 197 | }; |
||
| 198 | |||
| 199 | DependencyScanningFilesystemSharedCache(); |
||
| 200 | |||
| 201 | /// Returns shard for the given key. |
||
| 202 | CacheShard &getShardForFilename(StringRef Filename) const; |
||
| 203 | CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const; |
||
| 204 | |||
| 205 | private: |
||
| 206 | std::unique_ptr<CacheShard[]> CacheShards; |
||
| 207 | unsigned NumShards; |
||
| 208 | }; |
||
| 209 | |||
| 210 | /// This class is a local cache, that caches the 'stat' and 'open' calls to the |
||
| 211 | /// underlying real file system. |
||
| 212 | class DependencyScanningFilesystemLocalCache { |
||
| 213 | llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache; |
||
| 214 | |||
| 215 | public: |
||
| 216 | /// Returns entry associated with the filename or nullptr if none is found. |
||
| 217 | const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { |
||
| 218 | auto It = Cache.find(Filename); |
||
| 219 | return It == Cache.end() ? nullptr : It->getValue(); |
||
| 220 | } |
||
| 221 | |||
| 222 | /// Associates the given entry with the filename and returns the given entry |
||
| 223 | /// pointer (for convenience). |
||
| 224 | const CachedFileSystemEntry & |
||
| 225 | insertEntryForFilename(StringRef Filename, |
||
| 226 | const CachedFileSystemEntry &Entry) { |
||
| 227 | const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second; |
||
| 228 | assert(InsertedEntry == &Entry && "entry already present"); |
||
| 229 | return *InsertedEntry; |
||
| 230 | } |
||
| 231 | }; |
||
| 232 | |||
| 233 | /// Reference to a CachedFileSystemEntry. |
||
| 234 | /// If the underlying entry is an opened file, this wrapper returns the file |
||
| 235 | /// contents and the scanned preprocessor directives. |
||
| 236 | class EntryRef { |
||
| 237 | /// The filename used to access this entry. |
||
| 238 | std::string Filename; |
||
| 239 | |||
| 240 | /// The underlying cached entry. |
||
| 241 | const CachedFileSystemEntry &Entry; |
||
| 242 | |||
| 243 | public: |
||
| 244 | EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) |
||
| 245 | : Filename(Name), Entry(Entry) {} |
||
| 246 | |||
| 247 | llvm::vfs::Status getStatus() const { |
||
| 248 | llvm::vfs::Status Stat = Entry.getStatus(); |
||
| 249 | if (!Stat.isDirectory()) |
||
| 250 | Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size()); |
||
| 251 | return llvm::vfs::Status::copyWithNewName(Stat, Filename); |
||
| 252 | } |
||
| 253 | |||
| 254 | bool isError() const { return Entry.isError(); } |
||
| 255 | bool isDirectory() const { return Entry.isDirectory(); } |
||
| 256 | |||
| 257 | /// If the cached entry represents an error, promotes it into `ErrorOr`. |
||
| 258 | llvm::ErrorOr<EntryRef> unwrapError() const { |
||
| 259 | if (isError()) |
||
| 260 | return Entry.getError(); |
||
| 261 | return *this; |
||
| 262 | } |
||
| 263 | |||
| 264 | StringRef getContents() const { return Entry.getOriginalContents(); } |
||
| 265 | |||
| 266 | std::optional<ArrayRef<dependency_directives_scan::Directive>> |
||
| 267 | getDirectiveTokens() const { |
||
| 268 | return Entry.getDirectiveTokens(); |
||
| 269 | } |
||
| 270 | }; |
||
| 271 | |||
| 272 | /// A virtual file system optimized for the dependency discovery. |
||
| 273 | /// |
||
| 274 | /// It is primarily designed to work with source files whose contents was |
||
| 275 | /// preprocessed to remove any tokens that are unlikely to affect the dependency |
||
| 276 | /// computation. |
||
| 277 | /// |
||
| 278 | /// This is not a thread safe VFS. A single instance is meant to be used only in |
||
| 279 | /// one thread. Multiple instances are allowed to service multiple threads |
||
| 280 | /// running in parallel. |
||
| 281 | class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { |
||
| 282 | public: |
||
| 283 | DependencyScanningWorkerFilesystem( |
||
| 284 | DependencyScanningFilesystemSharedCache &SharedCache, |
||
| 285 | IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) |
||
| 286 | : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {} |
||
| 287 | |||
| 288 | llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override; |
||
| 289 | llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> |
||
| 290 | openFileForRead(const Twine &Path) override; |
||
| 291 | |||
| 292 | /// Returns entry for the given filename. |
||
| 293 | /// |
||
| 294 | /// Attempts to use the local and shared caches first, then falls back to |
||
| 295 | /// using the underlying filesystem. |
||
| 296 | llvm::ErrorOr<EntryRef> |
||
| 297 | getOrCreateFileSystemEntry(StringRef Filename, |
||
| 298 | bool DisableDirectivesScanning = false); |
||
| 299 | |||
| 300 | private: |
||
| 301 | /// Check whether the file should be scanned for preprocessor directives. |
||
| 302 | bool shouldScanForDirectives(StringRef Filename); |
||
| 303 | |||
| 304 | /// For a filename that's not yet associated with any entry in the caches, |
||
| 305 | /// uses the underlying filesystem to either look up the entry based in the |
||
| 306 | /// shared cache indexed by unique ID, or creates new entry from scratch. |
||
| 307 | llvm::ErrorOr<const CachedFileSystemEntry &> |
||
| 308 | computeAndStoreResult(StringRef Filename); |
||
| 309 | |||
| 310 | /// Scan for preprocessor directives for the given entry if necessary and |
||
| 311 | /// returns a wrapper object with reference semantics. |
||
| 312 | EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry, |
||
| 313 | StringRef Filename, bool Disable); |
||
| 314 | |||
| 315 | /// Represents a filesystem entry that has been stat-ed (and potentially read) |
||
| 316 | /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. |
||
| 317 | struct TentativeEntry { |
||
| 318 | llvm::vfs::Status Status; |
||
| 319 | std::unique_ptr<llvm::MemoryBuffer> Contents; |
||
| 320 | |||
| 321 | TentativeEntry(llvm::vfs::Status Status, |
||
| 322 | std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr) |
||
| 323 | : Status(std::move(Status)), Contents(std::move(Contents)) {} |
||
| 324 | }; |
||
| 325 | |||
| 326 | /// Reads file at the given path. Enforces consistency between the file size |
||
| 327 | /// in status and size of read contents. |
||
| 328 | llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename); |
||
| 329 | |||
| 330 | /// Returns entry associated with the unique ID of the given tentative entry |
||
| 331 | /// if there is some in the shared cache. Otherwise, constructs new one, |
||
| 332 | /// associates it with the unique ID and returns the result. |
||
| 333 | const CachedFileSystemEntry & |
||
| 334 | getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); |
||
| 335 | |||
| 336 | /// Returns entry associated with the filename or nullptr if none is found. |
||
| 337 | /// |
||
| 338 | /// Returns entry from local cache if there is some. Otherwise, if the entry |
||
| 339 | /// is found in the shared cache, writes it through the local cache and |
||
| 340 | /// returns it. Otherwise returns nullptr. |
||
| 341 | const CachedFileSystemEntry * |
||
| 342 | findEntryByFilenameWithWriteThrough(StringRef Filename); |
||
| 343 | |||
| 344 | /// Returns entry associated with the unique ID in the shared cache or nullptr |
||
| 345 | /// if none is found. |
||
| 346 | const CachedFileSystemEntry * |
||
| 347 | findSharedEntryByUID(llvm::vfs::Status Stat) const { |
||
| 348 | return SharedCache.getShardForUID(Stat.getUniqueID()) |
||
| 349 | .findEntryByUID(Stat.getUniqueID()); |
||
| 350 | } |
||
| 351 | |||
| 352 | /// Associates the given entry with the filename in the local cache and |
||
| 353 | /// returns it. |
||
| 354 | const CachedFileSystemEntry & |
||
| 355 | insertLocalEntryForFilename(StringRef Filename, |
||
| 356 | const CachedFileSystemEntry &Entry) { |
||
| 357 | return LocalCache.insertEntryForFilename(Filename, Entry); |
||
| 358 | } |
||
| 359 | |||
| 360 | /// Returns entry associated with the filename in the shared cache if there is |
||
| 361 | /// some. Otherwise, constructs new one with the given error code, associates |
||
| 362 | /// it with the filename and returns the result. |
||
| 363 | const CachedFileSystemEntry & |
||
| 364 | getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) { |
||
| 365 | return SharedCache.getShardForFilename(Filename) |
||
| 366 | .getOrEmplaceEntryForFilename(Filename, EC); |
||
| 367 | } |
||
| 368 | |||
| 369 | /// Returns entry associated with the filename in the shared cache if there is |
||
| 370 | /// some. Otherwise, associates the given entry with the filename and returns |
||
| 371 | /// it. |
||
| 372 | const CachedFileSystemEntry & |
||
| 373 | getOrInsertSharedEntryForFilename(StringRef Filename, |
||
| 374 | const CachedFileSystemEntry &Entry) { |
||
| 375 | return SharedCache.getShardForFilename(Filename) |
||
| 376 | .getOrInsertEntryForFilename(Filename, Entry); |
||
| 377 | } |
||
| 378 | |||
| 379 | void printImpl(raw_ostream &OS, PrintType Type, |
||
| 380 | unsigned IndentLevel) const override { |
||
| 381 | printIndent(OS, IndentLevel); |
||
| 382 | OS << "DependencyScanningFilesystem\n"; |
||
| 383 | getUnderlyingFS().print(OS, Type, IndentLevel + 1); |
||
| 384 | } |
||
| 385 | |||
| 386 | /// The global cache shared between worker threads. |
||
| 387 | DependencyScanningFilesystemSharedCache &SharedCache; |
||
| 388 | /// The local cache is used by the worker thread to cache file system queries |
||
| 389 | /// locally instead of querying the global cache every time. |
||
| 390 | DependencyScanningFilesystemLocalCache LocalCache; |
||
| 391 | }; |
||
| 392 | |||
| 393 | } // end namespace dependencies |
||
| 394 | } // end namespace tooling |
||
| 395 | } // end namespace clang |
||
| 396 | |||
| 397 | #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |