Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | |||
9 | #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |
||
10 | #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |
||
11 | |||
12 | #include "clang/Basic/LLVM.h" |
||
13 | #include "clang/Lex/DependencyDirectivesScanner.h" |
||
14 | #include "llvm/ADT/DenseSet.h" |
||
15 | #include "llvm/ADT/StringMap.h" |
||
16 | #include "llvm/Support/Allocator.h" |
||
17 | #include "llvm/Support/ErrorOr.h" |
||
18 | #include "llvm/Support/VirtualFileSystem.h" |
||
19 | #include <mutex> |
||
20 | #include <optional> |
||
21 | |||
22 | namespace clang { |
||
23 | namespace tooling { |
||
24 | namespace dependencies { |
||
25 | |||
26 | using DependencyDirectivesTy = |
||
27 | SmallVector<dependency_directives_scan::Directive, 20>; |
||
28 | |||
29 | /// Contents and directive tokens of a cached file entry. Single instance can |
||
30 | /// be shared between multiple entries. |
||
31 | struct CachedFileContents { |
||
32 | CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents) |
||
33 | : Original(std::move(Contents)), DepDirectives(nullptr) {} |
||
34 | |||
35 | /// Owning storage for the original contents. |
||
36 | std::unique_ptr<llvm::MemoryBuffer> Original; |
||
37 | |||
38 | /// The mutex that must be locked before mutating directive tokens. |
||
39 | std::mutex ValueLock; |
||
40 | SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens; |
||
41 | /// Accessor to the directive tokens that's atomic to avoid data races. |
||
42 | /// \p CachedFileContents has ownership of the pointer. |
||
43 | std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives; |
||
44 | |||
45 | ~CachedFileContents() { delete DepDirectives.load(); } |
||
46 | }; |
||
47 | |||
48 | /// An in-memory representation of a file system entity that is of interest to |
||
49 | /// the dependency scanning filesystem. |
||
50 | /// |
||
51 | /// It represents one of the following: |
||
52 | /// - opened file with contents and a stat value, |
||
53 | /// - opened file with contents, directive tokens and a stat value, |
||
54 | /// - directory entry with its stat value, |
||
55 | /// - filesystem error. |
||
56 | /// |
||
57 | /// Single instance of this class can be shared across different filenames (e.g. |
||
58 | /// a regular file and a symlink). For this reason the status filename is empty |
||
59 | /// and is only materialized by \c EntryRef that knows the requested filename. |
||
60 | class CachedFileSystemEntry { |
||
61 | public: |
||
62 | /// Creates an entry without contents: either a filesystem error or |
||
63 | /// a directory with stat value. |
||
64 | CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat) |
||
65 | : MaybeStat(std::move(Stat)), Contents(nullptr) { |
||
66 | clearStatName(); |
||
67 | } |
||
68 | |||
69 | /// Creates an entry representing a file with contents. |
||
70 | CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat, |
||
71 | CachedFileContents *Contents) |
||
72 | : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) { |
||
73 | clearStatName(); |
||
74 | } |
||
75 | |||
76 | /// \returns True if the entry is a filesystem error. |
||
77 | bool isError() const { return !MaybeStat; } |
||
78 | |||
79 | /// \returns True if the current entry represents a directory. |
||
80 | bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); } |
||
81 | |||
82 | /// \returns Original contents of the file. |
||
83 | StringRef getOriginalContents() const { |
||
84 | assert(!isError() && "error"); |
||
85 | assert(!MaybeStat->isDirectory() && "not a file"); |
||
86 | assert(Contents && "contents not initialized"); |
||
87 | return Contents->Original->getBuffer(); |
||
88 | } |
||
89 | |||
90 | /// \returns The scanned preprocessor directive tokens of the file that are |
||
91 | /// used to speed up preprocessing, if available. |
||
92 | std::optional<ArrayRef<dependency_directives_scan::Directive>> |
||
93 | getDirectiveTokens() const { |
||
94 | assert(!isError() && "error"); |
||
95 | assert(!isDirectory() && "not a file"); |
||
96 | assert(Contents && "contents not initialized"); |
||
97 | if (auto *Directives = Contents->DepDirectives.load()) { |
||
98 | if (Directives->has_value()) |
||
99 | return ArrayRef<dependency_directives_scan::Directive>(**Directives); |
||
100 | } |
||
101 | return std::nullopt; |
||
102 | } |
||
103 | |||
104 | /// \returns The error. |
||
105 | std::error_code getError() const { return MaybeStat.getError(); } |
||
106 | |||
107 | /// \returns The entry status with empty filename. |
||
108 | llvm::vfs::Status getStatus() const { |
||
109 | assert(!isError() && "error"); |
||
110 | assert(MaybeStat->getName().empty() && "stat name must be empty"); |
||
111 | return *MaybeStat; |
||
112 | } |
||
113 | |||
114 | /// \returns The unique ID of the entry. |
||
115 | llvm::sys::fs::UniqueID getUniqueID() const { |
||
116 | assert(!isError() && "error"); |
||
117 | return MaybeStat->getUniqueID(); |
||
118 | } |
||
119 | |||
120 | /// \returns The data structure holding both contents and directive tokens. |
||
121 | CachedFileContents *getCachedContents() const { |
||
122 | assert(!isError() && "error"); |
||
123 | assert(!isDirectory() && "not a file"); |
||
124 | return Contents; |
||
125 | } |
||
126 | |||
127 | private: |
||
128 | void clearStatName() { |
||
129 | if (MaybeStat) |
||
130 | MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, ""); |
||
131 | } |
||
132 | |||
133 | /// Either the filesystem error or status of the entry. |
||
134 | /// The filename is empty and only materialized by \c EntryRef. |
||
135 | llvm::ErrorOr<llvm::vfs::Status> MaybeStat; |
||
136 | |||
137 | /// Non-owning pointer to the file contents. |
||
138 | /// |
||
139 | /// We're using pointer here to keep the size of this class small. Instances |
||
140 | /// representing directories and filesystem errors don't hold any contents |
||
141 | /// anyway. |
||
142 | CachedFileContents *Contents; |
||
143 | }; |
||
144 | |||
145 | /// This class is a shared cache, that caches the 'stat' and 'open' calls to the |
||
146 | /// underlying real file system, and the scanned preprocessor directives of |
||
147 | /// files. |
||
148 | /// |
||
149 | /// It is sharded based on the hash of the key to reduce the lock contention for |
||
150 | /// the worker threads. |
||
151 | class DependencyScanningFilesystemSharedCache { |
||
152 | public: |
||
153 | struct CacheShard { |
||
154 | /// The mutex that needs to be locked before mutation of any member. |
||
155 | mutable std::mutex CacheLock; |
||
156 | |||
157 | /// Map from filenames to cached entries. |
||
158 | llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> |
||
159 | EntriesByFilename; |
||
160 | |||
161 | /// Map from unique IDs to cached entries. |
||
162 | llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *> |
||
163 | EntriesByUID; |
||
164 | |||
165 | /// The backing storage for cached entries. |
||
166 | llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage; |
||
167 | |||
168 | /// The backing storage for cached contents. |
||
169 | llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage; |
||
170 | |||
171 | /// Returns entry associated with the filename or nullptr if none is found. |
||
172 | const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const; |
||
173 | |||
174 | /// Returns entry associated with the unique ID or nullptr if none is found. |
||
175 | const CachedFileSystemEntry * |
||
176 | findEntryByUID(llvm::sys::fs::UniqueID UID) const; |
||
177 | |||
178 | /// Returns entry associated with the filename if there is some. Otherwise, |
||
179 | /// constructs new one with the given status, associates it with the |
||
180 | /// filename and returns the result. |
||
181 | const CachedFileSystemEntry & |
||
182 | getOrEmplaceEntryForFilename(StringRef Filename, |
||
183 | llvm::ErrorOr<llvm::vfs::Status> Stat); |
||
184 | |||
185 | /// Returns entry associated with the unique ID if there is some. Otherwise, |
||
186 | /// constructs new one with the given status and contents, associates it |
||
187 | /// with the unique ID and returns the result. |
||
188 | const CachedFileSystemEntry & |
||
189 | getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat, |
||
190 | std::unique_ptr<llvm::MemoryBuffer> Contents); |
||
191 | |||
192 | /// Returns entry associated with the filename if there is some. Otherwise, |
||
193 | /// associates the given entry with the filename and returns it. |
||
194 | const CachedFileSystemEntry & |
||
195 | getOrInsertEntryForFilename(StringRef Filename, |
||
196 | const CachedFileSystemEntry &Entry); |
||
197 | }; |
||
198 | |||
199 | DependencyScanningFilesystemSharedCache(); |
||
200 | |||
201 | /// Returns shard for the given key. |
||
202 | CacheShard &getShardForFilename(StringRef Filename) const; |
||
203 | CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const; |
||
204 | |||
205 | private: |
||
206 | std::unique_ptr<CacheShard[]> CacheShards; |
||
207 | unsigned NumShards; |
||
208 | }; |
||
209 | |||
210 | /// This class is a local cache, that caches the 'stat' and 'open' calls to the |
||
211 | /// underlying real file system. |
||
212 | class DependencyScanningFilesystemLocalCache { |
||
213 | llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache; |
||
214 | |||
215 | public: |
||
216 | /// Returns entry associated with the filename or nullptr if none is found. |
||
217 | const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const { |
||
218 | auto It = Cache.find(Filename); |
||
219 | return It == Cache.end() ? nullptr : It->getValue(); |
||
220 | } |
||
221 | |||
222 | /// Associates the given entry with the filename and returns the given entry |
||
223 | /// pointer (for convenience). |
||
224 | const CachedFileSystemEntry & |
||
225 | insertEntryForFilename(StringRef Filename, |
||
226 | const CachedFileSystemEntry &Entry) { |
||
227 | const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second; |
||
228 | assert(InsertedEntry == &Entry && "entry already present"); |
||
229 | return *InsertedEntry; |
||
230 | } |
||
231 | }; |
||
232 | |||
233 | /// Reference to a CachedFileSystemEntry. |
||
234 | /// If the underlying entry is an opened file, this wrapper returns the file |
||
235 | /// contents and the scanned preprocessor directives. |
||
236 | class EntryRef { |
||
237 | /// The filename used to access this entry. |
||
238 | std::string Filename; |
||
239 | |||
240 | /// The underlying cached entry. |
||
241 | const CachedFileSystemEntry &Entry; |
||
242 | |||
243 | public: |
||
244 | EntryRef(StringRef Name, const CachedFileSystemEntry &Entry) |
||
245 | : Filename(Name), Entry(Entry) {} |
||
246 | |||
247 | llvm::vfs::Status getStatus() const { |
||
248 | llvm::vfs::Status Stat = Entry.getStatus(); |
||
249 | if (!Stat.isDirectory()) |
||
250 | Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size()); |
||
251 | return llvm::vfs::Status::copyWithNewName(Stat, Filename); |
||
252 | } |
||
253 | |||
254 | bool isError() const { return Entry.isError(); } |
||
255 | bool isDirectory() const { return Entry.isDirectory(); } |
||
256 | |||
257 | /// If the cached entry represents an error, promotes it into `ErrorOr`. |
||
258 | llvm::ErrorOr<EntryRef> unwrapError() const { |
||
259 | if (isError()) |
||
260 | return Entry.getError(); |
||
261 | return *this; |
||
262 | } |
||
263 | |||
264 | StringRef getContents() const { return Entry.getOriginalContents(); } |
||
265 | |||
266 | std::optional<ArrayRef<dependency_directives_scan::Directive>> |
||
267 | getDirectiveTokens() const { |
||
268 | return Entry.getDirectiveTokens(); |
||
269 | } |
||
270 | }; |
||
271 | |||
272 | /// A virtual file system optimized for the dependency discovery. |
||
273 | /// |
||
274 | /// It is primarily designed to work with source files whose contents was |
||
275 | /// preprocessed to remove any tokens that are unlikely to affect the dependency |
||
276 | /// computation. |
||
277 | /// |
||
278 | /// This is not a thread safe VFS. A single instance is meant to be used only in |
||
279 | /// one thread. Multiple instances are allowed to service multiple threads |
||
280 | /// running in parallel. |
||
281 | class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { |
||
282 | public: |
||
283 | DependencyScanningWorkerFilesystem( |
||
284 | DependencyScanningFilesystemSharedCache &SharedCache, |
||
285 | IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS) |
||
286 | : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {} |
||
287 | |||
288 | llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override; |
||
289 | llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> |
||
290 | openFileForRead(const Twine &Path) override; |
||
291 | |||
292 | /// Returns entry for the given filename. |
||
293 | /// |
||
294 | /// Attempts to use the local and shared caches first, then falls back to |
||
295 | /// using the underlying filesystem. |
||
296 | llvm::ErrorOr<EntryRef> |
||
297 | getOrCreateFileSystemEntry(StringRef Filename, |
||
298 | bool DisableDirectivesScanning = false); |
||
299 | |||
300 | private: |
||
301 | /// Check whether the file should be scanned for preprocessor directives. |
||
302 | bool shouldScanForDirectives(StringRef Filename); |
||
303 | |||
304 | /// For a filename that's not yet associated with any entry in the caches, |
||
305 | /// uses the underlying filesystem to either look up the entry based in the |
||
306 | /// shared cache indexed by unique ID, or creates new entry from scratch. |
||
307 | llvm::ErrorOr<const CachedFileSystemEntry &> |
||
308 | computeAndStoreResult(StringRef Filename); |
||
309 | |||
310 | /// Scan for preprocessor directives for the given entry if necessary and |
||
311 | /// returns a wrapper object with reference semantics. |
||
312 | EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry, |
||
313 | StringRef Filename, bool Disable); |
||
314 | |||
315 | /// Represents a filesystem entry that has been stat-ed (and potentially read) |
||
316 | /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. |
||
317 | struct TentativeEntry { |
||
318 | llvm::vfs::Status Status; |
||
319 | std::unique_ptr<llvm::MemoryBuffer> Contents; |
||
320 | |||
321 | TentativeEntry(llvm::vfs::Status Status, |
||
322 | std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr) |
||
323 | : Status(std::move(Status)), Contents(std::move(Contents)) {} |
||
324 | }; |
||
325 | |||
326 | /// Reads file at the given path. Enforces consistency between the file size |
||
327 | /// in status and size of read contents. |
||
328 | llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename); |
||
329 | |||
330 | /// Returns entry associated with the unique ID of the given tentative entry |
||
331 | /// if there is some in the shared cache. Otherwise, constructs new one, |
||
332 | /// associates it with the unique ID and returns the result. |
||
333 | const CachedFileSystemEntry & |
||
334 | getOrEmplaceSharedEntryForUID(TentativeEntry TEntry); |
||
335 | |||
336 | /// Returns entry associated with the filename or nullptr if none is found. |
||
337 | /// |
||
338 | /// Returns entry from local cache if there is some. Otherwise, if the entry |
||
339 | /// is found in the shared cache, writes it through the local cache and |
||
340 | /// returns it. Otherwise returns nullptr. |
||
341 | const CachedFileSystemEntry * |
||
342 | findEntryByFilenameWithWriteThrough(StringRef Filename); |
||
343 | |||
344 | /// Returns entry associated with the unique ID in the shared cache or nullptr |
||
345 | /// if none is found. |
||
346 | const CachedFileSystemEntry * |
||
347 | findSharedEntryByUID(llvm::vfs::Status Stat) const { |
||
348 | return SharedCache.getShardForUID(Stat.getUniqueID()) |
||
349 | .findEntryByUID(Stat.getUniqueID()); |
||
350 | } |
||
351 | |||
352 | /// Associates the given entry with the filename in the local cache and |
||
353 | /// returns it. |
||
354 | const CachedFileSystemEntry & |
||
355 | insertLocalEntryForFilename(StringRef Filename, |
||
356 | const CachedFileSystemEntry &Entry) { |
||
357 | return LocalCache.insertEntryForFilename(Filename, Entry); |
||
358 | } |
||
359 | |||
360 | /// Returns entry associated with the filename in the shared cache if there is |
||
361 | /// some. Otherwise, constructs new one with the given error code, associates |
||
362 | /// it with the filename and returns the result. |
||
363 | const CachedFileSystemEntry & |
||
364 | getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) { |
||
365 | return SharedCache.getShardForFilename(Filename) |
||
366 | .getOrEmplaceEntryForFilename(Filename, EC); |
||
367 | } |
||
368 | |||
369 | /// Returns entry associated with the filename in the shared cache if there is |
||
370 | /// some. Otherwise, associates the given entry with the filename and returns |
||
371 | /// it. |
||
372 | const CachedFileSystemEntry & |
||
373 | getOrInsertSharedEntryForFilename(StringRef Filename, |
||
374 | const CachedFileSystemEntry &Entry) { |
||
375 | return SharedCache.getShardForFilename(Filename) |
||
376 | .getOrInsertEntryForFilename(Filename, Entry); |
||
377 | } |
||
378 | |||
379 | void printImpl(raw_ostream &OS, PrintType Type, |
||
380 | unsigned IndentLevel) const override { |
||
381 | printIndent(OS, IndentLevel); |
||
382 | OS << "DependencyScanningFilesystem\n"; |
||
383 | getUnderlyingFS().print(OS, Type, IndentLevel + 1); |
||
384 | } |
||
385 | |||
386 | /// The global cache shared between worker threads. |
||
387 | DependencyScanningFilesystemSharedCache &SharedCache; |
||
388 | /// The local cache is used by the worker thread to cache file system queries |
||
389 | /// locally instead of querying the global cache every time. |
||
390 | DependencyScanningFilesystemLocalCache LocalCache; |
||
391 | }; |
||
392 | |||
393 | } // end namespace dependencies |
||
394 | } // end namespace tooling |
||
395 | } // end namespace clang |
||
396 | |||
397 | #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H |