Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- Parser.h - Matcher expression parser ---------------------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | /// \file |
||
10 | /// Simple matcher expression parser. |
||
11 | /// |
||
12 | /// The parser understands matcher expressions of the form: |
||
13 | /// MatcherName(Arg0, Arg1, ..., ArgN) |
||
14 | /// as well as simple types like strings. |
||
15 | /// The parser does not know how to process the matchers. It delegates this task |
||
16 | /// to a Sema object received as an argument. |
||
17 | /// |
||
18 | /// \code |
||
19 | /// Grammar for the expressions supported: |
||
20 | /// <Expression> := <Literal> | <NamedValue> | <MatcherExpression> |
||
21 | /// <Literal> := <StringLiteral> | <Boolean> | <Double> | <Unsigned> |
||
22 | /// <StringLiteral> := "quoted string" |
||
23 | /// <Boolean> := true | false |
||
24 | /// <Double> := [0-9]+.[0-9]* | [0-9]+.[0-9]*[eE][-+]?[0-9]+ |
||
25 | /// <Unsigned> := [0-9]+ |
||
26 | /// <NamedValue> := <Identifier> |
||
27 | /// <MatcherExpression> := <Identifier>(<ArgumentList>) | |
||
28 | /// <Identifier>(<ArgumentList>).bind(<StringLiteral>) |
||
29 | /// <Identifier> := [a-zA-Z]+ |
||
30 | /// <ArgumentList> := <Expression> | <Expression>,<ArgumentList> |
||
31 | /// \endcode |
||
32 | // |
||
33 | //===----------------------------------------------------------------------===// |
||
34 | |||
35 | #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
||
36 | #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
||
37 | |||
38 | #include "clang/ASTMatchers/ASTMatchersInternal.h" |
||
39 | #include "clang/ASTMatchers/Dynamic/Registry.h" |
||
40 | #include "clang/ASTMatchers/Dynamic/VariantValue.h" |
||
41 | #include "llvm/ADT/ArrayRef.h" |
||
42 | #include "llvm/ADT/StringMap.h" |
||
43 | #include "llvm/ADT/StringRef.h" |
||
44 | #include <optional> |
||
45 | #include <utility> |
||
46 | #include <vector> |
||
47 | |||
48 | namespace clang { |
||
49 | namespace ast_matchers { |
||
50 | namespace dynamic { |
||
51 | |||
52 | class Diagnostics; |
||
53 | |||
54 | /// Matcher expression parser. |
||
55 | class Parser { |
||
56 | public: |
||
57 | /// Interface to connect the parser with the registry and more. |
||
58 | /// |
||
59 | /// The parser uses the Sema instance passed into |
||
60 | /// parseMatcherExpression() to handle all matcher tokens. The simplest |
||
61 | /// processor implementation would simply call into the registry to create |
||
62 | /// the matchers. |
||
63 | /// However, a more complex processor might decide to intercept the matcher |
||
64 | /// creation and do some extra work. For example, it could apply some |
||
65 | /// transformation to the matcher by adding some id() nodes, or could detect |
||
66 | /// specific matcher nodes for more efficient lookup. |
||
67 | class Sema { |
||
68 | public: |
||
69 | virtual ~Sema(); |
||
70 | |||
71 | /// Process a matcher expression. |
||
72 | /// |
||
73 | /// All the arguments passed here have already been processed. |
||
74 | /// |
||
75 | /// \param Ctor A matcher constructor looked up by lookupMatcherCtor. |
||
76 | /// |
||
77 | /// \param NameRange The location of the name in the matcher source. |
||
78 | /// Useful for error reporting. |
||
79 | /// |
||
80 | /// \param BindID The ID to use to bind the matcher, or a null \c StringRef |
||
81 | /// if no ID is specified. |
||
82 | /// |
||
83 | /// \param Args The argument list for the matcher. |
||
84 | /// |
||
/// \param Error Receives a description of the error when a null matcher is
/// returned.
///
85 | /// \return The matcher objects constructed by the processor, or a null |
||
86 | /// matcher if an error occurred. In that case, \c Error will contain a |
||
87 | /// description of the error. |
||
88 | virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
||
89 | SourceRange NameRange, |
||
90 | StringRef BindID, |
||
91 | ArrayRef<ParserValue> Args, |
||
92 | Diagnostics *Error) = 0; |
||
93 | |||
94 | /// Look up a matcher by name. |
||
95 | /// |
||
96 | /// \param MatcherName The matcher name found by the parser. |
||
97 | /// |
||
98 | /// \return The matcher constructor, or std::optional<MatcherCtor>() if not |
||
99 | /// found. |
||
100 | virtual std::optional<MatcherCtor> |
||
101 | lookupMatcherCtor(StringRef MatcherName) = 0; |
||
102 | |||
/// Report whether the given constructor is a "builder" matcher.
/// NOTE(review): this header does not define what makes a matcher a builder;
/// presumably such constructors are the ones handed to buildMatcherCtor() --
/// confirm against the registry implementation.
103 | virtual bool isBuilderMatcher(MatcherCtor) const = 0; |
||
104 | |||
/// Return the ASTNodeKind associated with the given constructor.
/// NOTE(review): the precise meaning of that kind is not stated in this
/// header -- confirm against the registry implementation.
105 | virtual ASTNodeKind nodeMatcherType(MatcherCtor) const = 0; |
||
106 | |||
/// Produce a matcher descriptor from a constructor and an argument list.
/// NOTE(review): \p NameRange, \p Args and \p Error share their names and
/// types with the parameters of actOnMatcherExpression() and presumably play
/// the same roles -- TODO confirm.
107 | virtual internal::MatcherDescriptorPtr |
||
108 | buildMatcherCtor(MatcherCtor, SourceRange NameRange, |
||
109 | ArrayRef<ParserValue> Args, Diagnostics *Error) const = 0; |
||
110 | |||
111 | /// Compute the list of completion types for \p Context. |
||
112 | /// |
||
113 | /// Each element of \p Context represents a matcher invocation, going from |
||
114 | /// outermost to innermost. Elements are pairs consisting of a reference to |
||
115 | /// the matcher constructor and the index of the next element in the |
||
116 | /// argument list of that matcher (or for the last element, the index of |
||
117 | /// the completion point in the argument list). An empty list requests |
||
118 | /// completion for the root matcher. |
||
///
/// Unlike the members above, this one is not pure virtual, so overriding it
/// is optional.
119 | virtual std::vector<ArgKind> getAcceptedCompletionTypes( |
||
120 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); |
||
121 | |||
122 | /// Compute the list of completions that match any of |
||
123 | /// \p AcceptedTypes. |
||
124 | /// |
||
125 | /// \param AcceptedTypes All types accepted for this completion. |
||
126 | /// |
||
127 | /// \return All completions for the specified types. |
||
128 | /// Completions should be valid when used in \c lookupMatcherCtor(). |
||
129 | /// The matcher constructed from the return of \c lookupMatcherCtor() |
||
130 | /// should be convertible to some type in \p AcceptedTypes. |
||
///
/// Not pure virtual either, so overriding it is optional.
131 | virtual std::vector<MatcherCompletion> |
||
132 | getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes); |
||
133 | }; |
||
134 | |||
135 | /// Sema implementation that uses the matcher registry to process the |
||
136 | /// tokens. |
||
///
/// Overrides the destructor and every virtual member function declared in
/// Parser::Sema, including the two completion hooks that are not pure
/// virtual there.
137 | class RegistrySema : public Parser::Sema { |
||
138 | public: |
||
139 | ~RegistrySema() override; |
||
140 | |||
141 | std::optional<MatcherCtor> |
||
142 | lookupMatcherCtor(StringRef MatcherName) override; |
||
143 | |||
144 | VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
||
145 | SourceRange NameRange, |
||
146 | StringRef BindID, |
||
147 | ArrayRef<ParserValue> Args, |
||
148 | Diagnostics *Error) override; |
||
149 | |||
150 | std::vector<ArgKind> getAcceptedCompletionTypes( |
||
151 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override; |
||
152 | |||
153 | bool isBuilderMatcher(MatcherCtor Ctor) const override; |
||
154 | |||
155 | ASTNodeKind nodeMatcherType(MatcherCtor) const override; |
||
156 | |||
157 | internal::MatcherDescriptorPtr |
||
158 | buildMatcherCtor(MatcherCtor, SourceRange NameRange, |
||
159 | ArrayRef<ParserValue> Args, |
||
160 | Diagnostics *Error) const override; |
||
161 | |||
162 | std::vector<MatcherCompletion> |
||
163 | getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override; |
||
164 | }; |
||
165 | |||
/// String-keyed map (llvm::StringMap) from a value's name to its precomputed
/// VariantValue; this is the type of the \c NamedValues parameters taken by
/// the static parse and completion functions of this class.
166 | using NamedValueMap = llvm::StringMap<VariantValue>; |
||
167 | |||
168 | /// Parse a matcher expression. |
||
169 | /// |
||
170 | /// \param MatcherCode The matcher expression to parse. |
||
/// Taken by non-const reference. NOTE(review): presumably the parser may
/// update it while consuming input -- confirm in the implementation.
||
171 | /// |
||
172 | /// \param S The Sema instance that will help the parser |
||
173 | /// construct the matchers. If null, it uses the default registry. |
||
174 | /// |
||
175 | /// \param NamedValues A map of precomputed named values. This provides |
||
176 | /// the dictionary for the <NamedValue> rule of the grammar. |
||
177 | /// If null, it is ignored. |
||
||
178 | /// |
||
179 | /// \return The matcher object constructed by the processor, or |
||
180 | /// std::nullopt if an error occurred. In that case, \c Error will contain a |
||
181 | /// description of the error. |
||
182 | /// The DynTypedMatcher is returned by value inside the std::optional. |
||
183 | static std::optional<DynTypedMatcher> |
||
184 | parseMatcherExpression(StringRef &MatcherCode, Sema *S, |
||
185 | const NamedValueMap *NamedValues, Diagnostics *Error); |
||
/// Overload without named values: forwards to the overload above with a null
/// \c NamedValues.
186 | static std::optional<DynTypedMatcher> |
||
187 | parseMatcherExpression(StringRef &MatcherCode, Sema *S, Diagnostics *Error) { |
||
188 | return parseMatcherExpression(MatcherCode, S, nullptr, Error); |
||
189 | } |
||
/// Overload without a Sema or named values: forwards with a null \c S, which
/// selects the default registry as documented above.
190 | static std::optional<DynTypedMatcher> |
||
191 | parseMatcherExpression(StringRef &MatcherCode, Diagnostics *Error) { |
||
192 | return parseMatcherExpression(MatcherCode, nullptr, Error); |
||
193 | } |
||
194 | |||
195 | /// Parse an expression. |
||
196 | /// |
||
197 | /// Parses any expression supported by this parser. In general, the |
||
198 | /// \c parseMatcherExpression function is a better approach to get a matcher |
||
199 | /// object. |
||
200 | /// |
||
201 | /// \param S The Sema instance that will help the parser |
||
202 | /// construct the matchers. If null, it uses the default registry. |
||
203 | /// |
||
204 | /// \param NamedValues A map of precomputed named values. This provides |
||
205 | /// the dictionary for the <NamedValue> rule of the grammar. |
||
206 | /// If null, it is ignored. |
||
///
/// \param Code The expression to parse, taken by non-const reference.
///
/// \param Value NOTE(review): presumably the output location for the parsed
/// value -- confirm in the implementation.
///
/// \param Error NOTE(review): presumably receives a description of any
/// error, as for \c parseMatcherExpression -- confirm.
///
/// \return NOTE(review): presumably true on success and false on error; the
/// convention is not stated in this header -- confirm in the implementation.
207 | static bool parseExpression(StringRef &Code, Sema *S, |
||
208 | const NamedValueMap *NamedValues, |
||
209 | VariantValue *Value, Diagnostics *Error); |
||
/// Overload without named values: forwards to the overload above with a null
/// \c NamedValues.
210 | static bool parseExpression(StringRef &Code, Sema *S, VariantValue *Value, |
||
211 | Diagnostics *Error) { |
||
212 | return parseExpression(Code, S, nullptr, Value, Error); |
||
213 | } |
||
/// Overload without a Sema or named values: forwards with a null \c S, which
/// selects the default registry as documented above.
214 | static bool parseExpression(StringRef &Code, VariantValue *Value, |
||
215 | Diagnostics *Error) { |
||
216 | return parseExpression(Code, nullptr, Value, Error); |
||
217 | } |
||
218 | |||
219 | /// Complete an expression at the given offset. |
||
220 | /// |
||
/// \param Code The expression to complete, taken by non-const reference.
///
/// \param CompletionOffset The offset at which completion is requested.
/// NOTE(review): presumably a character offset into \p Code -- confirm.
///
221 | /// \param S The Sema instance that will help the parser |
||
222 | /// construct the matchers. If null, it uses the default registry. |
||
223 | /// |
||
224 | /// \param NamedValues A map of precomputed named values. This provides |
||
225 | /// the dictionary for the <NamedValue> rule of the grammar. |
||
226 | /// If null, it is ignored. |
||
227 | /// |
||
228 | /// \return The list of completions, which may be empty if there are no |
||
229 | /// available completions or if an error occurred. |
||
230 | static std::vector<MatcherCompletion> |
||
231 | completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, |
||
232 | const NamedValueMap *NamedValues); |
||
/// Overload without named values: forwards to the overload above with a null
/// \c NamedValues.
233 | static std::vector<MatcherCompletion> |
||
234 | completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S) { |
||
235 | return completeExpression(Code, CompletionOffset, S, nullptr); |
||
236 | } |
||
/// Overload without a Sema or named values: forwards with a null \c S, which
/// selects the default registry as documented above.
237 | static std::vector<MatcherCompletion> |
||
238 | completeExpression(StringRef &Code, unsigned CompletionOffset) { |
||
239 | return completeExpression(Code, CompletionOffset, nullptr); |
||
240 | } |
||
241 | |||
242 | private: |
||
/// Helper types that are only forward-declared in this header.
243 | class CodeTokenizer; |
||
244 | struct ScopedContextEntry; |
||
245 | struct TokenInfo; |
||
246 | |||
/// Private constructor: the public interface of this class consists solely
/// of static functions, so clients do not construct a Parser themselves.
247 | Parser(CodeTokenizer *Tokenizer, Sema *S, |
||
248 | const NamedValueMap *NamedValues, |
||
249 | Diagnostics *Error); |
||
250 | |||
/// Internal parsing helpers. NOTE(review): the bool results presumably
/// signal success or failure -- confirm in the implementation.
251 | bool parseBindID(std::string &BindID); |
||
252 | bool parseExpressionImpl(VariantValue *Value); |
||
253 | bool parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken, |
||
254 | const TokenInfo &OpenToken, VariantValue *Value); |
||
255 | bool parseMatcherExpressionImpl(const TokenInfo &NameToken, |
||
256 | const TokenInfo &OpenToken, |
||
257 | std::optional<MatcherCtor> Ctor, |
||
258 | VariantValue *Value); |
||
259 | bool parseIdentifierPrefixImpl(VariantValue *Value); |
||
260 | |||
/// Completion helpers. NOTE(review): presumably these populate the
/// Completions member declared below -- confirm in the implementation.
261 | void addCompletion(const TokenInfo &CompToken, |
||
262 | const MatcherCompletion &Completion); |
||
263 | void addExpressionCompletions(); |
||
264 | |||
265 | std::vector<MatcherCompletion> |
||
266 | getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes); |
||
267 | |||
/// Const pointers: they cannot be reseated once the Parser is constructed.
/// Their names and types mirror the constructor's parameters.
268 | CodeTokenizer *const Tokenizer; |
||
269 | Sema *const S; |
||
270 | const NamedValueMap *const NamedValues; |
||
271 | Diagnostics *const Error; |
||
272 | |||
/// Same element type as the \c Context argument of
/// Sema::getAcceptedCompletionTypes(): pairs of a matcher constructor and an
/// unsigned index.
273 | using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>; |
||
274 | |||
275 | ContextStackTy ContextStack; |
||
/// NOTE(review): presumably the results accumulated for completeExpression()
/// -- confirm in the implementation.
276 | std::vector<MatcherCompletion> Completions; |
||
277 | }; |
||
278 | |||
279 | } // namespace dynamic |
||
280 | } // namespace ast_matchers |
||
281 | } // namespace clang |
||
282 | |||
283 | #endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |