Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | # -*- coding: utf-8 -*- |
2 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
3 | # See https://llvm.org/LICENSE.txt for license information. |
||
4 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
5 | """ This module is responsible for parsing a compiler invocation. """ |
||
6 | |||
7 | import re |
||
8 | import os |
||
9 | import collections |
||
10 | |||
11 | __all__ = ['split_command', 'classify_source', 'compiler_language'] |
||
12 | |||
# Compiler options dropped while building the compilation database.
#
# `split_command` consults this map to filter the arguments of a compiler
# invocation. It is not the complete list of ignored parameters: that
# function also discards some options by pattern.
#
# Each key is an option name; the value is the number of arguments that
# follow the option and have to be skipped together with it.
IGNORED_FLAGS = {
    # "compile only" is set explicitly by whoever creates the compilation
    # database, so the one given on the command line is dropped.
    '-c': 0,
    # dependency output flags of the preprocessor. keeping them would
    # produce entries which differ in these flags only; users reported
    # longer execution times caused by such duplicates.
    '-MD': 0, '-MMD': 0, '-MG': 0, '-MP': 0,
    '-MF': 1, '-MT': 1, '-MQ': 1,
    # linker options. the database holds compilation commands only, where
    # the compiler would not use them anyway; removing them simply makes
    # the output easier to read.
    '-static': 0, '-shared': 0, '-s': 0, '-rdynamic': 0,
    '-l': 1, '-L': 1, '-u': 1, '-z': 1, '-T': 1,
    '-Xlinker': 1,
}
||
49 | |||
# Regular expressions matching the executable names of known C/C++ compilers.
# They are matched against the base name of the program, not the full path.
COMPILER_PATTERNS = frozenset(
    re.compile(pattern) for pattern in (
        # cc / c++, optionally prefixed with "intercept-" or "analyze-"
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # gcc / g++ / mcc / m++ with optional dash separated prefixes and
        # an optional version suffix of up to three numeric components
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # clang / clang++ with the same optional prefixes and version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # llvm-gcc / llvm-g++
        r'^llvm-g(cc|\+\+)$',
    )
)
||
57 | |||
58 | |||
# Result type of `split_command`; created once instead of on every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The value on success is a named tuple with the following attributes:

    files:    list of source files
    flags:    list of compile options
    compiler: string value of 'c' or 'c++'

    None is returned when the program is not a known C/C++ compiler, when
    the invocation does not run a compilation pass (eg.: preprocess only),
    or when no source file was found among the arguments.

    A flag which expects a value but stands at the very end of the command
    is tolerated: the missing value is simply not consumed. """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options. an explicit iterator is used, because
    # some branches consume the arguments which follow the current one.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with the values belonging to them
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default prevents StopIteration leaking out of this
                # function when the command ends before the flag's value
                next(args, None)
        # linker related flags written together with their value
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # a dangling flag at the end of the command has no value to add
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)
||
101 | |||
102 | |||
def classify_source(filename, c_compiler=True):
    """ Map a file name to its source language, based on the extension.

    filename:   name (or path) of the file, only the extension is looked at
    c_compiler: decides how '.c' and '.i' files are treated; they are C
                sources when it is true, C++ sources otherwise

    Returns the language name as a string, or None when the extension is
    not a known source file extension. The lookup is case sensitive. """

    # only these two extensions depend on the kind of the compiler
    if c_compiler:
        plain, preprocessed = 'c', 'c-cpp-output'
    else:
        plain, preprocessed = 'c++', 'c++-cpp-output'

    languages = {
        '.c': plain,
        '.i': preprocessed,
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
    }
    # every remaining known extension is C++, whatever the compiler is
    languages.update(dict.fromkeys(
        ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++', '.C++', '.txx'),
        'c++'))

    suffix = os.path.splitext(os.path.basename(filename))[1]
    return languages.get(suffix)
||
127 | |||
128 | |||
def compiler_language(command):
    """ Decide whether the command is a compiler call, and for which language.

    command: the command as a list of strings, the program comes first

    Returns 'c++' when the base name of the program matches one of the
    COMPILER_PATTERNS and contains '++' (optionally followed by a dash
    separated suffix), 'c' when it matches but has no '++'. Returns None
    for an empty command or for a program which is not a known compiler. """

    # nothing to inspect in an empty command
    if not command:
        return None

    # only the name of the executable counts, not the directory it is in
    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            is_cxx = re.match(r'^(.+)(\+\+)(-.+|)$', program) is not None
            return 'c++' if is_cxx else 'c'
    return None
140 | return None |