Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | # -*- coding: utf-8 -*- |
2 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
3 | # See https://llvm.org/LICENSE.txt for license information. |
||
4 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
5 | """ This module is responsible for capturing the compiler invocations of any |
||
6 | build process. The result of that should be a compilation database. |
||
7 | |||
8 | This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES |
||
9 | mechanisms provided by the dynamic linker. The related library is implemented |
||
10 | in C language and can be found under 'libear' directory. |
||
11 | |||
12 | The 'libear' library is capturing all child process creation and logging the |
||
13 | relevant information about it into separate files in a specified directory. |
||
14 | The parameter of this process is the output directory name, where the report |
||
15 | files shall be placed. This parameter is passed as an environment variable. |
||
16 | |||
17 | The module also implements compiler wrappers to intercept the compiler calls. |
||
18 | |||
19 | The module implements the build command execution and the post-processing of |
||
20 | the output files, which are condensed into a compilation database. """ |
||
21 | |||
22 | import sys |
||
23 | import os |
||
24 | import os.path |
||
25 | import re |
||
26 | import itertools |
||
27 | import json |
||
28 | import glob |
||
29 | import logging |
||
30 | from libear import build_libear, TemporaryDirectory |
||
31 | from libscanbuild import command_entry_point, compiler_wrapper, \ |
||
32 | wrapper_environment, run_command, run_build |
||
33 | from libscanbuild import duplicate_check |
||
34 | from libscanbuild.compilation import split_command |
||
35 | from libscanbuild.arguments import parse_args_for_intercept_build |
||
36 | from libscanbuild.shell import encode, decode |
||
37 | |||
38 | __all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper'] |
||
39 | |||
# ASCII group, record and unit separator characters. The execution report
# uses US between the words of a command, RS between the fields of one
# record and GS as the terminator of a record.
GS, RS, US = (chr(code) for code in (0x1d, 0x1e, 0x1f))

# Names exported through CC and CXX when compiler wrappers are in use.
COMPILER_WRAPPER_CC, COMPILER_WRAPPER_CXX = 'intercept-cc', 'intercept-c++'
# File name suffix of the execution reports.
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
# Platforms (as reported by sys.platform) where preload is never attempted.
WRAPPER_ONLY_PLATFORMS = frozenset(('win32', 'cygwin'))
||
48 | |||
49 | |||
@command_entry_point
def intercept_build():
    """ Entry point for 'intercept-build' command.

    Parses the command line arguments and passes them on to `capture`;
    the value `capture` produces is returned unchanged. """

    return capture(parse_args_for_intercept_build())
||
56 | |||
57 | |||
def capture(args):
    """ The entry point of build command interception.

    Runs the build command with interception enabled, reads the execution
    reports written into a temporary directory and writes the resulting
    compilation database to `args.cdb`.

    :param args: parsed command line arguments; this function reads
                 `args.build`, `args.cdb` and (when present) `args.append`,
    :return: the value returned by `run_build` for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run (lazily, one generator of
        # entries per intercepted exec)
        current = itertools.chain.from_iterable(
            format_entry(command) for command in commands)
        # read entries from previous run; the file is loaded completely
        # here, before the caller overwrites it with the merged result
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both, and entries whose source
        # file does not exist (anymore)
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; the glob results already carry
        # the directory prefix, so they are used as they are, and the
        # pattern is derived from the same constant the report writer uses
        trace_files = sorted(
            glob.iglob(os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)))
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(trace_file) for trace_file in trace_files)
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the lazy entries have to be
        # consumed here, while the temporary directory still exists
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
||
100 | |||
101 | |||
def setup_environment(args, destination):
    """ Prepare the environment variables for the build command.

    The returned mapping is a copy of the current process environment,
    extended with the report directory and with the settings of either the
    'libear' preloaded library or the compiler 'wrapper' programs.

    :param args: parsed command line arguments,
    :param destination: directory where the execution reports are written,
    :return: dictionary of environment variables for the build. """

    c_compiler = 'cc'
    if 'cc' in args:
        c_compiler = args.cc
    # NOTE: evaluated like the C compiler, but not referenced below.
    cxx_compiler = 'c++'
    if 'cxx' in args:
        cxx_compiler = args.cxx

    # the preload library is built only when wrappers were not requested
    # and the platform does not rule preloading out
    use_wrappers = args.override_compiler or is_preload_disabled(
        sys.platform)
    libear_path = None
    if not use_wrappers:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment['INTERCEPT_BUILD_TARGET_DIR'] = destination

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        # set after the wrapper environment, so these two values win
        environment['CC'] = COMPILER_WRAPPER_CC
        environment['CXX'] = COMPILER_WRAPPER_CXX
        return environment

    if sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment['DYLD_INSERT_LIBRARIES'] = libear_path
        environment['DYLD_FORCE_FLAT_NAMESPACE'] = '1'
        return environment

    logging.debug('intercept gonna preload libear on UNIX')
    environment['LD_PRELOAD'] = libear_path
    return environment
||
136 | |||
137 | |||
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    Delegates to `compiler_wrapper`, handing it the report writing
    callback, and returns its result. """

    result = compiler_wrapper(intercept_compiler_wrapper_impl)
    return result
||
143 | |||
144 | |||
def intercept_compiler_wrapper_impl(_, execution):
    """ Write the execution report of a wrapped compiler call.

    The report goes into the directory named by the
    'INTERCEPT_BUILD_TARGET_DIR' environment variable, into a file named
    after the current process id. Problems are logged as warnings only.

    :param _: ignored,
    :param execution: the execution to report, passed to
                      `write_exec_trace`. """

    warning_format = 'execution report might be incomplete: %s'

    report_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if report_dir:
        # write current execution info to the pid file
        try:
            report_name = '{0}{1}'.format(os.getpid(), TRACE_FILE_EXTENSION)
            report_path = os.path.join(report_dir, report_name)
            logging.debug('writing execution report to: %s', report_path)
            write_exec_trace(report_path, execution)
        except IOError:
            logging.warning(warning_format, 'io problem')
    else:
        logging.warning(warning_format, 'missing target directory')
||
165 | |||
166 | |||
def write_exec_trace(filename, entry):
    """ Append one execution report to the given file.

    This method shall be in sync with the execution report writer in the
    interception library. A report is a list of fields joined by RS and
    closed by GS: the process id (written twice, the second copy stands
    in the parent pid position), the literal 'wrapper', the working
    directory and the command, whose words are each followed by US.
    The text is written UTF-8 encoded.

    :param filename: path to the output execution trace file,
    :param entry: the Execution object to append to that file; its `pid`,
                  `cwd` and `cmd` attributes are read. """

    with open(filename, 'ab') as handler:
        process_id = str(entry.pid)
        fields = [
            process_id,
            process_id,
            'wrapper',
            entry.cwd,
            US.join(entry.cmd) + US,
        ]
        report = RS.join(fields) + GS
        handler.write(report.encode('utf-8'))
||
181 | |||
182 | |||
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename: path to the execution trace file,
    :return: generator of dictionaries with the keys 'pid', 'ppid',
             'function', 'directory' (all strings) and 'command' (list
             of strings), one per report found in the file. """

    logging.debug('parse exec trace file: %s', filename)
    # The wrapper writes its reports UTF-8 encoded, so the file is decoded
    # the same way instead of relying on the locale's default encoding.
    # Bytes which are not valid UTF-8 are kept as surrogate escapes rather
    # than aborting the whole run.
    with open(filename, 'r', encoding='utf-8',
              errors='surrogateescape') as handler:
        content = handler.read()
    # The content is fully read at this point: the file is not kept open
    # while the caller consumes the generator.
    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        if len(records) < 5:
            # a truncated report shall not discard every other report
            logging.warning(
                'skipping malformed record in exec trace file: %s',
                filename)
            continue
        yield {
            'pid': records[0],
            'ppid': records[1],
            'function': records[2],
            'directory': records[3],
            # every word is followed by US: drop the empty tail element
            'command': records[4].split(US)[:-1]
        }
||
202 | |||
203 | |||
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    Yields one entry (a dictionary with the 'directory', 'command' and
    'file' keys) for each source file `split_command` finds in the
    command of the given trace; yields nothing when `split_command`
    returns a false value.

    :param exec_trace: dictionary with 'command' and 'directory' keys. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        if not os.path.isabs(name):
            name = os.path.join(cwd, name)
        return os.path.normpath(name)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        # only two compiler names are emitted, whatever was executed
        compiler = 'cc'
        if compilation.compiler == 'c++':
            compiler = 'c++'
        command = [compiler, '-c']
        command += compilation.flags
        command.append(source)
        logging.debug('formated as: %s', command)
        yield {
            'directory': exec_trace['directory'],
            'command': encode(command),
            'file': abspath(exec_trace['directory'], source)
        }
||
224 | |||
225 | |||
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    Platforms listed in WRAPPER_ONLY_PLATFORMS are always reported as
    preload disabled.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    if platform != 'darwin':
        return False

    command = ['csrutil', 'status']
    pattern = re.compile(r'System Integrity Protection status:\s+enabled')
    try:
        return any(pattern.match(line) for line in run_command(command))
    except Exception:
        # Best effort: when 'csrutil' can not be executed, preload is
        # assumed to work. Only ordinary errors are swallowed, so that
        # KeyboardInterrupt and SystemExit still reach the caller.
        return False
||
247 | |||
248 | |||
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dictionary with 'file', 'directory' and 'command' keys,
    :return: string built from the reversed file name, the reversed
             directory and the command without its first word. """

    # For faster lookup in set, file name and directory are reverted
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To avoid this the hash does
    # not contain the first word of the command.
    arguments = decode(entry['command'])[1:]

    parts = (reversed_file, reversed_directory, ' '.join(arguments))
    return '<>'.join(parts)