Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | # -*- coding: utf-8 -*- |
| 2 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 3 | # See https://llvm.org/LICENSE.txt for license information. |
||
| 4 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 5 | """ This module is responsible for capturing the compiler invocations of any |
||
| 6 | build process. The result of that should be a compilation database. |
||
| 7 | |||
| 8 | This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES |
||
| 9 | mechanisms provided by the dynamic linker. The related library is implemented |
||
| 10 | in C language and can be found under 'libear' directory. |
||
| 11 | |||
| 12 | The 'libear' library is capturing all child process creation and logging the |
||
| 13 | relevant information about it into separate files in a specified directory. |
||
| 14 | The parameter of this process is the output directory name, where the report |
||
| 15 | files shall be placed. This parameter is passed as an environment variable. |
||
| 16 | |||
| 17 | The module also implements compiler wrappers to intercept the compiler calls. |
||
| 18 | |||
| 19 | The module implements the build command execution and the post-processing of |
||
| 20 | the output files, which will be condensed into a compilation database. """ |
||
| 21 | |||
| 22 | import sys |
||
| 23 | import os |
||
| 24 | import os.path |
||
| 25 | import re |
||
| 26 | import itertools |
||
| 27 | import json |
||
| 28 | import glob |
||
| 29 | import logging |
||
| 30 | from libear import build_libear, TemporaryDirectory |
||
| 31 | from libscanbuild import command_entry_point, compiler_wrapper, \ |
||
| 32 | wrapper_environment, run_command, run_build |
||
| 33 | from libscanbuild import duplicate_check |
||
| 34 | from libscanbuild.compilation import split_command |
||
| 35 | from libscanbuild.arguments import parse_args_for_intercept_build |
||
| 36 | from libscanbuild.shell import encode, decode |
||
| 37 | |||
# Names exported by this module.
__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']

# ASCII control characters used as delimiters inside the execution trace
# files (see write_exec_trace and parse_exec_trace in this module).
GS = chr(0x1d)  # group separator: ends one execution report
RS = chr(0x1e)  # record separator: sits between the fields of a report
US = chr(0x1f)  # unit separator: follows every command argument

# Values exported as CC / CXX when the compiler wrappers are in use.
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
# sys.platform values for which is_preload_disabled always answers True.
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})
||
| 48 | |||
| 49 | |||
@command_entry_point
def intercept_build():
    """ Entry point for 'intercept-build' command.

    Parses the command line with `parse_args_for_intercept_build` and
    hands the result to `capture`, whose return value is passed back. """

    return capture(parse_args_for_intercept_build())
||
| 56 | |||
| 57 | |||
def capture(args):
    """ The entry point of build command interception.

    Runs the build command in an instrumented environment, reads the
    execution traces which were written into a temporary directory,
    converts them into compilation database entries and writes those as
    JSON into the `args.cdb` file.

    :param args: the parsed command line arguments; `build` and `cdb` are
                 read from it, and `append` too when it is present,
    :return: the value returned by `run_build` for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements.

        :param commands: iterable of parsed execution traces,
        :return: generator of unique entries whose source file exists. """

        # create entries from the current run (each exec might produce
        # zero or more entries)
        current = itertools.chain.from_iterable(
            format_entry(command) for command in commands)
        # read entries from previous run; this is loaded eagerly, because
        # the very same file is truncated later when the result is written
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; the glob results already carry
        # the directory prefix, so they must not be joined with it again
        trace_files = sorted(
            glob.iglob(os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)))
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename) for filename in trace_files)
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the lazy entries are materialized
        # here, while the temporary directory still exists
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
||
| 100 | |||
| 101 | |||
def setup_environment(args, destination):
    """ Sets up the environment for the build command.

    It only prepares the environment variables, the build command itself
    is executed by the caller. The exec calls will be logged by the 'libear'
    preloaded library or by the 'wrapper' programs.

    :param args: the parsed command line arguments; `override_compiler` is
                 read from it, and `cc` too when it is present,
    :param destination: directory for the execution reports; it is exported
                        as INTERCEPT_BUILD_TARGET_DIR and also passed to
                        `build_libear`,
    :return: a copy of the current environment as a dict, extended with
             the variables needed by the chosen interception method. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # the preload library is only built when it is going to be used
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': COMPILER_WRAPPER_CC,
            'CXX': COMPILER_WRAPPER_CXX
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
||
| 136 | |||
| 137 | |||
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    Delegates to `compiler_wrapper` with `intercept_compiler_wrapper_impl`
    as the callback and returns its result. """

    outcome = compiler_wrapper(intercept_compiler_wrapper_impl)
    return outcome
||
| 143 | |||
| 144 | |||
def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    Writes the report of the current execution into a file named after the
    process id, inside the directory given by the INTERCEPT_BUILD_TARGET_DIR
    environment variable. A missing directory name or an IO failure is only
    logged as a warning.

    :param _: not used,
    :param execution: the execution to report, handed over to
                      `write_exec_trace` as it is. """

    warning_format = 'execution report might be incomplete: %s'

    report_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if report_dir:
        # write current execution info to the pid file
        try:
            report_name = str(os.getpid()) + TRACE_FILE_EXTENSION
            report_path = os.path.join(report_dir, report_name)
            logging.debug('writing execution report to: %s', report_path)
            write_exec_trace(report_path, execution)
        except IOError:
            logging.warning(warning_format, 'io problem')
    else:
        logging.warning(warning_format, 'missing target directory')
||
| 165 | |||
| 166 | |||
def write_exec_trace(filename, entry):
    """ Write execution report file.

    This method shall be sync with the execution report writer in
    interception library. One report is appended to the file as UTF-8 text:
    five fields (pid, ppid, function, directory, command) joined by RS and
    closed by GS, where each argument of the command is followed by US.

    :param filename: path to the output execution trace file,
    :param entry: the Execution object to append to that file; its `pid`,
                  `cwd` and `cmd` attributes are read. """

    with open(filename, 'ab') as handler:
        process_id = str(entry.pid)
        fields = [
            process_id,               # pid
            process_id,               # ppid: the same value is written again
            'wrapper',                # function
            entry.cwd,                # directory
            US.join(entry.cmd) + US,  # command
        ]
        report = RS.join(fields) + GS
        handler.write(report.encode('utf-8'))
||
| 181 | |||
| 182 | |||
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename: path to the execution trace file,
    :return: generator of dicts with the 'pid', 'ppid', 'function',
             'directory' and 'command' keys; the command is a list of
             arguments, the other values are strings. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
        for group in content.split(GS):
            # splitting leaves empty pieces around the separators
            if not group:
                continue
            records = group.split(RS)
            # every argument is followed by US, so the piece after the
            # last separator is not an argument
            arguments = records[4].split(US)[:-1]
            yield {
                'pid': records[0],
                'ppid': records[1],
                'function': records[2],
                'directory': records[3],
                'command': arguments
            }
||
| 202 | |||
| 203 | |||
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: a parsed execution trace; its 'command' and
                       'directory' values are read,
    :return: generator of dicts with the 'directory', 'command' and 'file'
             keys, one per source file which `split_command` reports;
             nothing is generated when `split_command` gives a false
             value for the command. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        if os.path.isabs(name):
            return os.path.normpath(name)
        return os.path.normpath(os.path.join(cwd, name))

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        # the compiler name is normalized to either 'c++' or 'cc'
        if compilation.compiler == 'c++':
            compiler = 'c++'
        else:
            compiler = 'cc'
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)
        directory = exec_trace['directory']
        yield {
            'directory': directory,
            'command': encode(command),
            'file': abspath(directory, source)
        }
||
| 224 | |||
| 225 | |||
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    Platforms listed in WRAPPER_ONLY_PLATFORMS are always reported as
    preload disabled, without running any command.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    if platform != 'darwin':
        return False

    command = ['csrutil', 'status']
    pattern = re.compile(r'System Integrity Protection status:\s+enabled')
    try:
        return any(pattern.match(line) for line in run_command(command))
    except Exception:
        # Best effort: when the status can not be queried, then preload
        # is assumed to work. KeyboardInterrupt and SystemExit are not
        # swallowed here, a bare except would hide those too.
        return False
||
| 247 | |||
| 248 | |||
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: a compilation database entry; its 'file', 'directory'
                  and 'command' values are read,
    :return: a string made of the reverted file name, the reverted
             directory and the command without its first word, joined
             by '<>'. """

    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    arguments = decode(entry['command'])[1:]

    parts = [
        # For faster lookup in set filename is reverted
        entry['file'][::-1],
        # For faster lookup in set directory is reverted
        entry['directory'][::-1],
        ' '.join(arguments),
    ]
    return '<>'.join(parts)