Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
# -*- coding: utf-8 -*-
2
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3
# See https://llvm.org/LICENSE.txt for license information.
4
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5
""" This module is responsible to capture the compiler invocation of any
6
build process. The result of that should be a compilation database.
7
 
8
This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
9
mechanisms provided by the dynamic linker. The related library is implemented
10
in C language and can be found under 'libear' directory.
11
 
12
The 'libear' library is capturing all child process creation and logging the
13
relevant information about it into separate files in a specified directory.
14
The parameter of this process is the output directory name, where the report
15
files shall be placed. This parameter is passed as an environment variable.
16
 
17
The module also implements compiler wrappers to intercept the compiler calls.
18
 
19
The module implements the build command execution and the post-processing of
20
the output files, which are condensed into a compilation database. """
21
 
22
import sys
23
import os
24
import os.path
25
import re
26
import itertools
27
import json
28
import glob
29
import logging
30
from libear import build_libear, TemporaryDirectory
31
from libscanbuild import command_entry_point, compiler_wrapper, \
32
    wrapper_environment, run_command, run_build
33
from libscanbuild import duplicate_check
34
from libscanbuild.compilation import split_command
35
from libscanbuild.arguments import parse_args_for_intercept_build
36
from libscanbuild.shell import encode, decode
37
 
38
__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']
39
 
40
# ASCII separator control characters which delimit the fields of the
# execution trace files (see write_exec_trace and parse_exec_trace).
GS = '\x1d'  # group separator: closes one execution report
RS = '\x1e'  # record separator: stands between the fields of a report
US = '\x1f'  # unit separator: follows every command argument

# Names assigned to CC / CXX when the compiler wrappers are in use.
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
# File name suffix of the execution trace files (same as in ear.c).
TRACE_FILE_EXTENSION = '.cmd'
# Platforms where library preload is reported as disabled.
WRAPPER_ONLY_PLATFORMS = frozenset(('win32', 'cygwin'))
48
 
49
 
50
@command_entry_point
def intercept_build():
    """ Run the 'intercept-build' command.

    Parses the command line arguments and hands them over to `capture`,
    whose result is returned. """

    return capture(parse_args_for_intercept_build())
56
 
57
 
58
def capture(args):
    """ The entry point of build command interception.

    Runs the build command with the interception environment, collects the
    execution traces written into a temporary directory and dumps the
    resulting compilation database as JSON.

    :param args:    the parsed command line arguments; 'build' and 'cdb'
                    are read, 'append' is honoured when present,
    :return:        the exit code of the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements.

        :param commands:    iterable of parsed execution traces,
        :return:            generator of unique compilation database
                            entries whose source file exists. """

        # create entries from the current run (one exec might produce
        # several entries, therefore the sequences are chained together)
        current = itertools.chain.from_iterable(
            format_entry(command) for command in commands)
        # read entries from previous run; the file is loaded completely
        # here, before it gets overwritten by the caller
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; the pattern is derived from the
        # shared extension constant, and glob already yields path names
        # which include the directory part, so no further join is needed
        trace_pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(trace_file)
            for trace_file in sorted(glob.iglob(trace_pattern)))
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database (entries are materialized while
        # the temporary directory still exists)
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
100
 
101
 
102
def setup_environment(args, destination):
    """ Create the environment for the build command.

    The result is a copy of the current process environment extended with
    the variables needed by the interception: either the ones for the
    compiler wrappers, or the ones which make the dynamic linker preload
    the 'libear' library.

    :param args:        the parsed command line arguments,
    :param destination: directory where the execution reports shall go,
    :return:            the environment as a dictionary. """

    c_compiler = args.cc if 'cc' in args else 'cc'
    cxx_compiler = args.cxx if 'cxx' in args else 'c++'

    # the library is only built when preload is going to be used
    wrappers_requested = args.override_compiler or is_preload_disabled(
        sys.platform)
    if wrappers_requested:
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment['INTERCEPT_BUILD_TARGET_DIR'] = destination

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment['CC'] = COMPILER_WRAPPER_CC
        environment['CXX'] = COMPILER_WRAPPER_CXX
        return environment

    if sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment['DYLD_INSERT_LIBRARIES'] = libear_path
        environment['DYLD_FORCE_FLAT_NAMESPACE'] = '1'
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment['LD_PRELOAD'] = libear_path

    return environment
136
 
137
 
138
@command_entry_point
def intercept_compiler_wrapper():
    """ Run the `intercept-cc` and `intercept-c++` commands.

    Passes `intercept_compiler_wrapper_impl` to `compiler_wrapper` and
    returns whatever that call returns. """

    wrapper_result = compiler_wrapper(intercept_compiler_wrapper_impl)
    return wrapper_result
143
 
144
 
145
def intercept_compiler_wrapper_impl(_, execution):
    """ Write the execution report of the current compiler wrapper call.

    The report goes into a file named after the current process id, placed
    in the directory given by the INTERCEPT_BUILD_TARGET_DIR environment
    variable. Problems are only logged as warnings.

    :param _:           unused,
    :param execution:   the execution passed on to `write_exec_trace`. """

    message_prefix = 'execution report might be incomplete: %s'

    target_dir = os.environ.get('INTERCEPT_BUILD_TARGET_DIR')
    if target_dir:
        # write current execution info to the pid file
        try:
            report_name = '{0}{1}'.format(os.getpid(), TRACE_FILE_EXTENSION)
            report_path = os.path.join(target_dir, report_name)
            logging.debug('writing execution report to: %s', report_path)
            write_exec_trace(report_path, execution)
        except IOError:
            logging.warning(message_prefix, 'io problem')
    else:
        logging.warning(message_prefix, 'missing target directory')
165
 
166
 
167
def write_exec_trace(filename, entry):
    """ Append one execution report to the given file.

    The layout has to stay in sync with the execution report writer of the
    interception library: the fields are joined by RS, every command
    argument is followed by US, and the report is closed by GS.

    :param filename:    path to the output execution trace file,
    :param entry:       the Execution object to append to that file; its
                        'pid', 'cwd' and 'cmd' attributes are used. """

    with open(filename, 'ab') as handler:
        pid_text = str(entry.pid)
        # the wrapper reports its own pid in the parent pid field as well
        fields = [pid_text, pid_text, 'wrapper', entry.cwd,
                  US.join(entry.cmd) + US]
        report = RS.join(fields) + GS
        handler.write(report.encode('utf-8'))
181
 
182
 
183
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename:    path to the execution trace file,
    :return:            generator of dictionaries with the 'pid', 'ppid',
                        'function', 'directory' and 'command' keys, where
                        'command' is the list of the arguments. """

    logging.debug('parse exec trace file: %s', filename)
    # The wrapper writes the report UTF-8 encoded, therefore it is read
    # back with the same encoding instead of the locale dependent default.
    with open(filename, 'r', encoding='utf-8') as handler:
        content = handler.read()
        # reports are closed by GS, so the last piece of the split is empty
        for group in filter(bool, content.split(GS)):
            records = group.split(RS)
            yield {
                'pid': records[0],
                'ppid': records[1],
                'function': records[2],
                'directory': records[3],
                # every argument is followed by US, drop the empty tail
                'command': records[4].split(US)[:-1]
            }
202
 
203
 
204
def format_entry(exec_trace):
    """ Produce compilation database entries from an execution trace.

    Nothing is generated when `split_command` gives a false value for the
    command. Otherwise one entry is generated for each source file.

    :param exec_trace:  dictionary with 'command' and 'directory' keys,
    :return:            generator of dictionaries with the 'directory',
                        'command' and 'file' keys. """

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        if compilation.compiler == 'c++':
            compiler = 'c++'
        else:
            compiler = 'cc'
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)

        working_dir = exec_trace['directory']
        shell_command = encode(command)
        # relative source names are resolved against the working directory
        directory = exec_trace['directory']
        if os.path.isabs(source):
            source_path = source
        else:
            source_path = os.path.join(directory, source)
        yield {
            'directory': working_dir,
            'command': shell_command,
            'file': os.path.normpath(source_path)
        }
224
 
225
 
226
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    if platform != 'darwin':
        return False

    command = ['csrutil', 'status']
    pattern = re.compile(r'System Integrity Protection status:\s+enabled')
    try:
        return any(pattern.match(line) for line in run_command(command))
    except Exception:
        # Best effort: when 'csrutil' can not be executed the preload is
        # considered to be available. Only ordinary errors are swallowed,
        # KeyboardInterrupt and SystemExit are let through.
        return False
247
 
248
 
249
def entry_hash(entry):
    """ Compute the key which identifies a compilation database entry.

    :param entry:   dictionary with 'file', 'directory' and 'command' keys,
    :return:        string built from the reversed file name, the reversed
                    directory and the command without its first word. """

    # Both path like values are reverted for faster lookup in set.
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To avoid this the first word
    # of the command is left out of the hash.
    arguments = decode(entry['command'])[1:]

    return '<>'.join((reversed_file, reversed_directory, ' '.join(arguments)))