Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | # -*- coding: utf-8 -*- |
| 2 | # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 3 | # See https://llvm.org/LICENSE.txt for license information. |
||
| 4 | # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 5 | """ This module is responsible for parsing a compiler invocation. """ |
||
| 6 | |||
| 7 | import re |
||
| 8 | import os |
||
| 9 | import collections |
||
| 10 | |||
| 11 | __all__ = ['split_command', 'classify_source', 'compiler_language'] |
||
| 12 | |||
# Compiler options dropped while creating the compilation database.
# `split_command` consults this table when it filters and classifies the
# arguments of a command. Note that it is not the only place where an
# argument can be dropped.
#
# Each key is an option name; the value is the number of following
# arguments which are skipped together with the option.
IGNORED_FLAGS = {}

# "Compile only" is set explicitly by the creator of the compilation
# database, so the one from the original command is dropped.
IGNORED_FLAGS.update(dict.fromkeys(['-c'], 0))

# Dependency generation options. Keeping them would produce duplicate
# entries in the output which differ only in these flags; users reported
# longer execution times caused by such duplicates.
IGNORED_FLAGS.update(dict.fromkeys(['-MD', '-MMD', '-MG', '-MP'], 0))
IGNORED_FLAGS.update(dict.fromkeys(['-MF', '-MT', '-MQ'], 1))

# Linker options. The compilation database holds compile commands only, so
# the compiler would ignore these anyway; removing them just makes the
# output easier to read.
IGNORED_FLAGS.update(
    dict.fromkeys(['-static', '-shared', '-s', '-rdynamic'], 0))
IGNORED_FLAGS.update(
    dict.fromkeys(['-l', '-L', '-u', '-z', '-T', '-Xlinker'], 1))
||
| 49 | |||
# Regular expressions recognising the executable names of known C/C++
# compilers. They are matched against the base name of the program.
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # cc / c++, optionally through the intercept- or analyze- wrappers
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # gcc / g++ / mcc / m++, with optional prefixes and version suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # clang / clang++, with optional prefixes and version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # llvm-gcc / llvm-g++
        r'^llvm-g(cc|\+\+)$',
    ))
||
| 57 | |||
| 58 | |||
# Result type of `split_command`; created once instead of on every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    None is returned when the program is not recognised as a C/C++
    compiler, when an argument shows that no compilation pass is involved
    (eg.: preprocessing only), or when no source file was found among the
    arguments.

    The value on success is a named tuple with the following attributes:

    files:    list of source files
    flags:    list of compile options
    compiler: string value of 'c' or 'c++' """

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options; an explicit iterator is used because
    # some options consume the arguments which are following them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # the default value keeps a truncated command (option given as
            # the last argument, without its value) from leaking a
            # StopIteration exception to the caller.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # the value is missing when the option was the last argument
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)
||
| 101 | |||
| 102 | |||
def classify_source(filename, c_compiler=True):
    """ Return the language implied by the extension of the file name.

    filename:   name or path of the file; only the extension of the last
                path component is considered, and it is case sensitive.
    c_compiler: selects how the ambiguous '.c' and '.i' extensions are
                interpreted: as C when true, as C++ otherwise.

    Returns the language name as a string, or None when the extension is
    not a known one. """

    suffix = os.path.splitext(os.path.basename(filename))[1]

    # extensions which are always meaning C++ source
    if suffix in ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++',
                  '.C++', '.txx'):
        return 'c++'

    # extensions whose meaning depends on the kind of the compiler
    if c_compiler:
        plain, preprocessed = 'c', 'c-cpp-output'
    else:
        plain, preprocessed = 'c++', 'c++-cpp-output'

    remaining = {
        '.c': plain,
        '.i': preprocessed,
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
    }
    return remaining.get(suffix)
||
| 127 | |||
| 128 | |||
def compiler_language(command):
    """ Decide whether the command is a compiler call and for which language.

    command: the command as a sequence; the first element is the program,
             of which only the base name is examined.

    Returns 'c++' or 'c' when the program name matches one of the known
    compiler patterns. Returns None otherwise (also for empty command). """

    if not command:
        return None

    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            break
    else:
        # none of the known compiler names matched
        return None

    # a '++' in the name, optionally followed by a dash separated suffix,
    # marks the C++ flavour of the compiler.
    if re.match(r'^(.+)(\+\+)(-.+|)$', program):
        return 'c++'
    return 'c'