- # -*- coding: utf-8 -*- 
- # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
- # See https://llvm.org/LICENSE.txt for license information. 
- # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
""" This module is responsible for parsing a compiler invocation. """
-   
- import re 
- import os 
- import collections 
-   
- __all__ = ['split_command', 'classify_source', 'compiler_language'] 
-   
# Compiler options dropped during compilation database creation.
# `split_command` consults this table while it filters and classifies the
# arguments. Please note, this is not the complete list of the parameters
# which might be dropped.
#
# Each key is an option name, its value is the number of following
# arguments which have to be skipped together with the option.
IGNORED_FLAGS = {}
# the compile-only flag: the creator of the compilation database sets it
# explicitly.
IGNORED_FLAGS.update(dict.fromkeys(('-c',), 0))
# preprocessor options (the -M family): they would cause duplicate entries
# in the output, where the only difference would be these flags. this was
# an actual finding from users, who suffered longer execution time caused
# by the duplicates.
IGNORED_FLAGS.update(dict.fromkeys(('-MD', '-MMD', '-MG', '-MP'), 0))
IGNORED_FLAGS.update(dict.fromkeys(('-MF', '-MT', '-MQ'), 1))
# linker options: the compilation database contains compilation commands
# only, so the compiler would ignore these flags anyway. getting rid of
# them makes the output more readable.
IGNORED_FLAGS.update(
    dict.fromkeys(('-static', '-shared', '-s', '-rdynamic'), 0))
IGNORED_FLAGS.update(
    dict.fromkeys(('-l', '-L', '-u', '-z', '-T', '-Xlinker'), 1))
-   
# Executable name patterns of the known C/C++ compilers. The patterns are
# matched against the base name of the executable (see `compiler_language`).
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # `cc` or `c++`, optionally prefixed with `intercept-` or `analyze-`
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # `gcc`, `g++`, `mcc` or `m++` with any number of dash terminated
        # prefixes and an optional version suffix (like `-4`, `-4.8`)
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # `clang` or `clang++` with the same kind of prefixes and suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # `llvm-gcc` or `llvm-g++`
        r'^llvm-g(cc|\+\+)$',
    ))
-   
-   
# The record type returned by `split_command`. It is created once here,
# instead of building a new class on every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The command is a list of strings, where the first element is the
    executable. None is returned when the executable is not a known C/C++
    compiler, when the arguments request something else than a compilation
    pass, or when no source file was found among the arguments.

    The value on success is a named tuple with the following attributes:

        compiler: string value of 'c' or 'c++'
        flags:    list of compile options
        files:    list of source files """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options. an explicit iterator is used, because
    # some options consume the arguments which are following them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with their values
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default value stops `StopIteration` from leaking to
                # the caller when the value is missing from the end.
                next(args, None)
        # linker related flags, which are written together with the value
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # a truncated command line might not have the value
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    return _Compilation(compiler, flags, files) if files else None
-   
-   
def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    The `c_compiler` flag selects how the `.c` and `.i` extensions are
    read: as C when it is set, as C++ otherwise. None is returned when
    the extension is not a known one. """

    extension = os.path.splitext(os.path.basename(filename))[1]

    # the language of these two depends on the kind of the compiler
    if extension == '.c':
        return 'c' if c_compiler else 'c++'
    if extension == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # every other known extension belongs to a single language
    languages = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++',
    }
    return languages.get(extension)
-   
-   
def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    The base name of the first element of the command is checked against
    the known compiler name patterns.

    Returns 'c' or 'c++' when it match. None otherwise. """

    # an empty command can not be a compiler call
    if not command:
        return None

    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            # the name of a C++ compiler has a '++' in it, which is either
            # at the end or followed by a dash separated suffix.
            is_cplusplus = re.match(r'^(.+)(\+\+)(-.+|)$', program)
            return 'c++' if is_cplusplus else 'c'
    return None
-