gen_compile_commands.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. #!/usr/bin/env python3
  2. # SPDX-License-Identifier: GPL-2.0
  3. #
  4. # Copyright (C) Google LLC, 2018
  5. #
  6. # Author: Tom Roeder <[email protected]>
  7. #
  8. """A tool for generating compile_commands.json in the Linux kernel."""
  9. import argparse
  10. import json
  11. import logging
  12. import os
  13. import re
  14. import subprocess
  15. import sys
# Default path of the generated compilation database (see --output).
_DEFAULT_OUTPUT = 'compile_commands.json'
# Default logging threshold (see --log_level).
_DEFAULT_LOG_LEVEL = 'WARNING'

# Matches the base name of a command file: a leading '.' and a trailing
# '.cmd' (e.g. '.foo.o.cmd').
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Matches the first line of a .cmd file that compiles a single .c file into
# a .o object. Group 1 is the command line up to (and including the space
# before) the source file, group 2 is the .c source file itself.
_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c) *(;|$)'
# Accepted values for --log_level; each one names an attribute of the
# logging module.
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# Directories, relative to the searched directory, that are skipped while
# looking for .cmd files.
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
  24. def parse_arguments():
  25. """Sets up and parses command-line arguments.
  26. Returns:
  27. log_level: A logging level to filter log output.
  28. directory: The work directory where the objects were built.
  29. ar: Command used for parsing .a archives.
  30. output: Where to write the compile-commands JSON file.
  31. paths: The list of files/directories to handle to find .cmd files.
  32. """
  33. usage = 'Creates a compile_commands.json database from kernel .cmd files'
  34. parser = argparse.ArgumentParser(description=usage)
  35. directory_help = ('specify the output directory used for the kernel build '
  36. '(defaults to the working directory)')
  37. parser.add_argument('-d', '--directory', type=str, default='.',
  38. help=directory_help)
  39. output_help = ('path to the output command database (defaults to ' +
  40. _DEFAULT_OUTPUT + ')')
  41. parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
  42. help=output_help)
  43. log_level_help = ('the level of log messages to produce (defaults to ' +
  44. _DEFAULT_LOG_LEVEL + ')')
  45. parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
  46. default=_DEFAULT_LOG_LEVEL, help=log_level_help)
  47. ar_help = 'command used for parsing .a archives'
  48. parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)
  49. paths_help = ('directories to search or files to parse '
  50. '(files should be *.o, *.a, or modules.order). '
  51. 'If nothing is specified, the current directory is searched')
  52. parser.add_argument('paths', type=str, nargs='*', help=paths_help)
  53. args = parser.parse_args()
  54. return (args.log_level,
  55. os.path.abspath(args.directory),
  56. args.output,
  57. args.ar,
  58. args.paths if len(args.paths) > 0 else [args.directory])
  59. def cmdfiles_in_dir(directory):
  60. """Generate the iterator of .cmd files found under the directory.
  61. Walk under the given directory, and yield every .cmd file found.
  62. Args:
  63. directory: The directory to search for .cmd files.
  64. Yields:
  65. The path to a .cmd file.
  66. """
  67. filename_matcher = re.compile(_FILENAME_PATTERN)
  68. exclude_dirs = [ os.path.join(directory, d) for d in _EXCLUDE_DIRS ]
  69. for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
  70. # Prune unwanted directories.
  71. if dirpath in exclude_dirs:
  72. dirnames[:] = []
  73. continue
  74. for filename in filenames:
  75. if filename_matcher.match(filename):
  76. yield os.path.join(dirpath, filename)
  77. def to_cmdfile(path):
  78. """Return the path of .cmd file used for the given build artifact
  79. Args:
  80. Path: file path
  81. Returns:
  82. The path to .cmd file
  83. """
  84. dir, base = os.path.split(path)
  85. return os.path.join(dir, '.' + base + '.cmd')
  86. def cmdfiles_for_a(archive, ar):
  87. """Generate the iterator of .cmd files associated with the archive.
  88. Parse the given archive, and yield every .cmd file used to build it.
  89. Args:
  90. archive: The archive to parse
  91. Yields:
  92. The path to every .cmd file found
  93. """
  94. for obj in subprocess.check_output([ar, '-t', archive]).decode().split():
  95. yield to_cmdfile(obj)
  96. def cmdfiles_for_modorder(modorder):
  97. """Generate the iterator of .cmd files associated with the modules.order.
  98. Parse the given modules.order, and yield every .cmd file used to build the
  99. contained modules.
  100. Args:
  101. modorder: The modules.order file to parse
  102. Yields:
  103. The path to every .cmd file found
  104. """
  105. with open(modorder) as f:
  106. for line in f:
  107. ko = line.rstrip()
  108. base, ext = os.path.splitext(ko)
  109. if ext != '.ko':
  110. sys.exit('{}: module path must end with .ko'.format(ko))
  111. mod = base + '.mod'
  112. # Read from *.mod, to get a list of objects that compose the module.
  113. with open(mod) as m:
  114. for mod_line in m:
  115. yield to_cmdfile(mod_line.rstrip())
  116. def process_line(root_directory, command_prefix, file_path):
  117. """Extracts information from a .cmd line and creates an entry from it.
  118. Args:
  119. root_directory: The directory that was searched for .cmd files. Usually
  120. used directly in the "directory" entry in compile_commands.json.
  121. command_prefix: The extracted command line, up to the last element.
  122. file_path: The .c file from the end of the extracted command.
  123. Usually relative to root_directory, but sometimes absolute.
  124. Returns:
  125. An entry to append to compile_commands.
  126. Raises:
  127. ValueError: Could not find the extracted file based on file_path and
  128. root_directory or file_directory.
  129. """
  130. # The .cmd files are intended to be included directly by Make, so they
  131. # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
  132. # kernel version). The compile_commands.json file is not interepreted
  133. # by Make, so this code replaces the escaped version with '#'.
  134. prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#')
  135. # Use os.path.abspath() to normalize the path resolving '.' and '..' .
  136. abs_path = os.path.abspath(os.path.join(root_directory, file_path))
  137. if not os.path.exists(abs_path):
  138. raise ValueError('File %s not found' % abs_path)
  139. return {
  140. 'directory': root_directory,
  141. 'file': abs_path,
  142. 'command': prefix + file_path,
  143. }
  144. def main():
  145. """Walks through the directory and finds and parses .cmd files."""
  146. log_level, directory, output, ar, paths = parse_arguments()
  147. level = getattr(logging, log_level)
  148. logging.basicConfig(format='%(levelname)s: %(message)s', level=level)
  149. line_matcher = re.compile(_LINE_PATTERN)
  150. compile_commands = []
  151. for path in paths:
  152. # If 'path' is a directory, handle all .cmd files under it.
  153. # Otherwise, handle .cmd files associated with the file.
  154. # built-in objects are linked via vmlinux.a
  155. # Modules are listed in modules.order.
  156. if os.path.isdir(path):
  157. cmdfiles = cmdfiles_in_dir(path)
  158. elif path.endswith('.a'):
  159. cmdfiles = cmdfiles_for_a(path, ar)
  160. elif path.endswith('modules.order'):
  161. cmdfiles = cmdfiles_for_modorder(path)
  162. else:
  163. sys.exit('{}: unknown file type'.format(path))
  164. for cmdfile in cmdfiles:
  165. with open(cmdfile, 'rt') as f:
  166. result = line_matcher.match(f.readline())
  167. if result:
  168. try:
  169. entry = process_line(directory, result.group(1),
  170. result.group(2))
  171. compile_commands.append(entry)
  172. except ValueError as err:
  173. logging.info('Could not add line from %s: %s',
  174. cmdfile, err)
  175. with open(output, 'wt') as f:
  176. json.dump(compile_commands, f, indent=2, sort_keys=True)
  177. if __name__ == '__main__':
  178. main()