check_names.py 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright The Mbed TLS Contributors
  4. # SPDX-License-Identifier: Apache-2.0
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  7. # not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  14. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. """
  18. This script confirms that the naming of all symbols and identifiers in Mbed TLS
  19. is consistent with the house style and is also self-consistent. It only runs
  20. on Linux and macOS since it depends on nm.
  21. It contains two major Python classes, CodeParser and NameChecker. They both have
  22. a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
  23. but the individual functions can also be used for specific needs.
  24. CodeParser makes heavy use of regular expressions to parse the code, and is
  25. dependent on the current code formatting. Many Python C parser libraries require
  26. preprocessed C code, which means no macro parsing. Compiler tools are also not
  27. very helpful when we want the exact location in the original source (which
  28. becomes impossible when e.g. comments are stripped).
  29. NameChecker performs the following checks:
  30. - All exported and available symbols in the library object files, are explicitly
  31. declared in the header files. This uses the nm command.
  32. - All macros, constants, and identifiers (function names, struct names, etc)
  33. follow the required regex pattern.
  34. - Typo checking: All words that begin with MBED|PSA exist as macros or constants.
  35. The script returns 0 on success, 1 on test failure, and 2 if there is a script
  36. error. It must be run from Mbed TLS root.
  37. """
  38. import abc
  39. import argparse
  40. import fnmatch
  41. import glob
  42. import textwrap
  43. import os
  44. import sys
  45. import traceback
  46. import re
  47. import enum
  48. import shutil
  49. import subprocess
  50. import logging
  51. import scripts_path # pylint: disable=unused-import
  52. from mbedtls_dev import build_tree
# Naming patterns to check against. These are defined outside the NameChecker
# class for ease of modification.

# Public macros: "MBEDTLS_" or "PSA_" prefix followed by digits, uppercase
# letters and underscores, ending in a digit or an uppercase letter.
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
# Internal macros: any mix of letters, digits and underscores, ending in a
# digit or an uppercase letter.
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
# Enum constants are held to the same rule as public macros.
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
# Identifiers: "mbedtls_" or "psa_" prefix followed by digits, lowercase
# letters and underscores, ending in a digit or a lowercase letter.
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"
  59. class Match(): # pylint: disable=too-few-public-methods
  60. """
  61. A class representing a match, together with its found position.
  62. Fields:
  63. * filename: the file that the match was in.
  64. * line: the full line containing the match.
  65. * line_no: the line number.
  66. * pos: a tuple of (start, end) positions on the line where the match is.
  67. * name: the match itself.
  68. """
  69. def __init__(self, filename, line, line_no, pos, name):
  70. # pylint: disable=too-many-arguments
  71. self.filename = filename
  72. self.line = line
  73. self.line_no = line_no
  74. self.pos = pos
  75. self.name = name
  76. def __str__(self):
  77. """
  78. Return a formatted code listing representation of the erroneous line.
  79. """
  80. gutter = format(self.line_no, "4d")
  81. underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"
  82. return (
  83. " {0} |\n".format(" " * len(gutter)) +
  84. " {0} | {1}".format(gutter, self.line) +
  85. " {0} | {1}\n".format(" " * len(gutter), underline)
  86. )
  87. class Problem(abc.ABC): # pylint: disable=too-few-public-methods
  88. """
  89. An abstract parent class representing a form of static analysis error.
  90. It extends an Abstract Base Class, which means it is not instantiable, and
  91. it also mandates certain abstract methods to be implemented in subclasses.
  92. """
  93. # Class variable to control the quietness of all problems
  94. quiet = False
  95. def __init__(self):
  96. self.textwrapper = textwrap.TextWrapper()
  97. self.textwrapper.width = 80
  98. self.textwrapper.initial_indent = " > "
  99. self.textwrapper.subsequent_indent = " "
  100. def __str__(self):
  101. """
  102. Unified string representation method for all Problems.
  103. """
  104. if self.__class__.quiet:
  105. return self.quiet_output()
  106. return self.verbose_output()
  107. @abc.abstractmethod
  108. def quiet_output(self):
  109. """
  110. The output when --quiet is enabled.
  111. """
  112. pass
  113. @abc.abstractmethod
  114. def verbose_output(self):
  115. """
  116. The default output with explanation and code snippet if appropriate.
  117. """
  118. pass
  119. class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
  120. """
  121. A problem that occurs when an exported/available symbol in the object file
  122. is not explicitly declared in header files. Created with
  123. NameCheck.check_symbols_declared_in_header()
  124. Fields:
  125. * symbol_name: the name of the symbol.
  126. """
  127. def __init__(self, symbol_name):
  128. self.symbol_name = symbol_name
  129. Problem.__init__(self)
  130. def quiet_output(self):
  131. return "{0}".format(self.symbol_name)
  132. def verbose_output(self):
  133. return self.textwrapper.fill(
  134. "'{0}' was found as an available symbol in the output of nm, "
  135. "however it was not declared in any header files."
  136. .format(self.symbol_name))
  137. class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
  138. """
  139. A problem that occurs when something doesn't match the expected pattern.
  140. Created with NameCheck.check_match_pattern()
  141. Fields:
  142. * pattern: the expected regex pattern
  143. * match: the Match object in question
  144. """
  145. def __init__(self, pattern, match):
  146. self.pattern = pattern
  147. self.match = match
  148. Problem.__init__(self)
  149. def quiet_output(self):
  150. return (
  151. "{0}:{1}:{2}"
  152. .format(self.match.filename, self.match.line_no, self.match.name)
  153. )
  154. def verbose_output(self):
  155. return self.textwrapper.fill(
  156. "{0}:{1}: '{2}' does not match the required pattern '{3}'."
  157. .format(
  158. self.match.filename,
  159. self.match.line_no,
  160. self.match.name,
  161. self.pattern
  162. )
  163. ) + "\n" + str(self.match)
  164. class Typo(Problem): # pylint: disable=too-few-public-methods
  165. """
  166. A problem that occurs when a word using MBED or PSA doesn't
  167. appear to be defined as constants nor enum values. Created with
  168. NameCheck.check_for_typos()
  169. Fields:
  170. * match: the Match object of the MBED|PSA name in question.
  171. """
  172. def __init__(self, match):
  173. self.match = match
  174. Problem.__init__(self)
  175. def quiet_output(self):
  176. return (
  177. "{0}:{1}:{2}"
  178. .format(self.match.filename, self.match.line_no, self.match.name)
  179. )
  180. def verbose_output(self):
  181. return self.textwrapper.fill(
  182. "{0}:{1}: '{2}' looks like a typo. It was not found in any "
  183. "macros or any enums. If this is not a typo, put "
  184. "//no-check-names after it."
  185. .format(self.match.filename, self.match.line_no, self.match.name)
  186. ) + "\n" + str(self.match)
  187. class CodeParser():
  188. """
  189. Class for retrieving files and parsing the code. This can be used
  190. independently of the checks that NameChecker performs, for example for
  191. list_internal_identifiers.py.
  192. """
  193. def __init__(self, log):
  194. self.log = log
  195. build_tree.check_repo_path()
  196. # Memo for storing "glob expression": set(filepaths)
  197. self.files = {}
  198. # Globally excluded filenames.
  199. # Note that "*" can match directory separators in exclude lists.
  200. self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]
  201. def comprehensive_parse(self):
  202. """
  203. Comprehensive ("default") function to call each parsing function and
  204. retrieve various elements of the code, together with the source location.
  205. Returns a dict of parsed item key to the corresponding List of Matches.
  206. """
  207. self.log.info("Parsing source code...")
  208. self.log.debug(
  209. "The following files are excluded from the search: {}"
  210. .format(str(self.excluded_files))
  211. )
  212. all_macros = {"public": [], "internal": [], "private":[]}
  213. all_macros["public"] = self.parse_macros([
  214. "include/mbedtls/*.h",
  215. "include/psa/*.h",
  216. "3rdparty/everest/include/everest/everest.h",
  217. "3rdparty/everest/include/everest/x25519.h"
  218. ])
  219. all_macros["internal"] = self.parse_macros([
  220. "library/*.h",
  221. "tests/include/test/drivers/*.h",
  222. ])
  223. all_macros["private"] = self.parse_macros([
  224. "library/*.c",
  225. ])
  226. enum_consts = self.parse_enum_consts([
  227. "include/mbedtls/*.h",
  228. "include/psa/*.h",
  229. "library/*.h",
  230. "library/*.c",
  231. "3rdparty/everest/include/everest/everest.h",
  232. "3rdparty/everest/include/everest/x25519.h"
  233. ])
  234. identifiers, excluded_identifiers = self.parse_identifiers([
  235. "include/mbedtls/*.h",
  236. "include/psa/*.h",
  237. "library/*.h",
  238. "3rdparty/everest/include/everest/everest.h",
  239. "3rdparty/everest/include/everest/x25519.h"
  240. ], ["3rdparty/p256-m/p256-m/p256-m.h"])
  241. mbed_psa_words = self.parse_mbed_psa_words([
  242. "include/mbedtls/*.h",
  243. "include/psa/*.h",
  244. "library/*.h",
  245. "3rdparty/everest/include/everest/everest.h",
  246. "3rdparty/everest/include/everest/x25519.h",
  247. "library/*.c",
  248. "3rdparty/everest/library/everest.c",
  249. "3rdparty/everest/library/x25519.c"
  250. ], ["library/psa_crypto_driver_wrappers.c"])
  251. symbols = self.parse_symbols()
  252. # Remove identifier macros like mbedtls_printf or mbedtls_calloc
  253. identifiers_justname = [x.name for x in identifiers]
  254. actual_macros = {"public": [], "internal": []}
  255. for scope in actual_macros:
  256. for macro in all_macros[scope]:
  257. if macro.name not in identifiers_justname:
  258. actual_macros[scope].append(macro)
  259. self.log.debug("Found:")
  260. # Aligns the counts on the assumption that none exceeds 4 digits
  261. for scope in actual_macros:
  262. self.log.debug(" {:4} Total {} Macros"
  263. .format(len(all_macros[scope]), scope))
  264. self.log.debug(" {:4} {} Non-identifier Macros"
  265. .format(len(actual_macros[scope]), scope))
  266. self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
  267. self.log.debug(" {:4} Identifiers".format(len(identifiers)))
  268. self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
  269. return {
  270. "public_macros": actual_macros["public"],
  271. "internal_macros": actual_macros["internal"],
  272. "private_macros": all_macros["private"],
  273. "enum_consts": enum_consts,
  274. "identifiers": identifiers,
  275. "excluded_identifiers": excluded_identifiers,
  276. "symbols": symbols,
  277. "mbed_psa_words": mbed_psa_words
  278. }
  279. def is_file_excluded(self, path, exclude_wildcards):
  280. """Whether the given file path is excluded."""
  281. # exclude_wildcards may be None. Also, consider the global exclusions.
  282. exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
  283. for pattern in exclude_wildcards:
  284. if fnmatch.fnmatch(path, pattern):
  285. return True
  286. return False
  287. def get_all_files(self, include_wildcards, exclude_wildcards):
  288. """
  289. Get all files that match any of the included UNIX-style wildcards
  290. and filter them into included and excluded lists.
  291. While the check_names script is designed only for use on UNIX/macOS
  292. (due to nm), this function alone will work fine on Windows even with
  293. forward slashes in the wildcard.
  294. Args:
  295. * include_wildcards: a List of shell-style wildcards to match filepaths.
  296. * exclude_wildcards: a List of shell-style wildcards to exclude.
  297. Returns:
  298. * inc_files: A List of relative filepaths for included files.
  299. * exc_files: A List of relative filepaths for excluded files.
  300. """
  301. accumulator = set()
  302. all_wildcards = include_wildcards + (exclude_wildcards or [])
  303. for wildcard in all_wildcards:
  304. accumulator = accumulator.union(glob.iglob(wildcard))
  305. inc_files = []
  306. exc_files = []
  307. for path in accumulator:
  308. if self.is_file_excluded(path, exclude_wildcards):
  309. exc_files.append(path)
  310. else:
  311. inc_files.append(path)
  312. return (inc_files, exc_files)
  313. def get_included_files(self, include_wildcards, exclude_wildcards):
  314. """
  315. Get all files that match any of the included UNIX-style wildcards.
  316. While the check_names script is designed only for use on UNIX/macOS
  317. (due to nm), this function alone will work fine on Windows even with
  318. forward slashes in the wildcard.
  319. Args:
  320. * include_wildcards: a List of shell-style wildcards to match filepaths.
  321. * exclude_wildcards: a List of shell-style wildcards to exclude.
  322. Returns a List of relative filepaths.
  323. """
  324. accumulator = set()
  325. for include_wildcard in include_wildcards:
  326. accumulator = accumulator.union(glob.iglob(include_wildcard))
  327. return list(path for path in accumulator
  328. if not self.is_file_excluded(path, exclude_wildcards))
  329. def parse_macros(self, include, exclude=None):
  330. """
  331. Parse all macros defined by #define preprocessor directives.
  332. Args:
  333. * include: A List of glob expressions to look for files through.
  334. * exclude: A List of glob expressions for excluding files.
  335. Returns a List of Match objects for the found macros.
  336. """
  337. macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
  338. exclusions = (
  339. "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
  340. )
  341. files = self.get_included_files(include, exclude)
  342. self.log.debug("Looking for macros in {} files".format(len(files)))
  343. macros = []
  344. for header_file in files:
  345. with open(header_file, "r", encoding="utf-8") as header:
  346. for line_no, line in enumerate(header):
  347. for macro in macro_regex.finditer(line):
  348. if macro.group("macro").startswith(exclusions):
  349. continue
  350. macros.append(Match(
  351. header_file,
  352. line,
  353. line_no,
  354. macro.span("macro"),
  355. macro.group("macro")))
  356. return macros
  357. def parse_mbed_psa_words(self, include, exclude=None):
  358. """
  359. Parse all words in the file that begin with MBED|PSA, in and out of
  360. macros, comments, anything.
  361. Args:
  362. * include: A List of glob expressions to look for files through.
  363. * exclude: A List of glob expressions for excluding files.
  364. Returns a List of Match objects for words beginning with MBED|PSA.
  365. """
  366. # Typos of TLS are common, hence the broader check below than MBEDTLS.
  367. mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
  368. exclusions = re.compile(r"// *no-check-names|#error")
  369. files = self.get_included_files(include, exclude)
  370. self.log.debug(
  371. "Looking for MBED|PSA words in {} files"
  372. .format(len(files))
  373. )
  374. mbed_psa_words = []
  375. for filename in files:
  376. with open(filename, "r", encoding="utf-8") as fp:
  377. for line_no, line in enumerate(fp):
  378. if exclusions.search(line):
  379. continue
  380. for name in mbed_regex.finditer(line):
  381. mbed_psa_words.append(Match(
  382. filename,
  383. line,
  384. line_no,
  385. name.span(0),
  386. name.group(0)))
  387. return mbed_psa_words
  388. def parse_enum_consts(self, include, exclude=None):
  389. """
  390. Parse all enum value constants that are declared.
  391. Args:
  392. * include: A List of glob expressions to look for files through.
  393. * exclude: A List of glob expressions for excluding files.
  394. Returns a List of Match objects for the findings.
  395. """
  396. files = self.get_included_files(include, exclude)
  397. self.log.debug("Looking for enum consts in {} files".format(len(files)))
  398. # Emulate a finite state machine to parse enum declarations.
  399. # OUTSIDE_KEYWORD = outside the enum keyword
  400. # IN_BRACES = inside enum opening braces
  401. # IN_BETWEEN = between enum keyword and opening braces
  402. states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])
  403. enum_consts = []
  404. for header_file in files:
  405. state = states.OUTSIDE_KEYWORD
  406. with open(header_file, "r", encoding="utf-8") as header:
  407. for line_no, line in enumerate(header):
  408. # Match typedefs and brackets only when they are at the
  409. # beginning of the line -- if they are indented, they might
  410. # be sub-structures within structs, etc.
  411. optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
  412. if (state == states.OUTSIDE_KEYWORD and
  413. re.search(r"^(typedef +)?enum " + \
  414. optional_c_identifier + \
  415. r" *{", line)):
  416. state = states.IN_BRACES
  417. elif (state == states.OUTSIDE_KEYWORD and
  418. re.search(r"^(typedef +)?enum", line)):
  419. state = states.IN_BETWEEN
  420. elif (state == states.IN_BETWEEN and
  421. re.search(r"^{", line)):
  422. state = states.IN_BRACES
  423. elif (state == states.IN_BRACES and
  424. re.search(r"^}", line)):
  425. state = states.OUTSIDE_KEYWORD
  426. elif (state == states.IN_BRACES and
  427. not re.search(r"^ *#", line)):
  428. enum_const = re.search(r"^ *(?P<enum_const>\w+)", line)
  429. if not enum_const:
  430. continue
  431. enum_consts.append(Match(
  432. header_file,
  433. line,
  434. line_no,
  435. enum_const.span("enum_const"),
  436. enum_const.group("enum_const")))
  437. return enum_consts
  438. IGNORED_CHUNK_REGEX = re.compile('|'.join([
  439. r'/\*.*?\*/', # block comment entirely on one line
  440. r'//.*', # line comment
  441. r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
  442. ]))
  443. def strip_comments_and_literals(self, line, in_block_comment):
  444. """Strip comments and string literals from line.
  445. Continuation lines are not supported.
  446. If in_block_comment is true, assume that the line starts inside a
  447. block comment.
  448. Return updated values of (line, in_block_comment) where:
  449. * Comments in line have been replaced by a space (or nothing at the
  450. start or end of the line).
  451. * String contents have been removed.
  452. * in_block_comment indicates whether the line ends inside a block
  453. comment that continues on the next line.
  454. """
  455. # Terminate current multiline comment?
  456. if in_block_comment:
  457. m = re.search(r"\*/", line)
  458. if m:
  459. in_block_comment = False
  460. line = line[m.end(0):]
  461. else:
  462. return '', True
  463. # Remove full comments and string literals.
  464. # Do it all together to handle cases like "/*" correctly.
  465. # Note that continuation lines are not supported.
  466. line = re.sub(self.IGNORED_CHUNK_REGEX,
  467. lambda s: '""' if s.group('string') else ' ',
  468. line)
  469. # Start an unfinished comment?
  470. # (If `/*` was part of a complete comment, it's already been removed.)
  471. m = re.search(r"/\*", line)
  472. if m:
  473. in_block_comment = True
  474. line = line[:m.start(0)]
  475. return line, in_block_comment
  476. IDENTIFIER_REGEX = re.compile('|'.join([
  477. # Match " something(a" or " *something(a". Functions.
  478. # Assumptions:
  479. # - function definition from return type to one of its arguments is
  480. # all on one line
  481. # - function definition line only contains alphanumeric, asterisk,
  482. # underscore, and open bracket
  483. r".* \**(\w+) *\( *\w",
  484. # Match "(*something)(".
  485. r".*\( *\* *(\w+) *\) *\(",
  486. # Match names of named data structures.
  487. r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
  488. # Match names of typedef instances, after closing bracket.
  489. r"}? *(\w+)[;[].*",
  490. ]))
  491. # The regex below is indented for clarity.
  492. EXCLUSION_LINES = re.compile("|".join([
  493. r"extern +\"C\"",
  494. r"(typedef +)?(struct|union|enum)( *{)?$",
  495. r"} *;?$",
  496. r"$",
  497. r"//",
  498. r"#",
  499. ]))
  500. def parse_identifiers_in_file(self, header_file, identifiers):
  501. """
  502. Parse all lines of a header where a function/enum/struct/union/typedef
  503. identifier is declared, based on some regex and heuristics. Highly
  504. dependent on formatting style.
  505. Append found matches to the list ``identifiers``.
  506. """
  507. with open(header_file, "r", encoding="utf-8") as header:
  508. in_block_comment = False
  509. # The previous line variable is used for concatenating lines
  510. # when identifiers are formatted and spread across multiple
  511. # lines.
  512. previous_line = ""
  513. for line_no, line in enumerate(header):
  514. line, in_block_comment = \
  515. self.strip_comments_and_literals(line, in_block_comment)
  516. if self.EXCLUSION_LINES.match(line):
  517. previous_line = ""
  518. continue
  519. # If the line contains only space-separated alphanumeric
  520. # characters (or underscore, asterisk, or open parenthesis),
  521. # and nothing else, high chance it's a declaration that
  522. # continues on the next line
  523. if re.search(r"^([\w\*\(]+\s+)+$", line):
  524. previous_line += line
  525. continue
  526. # If previous line seemed to start an unfinished declaration
  527. # (as above), concat and treat them as one.
  528. if previous_line:
  529. line = previous_line.strip() + " " + line.strip() + "\n"
  530. previous_line = ""
  531. # Skip parsing if line has a space in front = heuristic to
  532. # skip function argument lines (highly subject to formatting
  533. # changes)
  534. if line[0] == " ":
  535. continue
  536. identifier = self.IDENTIFIER_REGEX.search(line)
  537. if not identifier:
  538. continue
  539. # Find the group that matched, and append it
  540. for group in identifier.groups():
  541. if not group:
  542. continue
  543. identifiers.append(Match(
  544. header_file,
  545. line,
  546. line_no,
  547. identifier.span(),
  548. group))
  549. def parse_identifiers(self, include, exclude=None):
  550. """
  551. Parse all lines of a header where a function/enum/struct/union/typedef
  552. identifier is declared, based on some regex and heuristics. Highly
  553. dependent on formatting style. Identifiers in excluded files are still
  554. parsed
  555. Args:
  556. * include: A List of glob expressions to look for files through.
  557. * exclude: A List of glob expressions for excluding files.
  558. Returns: a Tuple of two Lists of Match objects with identifiers.
  559. * included_identifiers: A List of Match objects with identifiers from
  560. included files.
  561. * excluded_identifiers: A List of Match objects with identifiers from
  562. excluded files.
  563. """
  564. included_files, excluded_files = \
  565. self.get_all_files(include, exclude)
  566. self.log.debug("Looking for included identifiers in {} files".format \
  567. (len(included_files)))
  568. included_identifiers = []
  569. excluded_identifiers = []
  570. for header_file in included_files:
  571. self.parse_identifiers_in_file(header_file, included_identifiers)
  572. for header_file in excluded_files:
  573. self.parse_identifiers_in_file(header_file, excluded_identifiers)
  574. return (included_identifiers, excluded_identifiers)
  575. def parse_symbols(self):
  576. """
  577. Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509
  578. object files using nm to retrieve the list of referenced symbols.
  579. Exceptions thrown here are rethrown because they would be critical
  580. errors that void several tests, and thus needs to halt the program. This
  581. is explicitly done for clarity.
  582. Returns a List of unique symbols defined and used in the libraries.
  583. """
  584. self.log.info("Compiling...")
  585. symbols = []
  586. # Back up the config and atomically compile with the full configuration.
  587. shutil.copy(
  588. "include/mbedtls/mbedtls_config.h",
  589. "include/mbedtls/mbedtls_config.h.bak"
  590. )
  591. try:
  592. # Use check=True in all subprocess calls so that failures are raised
  593. # as exceptions and logged.
  594. subprocess.run(
  595. ["python3", "scripts/config.py", "full"],
  596. universal_newlines=True,
  597. check=True
  598. )
  599. my_environment = os.environ.copy()
  600. my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables"
  601. # Run make clean separately to lib to prevent unwanted behavior when
  602. # make is invoked with parallelism.
  603. subprocess.run(
  604. ["make", "clean"],
  605. universal_newlines=True,
  606. check=True
  607. )
  608. subprocess.run(
  609. ["make", "lib"],
  610. env=my_environment,
  611. universal_newlines=True,
  612. stdout=subprocess.PIPE,
  613. stderr=subprocess.STDOUT,
  614. check=True
  615. )
  616. # Perform object file analysis using nm
  617. symbols = self.parse_symbols_from_nm([
  618. "library/libmbedcrypto.a",
  619. "library/libmbedtls.a",
  620. "library/libmbedx509.a"
  621. ])
  622. subprocess.run(
  623. ["make", "clean"],
  624. universal_newlines=True,
  625. check=True
  626. )
  627. except subprocess.CalledProcessError as error:
  628. self.log.debug(error.output)
  629. raise error
  630. finally:
  631. # Put back the original config regardless of there being errors.
  632. # Works also for keyboard interrupts.
  633. shutil.move(
  634. "include/mbedtls/mbedtls_config.h.bak",
  635. "include/mbedtls/mbedtls_config.h"
  636. )
  637. return symbols
  638. def parse_symbols_from_nm(self, object_files):
  639. """
  640. Run nm to retrieve the list of referenced symbols in each object file.
  641. Does not return the position data since it is of no use.
  642. Args:
  643. * object_files: a List of compiled object filepaths to search through.
  644. Returns a List of unique symbols defined and used in any of the object
  645. files.
  646. """
  647. nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$")
  648. nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . _*(?P<symbol>\w+)")
  649. exclusions = ("FStar", "Hacl")
  650. symbols = []
  651. # Gather all outputs of nm
  652. nm_output = ""
  653. for lib in object_files:
  654. nm_output += subprocess.run(
  655. ["nm", "-og", lib],
  656. universal_newlines=True,
  657. stdout=subprocess.PIPE,
  658. stderr=subprocess.STDOUT,
  659. check=True
  660. ).stdout
  661. for line in nm_output.splitlines():
  662. if not nm_undefined_regex.search(line):
  663. symbol = nm_valid_regex.search(line)
  664. if (symbol and not symbol.group("symbol").startswith(exclusions)):
  665. symbols.append(symbol.group("symbol"))
  666. else:
  667. self.log.error(line)
  668. return symbols
  669. class NameChecker():
  670. """
  671. Representation of the core name checking operation performed by this script.
  672. """
  673. def __init__(self, parse_result, log):
  674. self.parse_result = parse_result
  675. self.log = log
  676. def perform_checks(self, quiet=False):
  677. """
  678. A comprehensive checker that performs each check in order, and outputs
  679. a final verdict.
  680. Args:
  681. * quiet: whether to hide detailed problem explanation.
  682. """
  683. self.log.info("=============")
  684. Problem.quiet = quiet
  685. problems = 0
  686. problems += self.check_symbols_declared_in_header()
  687. pattern_checks = [
  688. ("public_macros", PUBLIC_MACRO_PATTERN),
  689. ("internal_macros", INTERNAL_MACRO_PATTERN),
  690. ("enum_consts", CONSTANTS_PATTERN),
  691. ("identifiers", IDENTIFIER_PATTERN)
  692. ]
  693. for group, check_pattern in pattern_checks:
  694. problems += self.check_match_pattern(group, check_pattern)
  695. problems += self.check_for_typos()
  696. self.log.info("=============")
  697. if problems > 0:
  698. self.log.info("FAIL: {0} problem(s) to fix".format(str(problems)))
  699. if quiet:
  700. self.log.info("Remove --quiet to see explanations.")
  701. else:
  702. self.log.info("Use --quiet for minimal output.")
  703. return 1
  704. else:
  705. self.log.info("PASS")
  706. return 0
  707. def check_symbols_declared_in_header(self):
  708. """
  709. Perform a check that all detected symbols in the library object files
  710. are properly declared in headers.
  711. Assumes parse_names_in_source() was called before this.
  712. Returns the number of problems that need fixing.
  713. """
  714. problems = []
  715. all_identifiers = self.parse_result["identifiers"] + \
  716. self.parse_result["excluded_identifiers"]
  717. for symbol in self.parse_result["symbols"]:
  718. found_symbol_declared = False
  719. for identifier_match in all_identifiers:
  720. if symbol == identifier_match.name:
  721. found_symbol_declared = True
  722. break
  723. if not found_symbol_declared:
  724. problems.append(SymbolNotInHeader(symbol))
  725. self.output_check_result("All symbols in header", problems)
  726. return len(problems)
  727. def check_match_pattern(self, group_to_check, check_pattern):
  728. """
  729. Perform a check that all items of a group conform to a regex pattern.
  730. Assumes parse_names_in_source() was called before this.
  731. Args:
  732. * group_to_check: string key to index into self.parse_result.
  733. * check_pattern: the regex to check against.
  734. Returns the number of problems that need fixing.
  735. """
  736. problems = []
  737. for item_match in self.parse_result[group_to_check]:
  738. if not re.search(check_pattern, item_match.name):
  739. problems.append(PatternMismatch(check_pattern, item_match))
  740. # Double underscore should not be used for names
  741. if re.search(r".*__.*", item_match.name):
  742. problems.append(
  743. PatternMismatch("no double underscore allowed", item_match))
  744. self.output_check_result(
  745. "Naming patterns of {}".format(group_to_check),
  746. problems)
  747. return len(problems)
  748. def check_for_typos(self):
  749. """
  750. Perform a check that all words in the source code beginning with MBED are
  751. either defined as macros, or as enum constants.
  752. Assumes parse_names_in_source() was called before this.
  753. Returns the number of problems that need fixing.
  754. """
  755. problems = []
  756. # Set comprehension, equivalent to a list comprehension wrapped by set()
  757. all_caps_names = {
  758. match.name
  759. for match
  760. in self.parse_result["public_macros"] +
  761. self.parse_result["internal_macros"] +
  762. self.parse_result["private_macros"] +
  763. self.parse_result["enum_consts"]
  764. }
  765. typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|"
  766. r"MBEDTLS_TEST_LIBTESTDRIVER*|"
  767. r"PSA_CRYPTO_DRIVER_TEST")
  768. for name_match in self.parse_result["mbed_psa_words"]:
  769. found = name_match.name in all_caps_names
  770. # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the
  771. # PSA driver, they will not exist as macros. However, they
  772. # should still be checked for typos using the equivalent
  773. # BUILTINs that exist.
  774. if "MBEDTLS_PSA_ACCEL_" in name_match.name:
  775. found = name_match.name.replace(
  776. "MBEDTLS_PSA_ACCEL_",
  777. "MBEDTLS_PSA_BUILTIN_") in all_caps_names
  778. if not found and not typo_exclusion.search(name_match.name):
  779. problems.append(Typo(name_match))
  780. self.output_check_result("Likely typos", problems)
  781. return len(problems)
  782. def output_check_result(self, name, problems):
  783. """
  784. Write out the PASS/FAIL status of a performed check depending on whether
  785. there were problems.
  786. Args:
  787. * name: the name of the test
  788. * problems: a List of encountered Problems
  789. """
  790. if problems:
  791. self.log.info("{}: FAIL\n".format(name))
  792. for problem in problems:
  793. self.log.warning(str(problem))
  794. else:
  795. self.log.info("{}: PASS".format(name))
  796. def main():
  797. """
  798. Perform argument parsing, and create an instance of CodeParser and
  799. NameChecker to begin the core operation.
  800. """
  801. parser = argparse.ArgumentParser(
  802. formatter_class=argparse.RawDescriptionHelpFormatter,
  803. description=(
  804. "This script confirms that the naming of all symbols and identifiers "
  805. "in Mbed TLS are consistent with the house style and are also "
  806. "self-consistent.\n\n"
  807. "Expected to be run from the MbedTLS root directory.")
  808. )
  809. parser.add_argument(
  810. "-v", "--verbose",
  811. action="store_true",
  812. help="show parse results"
  813. )
  814. parser.add_argument(
  815. "-q", "--quiet",
  816. action="store_true",
  817. help="hide unnecessary text, explanations, and highlights"
  818. )
  819. args = parser.parse_args()
  820. # Configure the global logger, which is then passed to the classes below
  821. log = logging.getLogger()
  822. log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
  823. log.addHandler(logging.StreamHandler())
  824. try:
  825. code_parser = CodeParser(log)
  826. parse_result = code_parser.comprehensive_parse()
  827. except Exception: # pylint: disable=broad-except
  828. traceback.print_exc()
  829. sys.exit(2)
  830. name_checker = NameChecker(parse_result, log)
  831. return_code = name_checker.perform_checks(quiet=args.quiet)
  832. sys.exit(return_code)
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()