I was curious how widespread bad usage of FileCheck prefixes is in the
LLVM/Clang test suite. Running the attached script yields over 200 tests
(~1.5%) which are essentially broken because they contain checks that
are never checked.
Most widespread is the usage of the "CHECK" prefix while all FileCheck
invocations have an explicit check prefix. Then there are quite a few
prefixes that have no corresponding FileCheck invocation. And of course
lots of typos. Oh, and some tests don't even call FileCheck in the first
place!
There are also a lot of different ways people disable checks: some use
explicit CHECK-DISABLED or CHECK-FIXME, others use HECK, CHECKX or
similar, often without a comment. Since there are also typos with the
same spellings and some tests are already broken with corrected typos
this makes it quite cumbersome to fix.
Sadly I can't come up with non-invasive changes to prevent these
mistakes. And since FileCheck's syntax is so common in comments the rate
of false positives is too high for automatic checking.
So I at least want to bring this issue to the attention of others.
-Nico
-------------- next part --------------
import os
import re
import sys
class FileCheckVerifier:
def __init__(self):
self.prefix_re = re.compile(r"^(?:(RUN)| *(?:;|//|\#)
*([A-Za-z0-9_-]+?)(?:-DAG|-LABEL|-NOT|-NEXT)?):(.*)$")
self.check_prefix_re = re.compile(r"-?-check-prefix[= ]([^ ]+)\b")
self.ignored_str = [
'REQUIRES', 'XFAIL', 'rdar', 'radar',
'radr', 'FIXME', 'TODO', 'Todo',
'XXX', 'NOTE', 'Note', 'Bug',
'Fix', 'Shrink', 'http', 'vim',
'IMPORTANT',
'NB', 'WARNING'
]
self.ignored_regs = [
re.compile(r"^PR\d+$"), # PR123
re.compile(r"^(dr|DR)\d+$"), # DR123
re.compile(r"^([A-Z]+[a-z]+)+$"), # FooBar
re.compile(r"^[a-z]+([_-][a-z]+[0-9]*)+$"), # foo_bar
re.compile(r"^[a-z]+[0-9]*$"), # foo
re.compile(r"^[A-Z]$"), # foo
re.compile(r"^_.+?_$")
]
def extract_valid_prefixes(self, runlines):
valid = set()
for runline in runlines:
prefixes = self.check_prefix_re.findall(runline)
if len(prefixes) > 0:
valid |= set(prefixes)
elif 'FileCheck' in runline:
valid.add('CHECK')
return valid
def is_prefix(self, str):
if str in self.ignored_str:
return False
for r in self.ignored_regs:
if re.search(r, str):
return False
return True
def scan_file(self, filename):
used_prefixes = set()
runlines = []
with open(filename) as f:
curr_runline = ''
for line in f.readlines():
match = self.prefix_re.search(line)
if not match:
continue
prefix = match.group(1) or match.group(2)
rest = match.group(3)
if prefix == 'RUN':
if rest.endswith("\\"):
curr_runline += rest[:-1]
elif len(curr_runline) > 0:
runlines.append(curr_runline + rest)
curr_runline = ''
else:
runlines.append(line)
elif not self.is_prefix(prefix):
pass
else:
used_prefixes.add(prefix)
return (runlines, used_prefixes)
def verify_file_check_prefixes(filename):
verifier = FileCheckVerifier()
runlines, used = verifier.scan_file(filename)
valid = verifier.extract_valid_prefixes(runlines)
unused = valid - used
bad = used - valid
if len(bad) > 0:
print(filename)
print("Bad: %s" % ', '.join(str(s) for s in bad))
#print("Unused: %s" % ', '.join(str(s) for s in unused))
#print("Valid: %s" % ', '.join(str(s) for s in valid))
#print("Used: %s" % ', '.join(str(s) for s in used))
#print("Runlines: %s" % ', '.join(runlines))
print
return False
return True
class LitTestTracker:
def __init__(self):
self.exts = dict()
def is_test(self, path):
path = os.path.abspath(path)
if not os.path.isfile(path):
return False
dir = os.path.dirname(path)
ext = os.path.splitext(path)[1]
return ext in self.exts_for_dir(dir)
def exts_for_dir(self, dir):
self.add_dir(dir)
return self.exts[dir]
def add_dir(self, dir):
if dir in self.exts:
return
exts = self.load_exts_from_config(dir)
if exts:
self.exts[dir] = exts
return
parent = os.path.dirname(dir)
self.add_dir(parent)
self.exts[dir] = self.exts[parent]
def load_exts_from_config(self, dir):
for cfg in ['lit.local.cfg', 'lit.cfg']:
cfg = os.path.join(dir, cfg)
if os.path.exists(cfg):
with open(cfg) as f:
src = f.read()
match = re.search(r"\bconfig\.suffixes = \[(.*?)\]", src)
if match:
return [f[1:-1] for f in re.split(r" *, *", match.group(1))
]
def walk(args):
ltt = LitTestTracker()
for arg in args:
if os.path.isfile(arg):
yield arg
else:
for path, dirs, files in os.walk(arg):
for filepath in [os.path.join(path, f) for f in files]:
if ltt.is_test(filepath):
yield filepath
if __name__ == '__main__':
total = 0
ok_count = 0
bad_count = 0
for filepath in walk(sys.argv[1:]):
ok = verify_file_check_prefixes(filepath)
if ok: ok_count += 1
else: bad_count += 1
total += 1
print("Tests: %d, ok: %d, bad: %d (%.1f%%)" % (total, ok_count,
bad_count, (float(bad_count) / max(1, total) * 100)))