Whenever a test result is present in the base test results but absent from the target test results, the regression report contains an entry looking like the following one:

    ptestresult.apr.testfile: PASSED -> None

The "None" status may be a bit confusing, so replace it with a custom string which states what "None" really means in this context:

    ptestresult.apr.testfile: PASSED -> No matching test result

(From OE-Core rev: b5f6da6e13f3484f51dfa07f6b3500aea7a21825)

Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
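For context, a minimal illustration (not part of the module) of the new behaviour, using the get_status_str() helper and STATUS_STRINGS table defined in the code below:

    get_status_str("PASSED")   # returns "PASSED": statuses without a substitution pass through unchanged
    get_status_str(None)       # returns "No matching test result" instead of the raw "None"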
# resulttool - regression analysis
#
# Copyright (c) 2019, Intel Corporation.
# Copyright (c) 2019, Linux Foundation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import resulttool.resultutils as resultutils

from oeqa.utils.git import GitRepo
import oeqa.utils.gitarchive as gitarchive

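# Map a configuration TEST_TYPE to the configuration key whose contents must match
# between base and target results for them to be considered comparable.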
METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

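# Known autobuilder oe-selftest configurations and the OESELFTEST_METADATA they imply,
# used by guess_oeselftest_metadata() below to backfill metadata on older results that
# were stored without it.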
OESELFTEST_METADATA_GUESS_TABLE = {
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}

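# Human-readable substitutions for raw status values in the regression report; the
# "None" placeholder marks a test with no matching result on the other side.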
STATUS_STRINGS = {
    "None": "No matching test result"
}

def test_has_at_least_one_matching_tag(test, tag_list):
    return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

def all_tests_have_at_least_one_matching_tag(results, tag_list):
    return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())

def any_test_have_any_matching_tag(results, tag_list):
    return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

def have_skipped_test(result, test_prefix):
    return all(result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

def have_all_tests_skipped(result, test_prefixes_list):
    return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)

def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on results content.
    Check results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if it matches one of the known configurations from the autobuilder configuration, apply the guessed
    OESELFTEST_METADATA to it to allow proper test filtering.
    This guessing process is tightly coupled to config.json in the autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time.
    """

    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None

def metadata_matches(base_configuration, target_configuration):
    """
    For the passed base and target, check the test type. If the test type matches one of the
    properties described in METADATA_MATCH_TABLE, compare the corresponding metadata if it is
    present in the base. Return True if the metadata matches, or if the base lacks some
    data (either TEST_TYPE or the corresponding metadata).
    """
    test_type = base_configuration.get('TEST_TYPE')
    if test_type not in METADATA_MATCH_TABLE:
        return True

    metadata_key = METADATA_MATCH_TABLE.get(test_type)
    if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
        return False

    return True

def machine_matches(base_configuration, target_configuration):
    return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')

def can_be_compared(logger, base, target):
    """
    Some test runs are not relevant to compare, for example oeselftest
    runs with different test sets or parameters. Return True if the tests
    can be compared.
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if it is not present, try to guess it based on the test results.
    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
        guess = guess_oeselftest_metadata(base['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
            base_configuration['OESELFTEST_METADATA'] = guess
    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
        guess = guess_oeselftest_metadata(target['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
            target_configuration['OESELFTEST_METADATA'] = guess

    # Test runs with LTP results in them should only be compared with other runs that also contain LTP tests
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)

def get_status_str(raw_status):
    raw_status_lower = raw_status.lower() if raw_status else "None"
    return STATUS_STRINGS.get(raw_status_lower, raw_status)

def compare_result(logger, base_name, target_name, base_result, target_result):
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    new_tests = 0

    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                logger.error('Failed to retrieve base test case status: %s' % k)

        # Also count new tests that were not present in the base results: these
        # could be newly added tests, but they could also highlight test renames
        # or fixed faulty ptests
        for k in target_result:
            if k not in base_result:
                new_tests += 1
    if result:
        new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
        # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
        if new_pass_count < len(result):
            resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
            for k in sorted(result):
                if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                    resultstring += ' %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))
            if new_pass_count > 0:
                resultstring += f' Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
        else:
            resultstring = "Improvement: %s\n %s\n (+%d test(s) passing)\n" % (base_name, target_name, new_pass_count)
            result = None
    else:
        resultstring = "Match: %s\n %s\n" % (base_name, target_name)

    if new_tests > 0:
        resultstring += f' Additionally, {new_tests} new test(s) is/are present\n'
    return result, resultstring

def get_results(logger, source):
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

def regression(args, logger):
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)

    regression_common(args, logger, base_results, target_results)

# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allow us to match them without the random components.
def fixup_ptest_names(results, logger):
    for r in results:
        for i in results[r]:
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith("ptestresult.curl.") and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.dbus.") and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                if new:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]

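# Pair base and target results that share the same result key, first removing pairs
# which fully match, then reporting the remaining comparable pairs as regressions and
# listing base entries that have no counterpart in the target.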
def regression_common(args, logger, base_results, target_results):
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # We should only see regressions now; we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b])
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(matches)))
    print("\n")
    print("\n".join(sorted(regressions)))
    print("\n".join(sorted(notfound)))
    return 0

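# Compare two result sets stored in a git test results repository, selecting the
# revisions to compare by branch, commit or commit number.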
def regression_git(args, logger):
    base_results = {}
    target_results = {}

    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison
            # In future we could check the commit is a common ancestor and
            # continue back if not, but this is good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                    "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0

def register_commands(subparsers):
    """Register subcommands from this plugin"""

    parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                         description='regression analysis comparing the base set of results to the target results',
                                         group='analysis')
    parser_build.set_defaults(func=regression)
    parser_build.add_argument('base_result',
                              help='base result file/directory/URL for the comparison')
    parser_build.add_argument('target_result',
                              help='target result file/directory/URL to compare with')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID')

    parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                         description='regression analysis comparing base result set to target '
                                                     'result set',
                                         group='analysis')
    parser_build.set_defaults(func=regression_git)
    parser_build.add_argument('repo',
                              help='the git repository containing the data')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) by default the regression is selected based on configurations '
                                   'unless a base result id is provided')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) by default the regression is selected based on configurations '
                                   'unless a target result id is provided')

    parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    parser_build.add_argument('--branch2', help="Branch to find comparison revisions in")
    parser_build.add_argument('--commit', help="Revision to search for")
    parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser_build.add_argument('--commit2', help="Revision to compare with")
    parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
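
# Example invocations (illustrative only; the file names, repository path and revision
# below are placeholders, while the subcommands and options come from register_commands()
# above):
#
#   resulttool regression base-results.json target-results.json
#   resulttool regression-git /path/to/testresults-repo --branch master --commit <sha1>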