Mirror of git://git.yoctoproject.org/poky.git

Commit c304fcbe05 introduced grouping when listing regressions, but only for
ptests. It has been observed that any other kind of test could benefit from it
too. For example, current regression reports can show the following:
1 regression(s) for oescripts.OEGitproxyTests.test_oegitproxy_proxy_dash
oescripts.OEGitproxyTests.test_oegitproxy_proxy_dash: PASSED -> SKIPPED
1 regression(s) for oescripts.OEPybootchartguyTests.test_pybootchartguy_help
oescripts.OEPybootchartguyTests.test_pybootchartguy_help: PASSED -> SKIPPED
1 regression(s) for oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_pdf_output
oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_pdf_output: PASSED -> SKIPPED
1 regression(s) for oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_png_output
oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_png_output: PASSED -> SKIPPED
1 regression(s) for oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_svg_output
oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_svg_output: PASSED -> SKIPPED
[...]
This output is not very useful in its current state and should be grouped per
test type too.
Enable grouping for all kinds of tests, to make reports look like the
following:
5 regression(s) for oescripts
oescripts.OEGitproxyTests.test_oegitproxy_proxy_dash: PASSED -> SKIPPED
oescripts.OEPybootchartguyTests.test_pybootchartguy_help: PASSED -> SKIPPED
oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_pdf_output: PASSED -> SKIPPED
oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_png_output: PASSED -> SKIPPED
oescripts.OEPybootchartguyTests.test_pybootchartguy_to_generate_build_svg_output: PASSED -> SKIPPED
(From OE-Core rev: 982798ef96e3a32bf15341bdd3bb7c4356709412)
Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
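For illustration, here is a minimal sketch of the grouping-key derivation that compare_result() in the file below applies to every regression entry. The helper name regression_group_key and the ptest name in the second assertion are made up for this example; only the oescripts test name comes from the report excerpt above.

# Ptest results keep their first two components ("ptestresult.<suite>") so each
# ptest suite stays a separate group; every other test is grouped by its first
# component (e.g. "oescripts"), which yields the condensed report shown above.
def regression_group_key(test_name):
    parts = test_name.split('.')
    return '.'.join(parts[:2]) if test_name.startswith('ptest') else parts[0]

assert regression_group_key("oescripts.OEGitproxyTests.test_oegitproxy_proxy_dash") == "oescripts"
assert regression_group_key("ptestresult.glibc-user.tst-fork2") == "ptestresult.glibc-user"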
448 lines · 20 KiB · Python
# resulttool - regression analysis
#
# Copyright (c) 2019, Intel Corporation.
# Copyright (c) 2019, Linux Foundation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import resulttool.resultutils as resultutils

from oeqa.utils.git import GitRepo
import oeqa.utils.gitarchive as gitarchive

METADATA_MATCH_TABLE = {
    "oeselftest": "OESELFTEST_METADATA"
}

OESELFTEST_METADATA_GUESS_TABLE = {
    "trigger-build-posttrigger": {
        "run_all_tests": False,
        "run_tests": ["buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "reproducible": {
        "run_all_tests": False,
        "run_tests": ["reproducible"],
        "skips": None,
        "machine": None,
        "select_tags": None,
        "exclude_tags": None
    },
    "arch-qemu-quick": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine"],
        "exclude_tags": None
    },
    "arch-qemu-full-x86-or-x86_64": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-system"],
        "exclude_tags": None
    },
    "arch-qemu-full-others": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": None,
        "machine": None,
        "select_tags": ["machine", "toolchain-user"],
        "exclude_tags": None
    },
    "selftest": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    },
    "bringup": {
        "run_all_tests": True,
        "run_tests": None,
        "skips": ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"],
        "machine": None,
        "select_tags": None,
        "exclude_tags": ["machine", "toolchain-system", "toolchain-user"]
    }
}

STATUS_STRINGS = {
    "None": "No matching test result"
}

REGRESSIONS_DISPLAY_LIMIT = 50

MISSING_TESTS_BANNER = "-------------------------- Missing tests --------------------------"
ADDITIONAL_DATA_BANNER = "--------------------- Matches and improvements --------------------"

def test_has_at_least_one_matching_tag(test, tag_list):
    return "oetags" in test and any(oetag in tag_list for oetag in test["oetags"])

def all_tests_have_at_least_one_matching_tag(results, tag_list):
    return all(test_has_at_least_one_matching_tag(test_result, tag_list) or test_name.startswith("ptestresult") for (test_name, test_result) in results.items())

def any_test_have_any_matching_tag(results, tag_list):
    return any(test_has_at_least_one_matching_tag(test, tag_list) for test in results.values())

def have_skipped_test(result, test_prefix):
    return all(result[test]['status'] == "SKIPPED" for test in result if test.startswith(test_prefix))

def have_all_tests_skipped(result, test_prefixes_list):
    return all(have_skipped_test(result, test_prefix) for test_prefix in test_prefixes_list)

def guess_oeselftest_metadata(results):
    """
    When an oeselftest test result is lacking OESELFTEST_METADATA, we can try to guess it based on the results content.
    Check the results for specific values (absence/presence of oetags, number and name of executed tests...),
    and if they match one of the known configurations from the autobuilder configuration, apply the guessed
    OESELFTEST_METADATA to allow proper test filtering.
    This guessing process is tightly coupled to config.json in the autobuilder. It should trigger less and less,
    as new tests will have OESELFTEST_METADATA properly appended at test reporting time.
    """

    if len(results) == 1 and "buildoptions.SourceMirroring.test_yocto_source_mirror" in results:
        return OESELFTEST_METADATA_GUESS_TABLE['trigger-build-posttrigger']
    elif all(result.startswith("reproducible") for result in results):
        return OESELFTEST_METADATA_GUESS_TABLE['reproducible']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-quick']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-system"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-x86-or-x86_64']
    elif all_tests_have_at_least_one_matching_tag(results, ["machine", "toolchain-user"]):
        return OESELFTEST_METADATA_GUESS_TABLE['arch-qemu-full-others']
    elif not any_test_have_any_matching_tag(results, ["machine", "toolchain-user", "toolchain-system"]):
        if have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror", "reproducible"]):
            return OESELFTEST_METADATA_GUESS_TABLE['selftest']
        elif have_all_tests_skipped(results, ["distrodata.Distrodata.test_checkpkg", "buildoptions.SourceMirroring.test_yocto_source_mirror"]):
            return OESELFTEST_METADATA_GUESS_TABLE['bringup']

    return None

def metadata_matches(base_configuration, target_configuration):
    """
    For the passed base and target, check the test type. If the test type matches one of the
    properties described in METADATA_MATCH_TABLE, compare the metadata if it is
    present in base. Return True if the metadata matches, or if base lacks some
    data (either TEST_TYPE or the corresponding metadata).
    """
    test_type = base_configuration.get('TEST_TYPE')
    if test_type not in METADATA_MATCH_TABLE:
        return True

    metadata_key = METADATA_MATCH_TABLE.get(test_type)
    if target_configuration.get(metadata_key) != base_configuration.get(metadata_key):
        return False

    return True


def machine_matches(base_configuration, target_configuration):
    return base_configuration.get('MACHINE') == target_configuration.get('MACHINE')

def can_be_compared(logger, base, target):
    """
    Some test results are not relevant to compare, for example oeselftest
    runs with different test sets or parameters. Return True if the results
    can be compared.
    """
    ret = True
    base_configuration = base['configuration']
    target_configuration = target['configuration']

    # Older test results lack proper OESELFTEST_METADATA: if it is not present, try to guess it based on the test results.
    if base_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in base_configuration:
        guess = guess_oeselftest_metadata(base['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {base_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {base_configuration['STARTTIME']} with {guess}")
            base_configuration['OESELFTEST_METADATA'] = guess
    if target_configuration.get('TEST_TYPE') == 'oeselftest' and 'OESELFTEST_METADATA' not in target_configuration:
        guess = guess_oeselftest_metadata(target['result'])
        if guess is None:
            logger.error(f"ERROR: did not manage to guess oeselftest metadata for {target_configuration['STARTTIME']}")
        else:
            logger.debug(f"Enriching {target_configuration['STARTTIME']} with {guess}")
            target_configuration['OESELFTEST_METADATA'] = guess

    # Test runs with LTP results should only be compared with other runs that also contain LTP tests
    if base_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in base['result']):
        ret = target_configuration.get('TEST_TYPE') == 'runtime' and any(result.startswith("ltpresult") for result in target['result'])

    return ret and metadata_matches(base_configuration, target_configuration) \
        and machine_matches(base_configuration, target_configuration)

def get_status_str(raw_status):
    raw_status_lower = raw_status.lower() if raw_status else "None"
    return STATUS_STRINGS.get(raw_status_lower, raw_status)

def get_additional_info_line(new_pass_count, new_tests):
    result = []
    if new_tests:
        result.append(f'+{new_tests} test(s) present')
    if new_pass_count:
        result.append(f'+{new_pass_count} test(s) now passing')

    if not result:
        return ""

    return ' -> ' + ', '.join(result) + '\n'

def compare_result(logger, base_name, target_name, base_result, target_result, display_limit=None):
    base_result = base_result.get('result')
    target_result = target_result.get('result')
    result = {}
    regressions = {}
    resultstring = ""
    new_tests = 0
    new_pass_count = 0

    display_limit = int(display_limit) if display_limit else REGRESSIONS_DISPLAY_LIMIT

    if base_result and target_result:
        for k in base_result:
            base_testcase = base_result[k]
            base_status = base_testcase.get('status')
            if base_status:
                target_testcase = target_result.get(k, {})
                target_status = target_testcase.get('status')
                if base_status != target_status:
                    result[k] = {'base': base_status, 'target': target_status}
            else:
                logger.error('Failed to retrieve base test case status: %s' % k)

        # Also count new tests that were not present in the base results: these
        # could be newly added tests, but they could also highlight test renames
        # or fixed faulty ptests
        for k in target_result:
            if k not in base_result:
                new_tests += 1
        if result:
            new_pass_count = sum(test['target'] is not None and test['target'].startswith("PASS") for test in result.values())
            # Print a regression report only if at least one test has a regression status (FAIL, SKIPPED, absent...)
            if new_pass_count < len(result):
                resultstring = "Regression: %s\n %s\n" % (base_name, target_name)
                for k in sorted(result):
                    if not result[k]['target'] or not result[k]['target'].startswith("PASS"):
                        # Differentiate each ptest kind when listing regressions
                        key_parts = k.split('.')
                        key = '.'.join(key_parts[:2]) if k.startswith('ptest') else key_parts[0]
                        # Append the new regression to the corresponding test family
                        regressions[key] = regressions.setdefault(key, []) + ['    %s: %s -> %s\n' % (k, get_status_str(result[k]['base']), get_status_str(result[k]['target']))]
                resultstring += f"  Total: {sum([len(regressions[r]) for r in regressions])} new regression(s):\n"
                for k in regressions:
                    resultstring += f"  {len(regressions[k])} regression(s) for {k}\n"
                    count_to_print = min([display_limit, len(regressions[k])]) if display_limit > 0 else len(regressions[k])
                    resultstring += ''.join(regressions[k][:count_to_print])
                    if count_to_print < len(regressions[k]):
                        resultstring += '    [...]\n'
                if new_pass_count > 0:
                    resultstring += f'  Additionally, {new_pass_count} previously failing test(s) is/are now passing\n'
                if new_tests > 0:
                    resultstring += f'  Additionally, {new_tests} new test(s) is/are present\n'
            else:
                resultstring = "%s\n%s\n" % (base_name, target_name)
                result = None
    else:
        resultstring = "%s\n%s\n" % (base_name, target_name)

    if not result:
        additional_info = get_additional_info_line(new_pass_count, new_tests)
        if additional_info:
            resultstring += additional_info

    return result, resultstring

def get_results(logger, source):
    return resultutils.load_resultsdata(source, configmap=resultutils.regression_map)

def regression(args, logger):
    base_results = get_results(logger, args.base_result)
    target_results = get_results(logger, args.target_result)

    regression_common(args, logger, base_results, target_results)

# Some test case naming is poor and contains random strings, particularly lttng/babeltrace.
# Truncating the test names works since they contain file and line number identifiers
# which allow us to match them without the random components.
def fixup_ptest_names(results, logger):
    for r in results:
        for i in results[r]:
            tests = list(results[r][i]['result'].keys())
            for test in tests:
                new = None
                if test.startswith(("ptestresult.lttng-tools.", "ptestresult.babeltrace.", "ptestresult.babeltrace2")) and "_-_" in test:
                    new = test.split("_-_")[0]
                elif test.startswith(("ptestresult.curl.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith(("ptestresult.dbus.")) and "__" in test:
                    new = test.split("__")[0]
                elif test.startswith("ptestresult.binutils") and "build-st-" in test:
                    new = test.split(" ")[0]
                elif test.startswith("ptestresult.gcc") and "/tmp/runtest." in test:
                    new = ".".join(test.split(".")[:2])
                if new:
                    results[r][i]['result'][new] = results[r][i]['result'][test]
                    del results[r][i]['result'][test]

def regression_common(args, logger, base_results, target_results):
    if args.base_result_id:
        base_results = resultutils.filter_resultsdata(base_results, args.base_result_id)
    if args.target_result_id:
        target_results = resultutils.filter_resultsdata(target_results, args.target_result_id)

    fixup_ptest_names(base_results, logger)
    fixup_ptest_names(target_results, logger)

    matches = []
    regressions = []
    notfound = []

    for a in base_results:
        if a in target_results:
            base = list(base_results[a].keys())
            target = list(target_results[a].keys())
            # We may have multiple base/targets which are for different configurations. Start by
            # removing any pairs which match
            for c in base.copy():
                for b in target.copy():
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if not res:
                        matches.append(resstr)
                        base.remove(c)
                        target.remove(b)
                        break
            # Should only now see regressions, we may not be able to match multiple pairs directly
            for c in base:
                for b in target:
                    if not can_be_compared(logger, base_results[a][c], target_results[a][b]):
                        continue
                    res, resstr = compare_result(logger, c, b, base_results[a][c], target_results[a][b], args.limit)
                    if res:
                        regressions.append(resstr)
        else:
            notfound.append("%s not found in target" % a)
    print("\n".join(sorted(regressions)))
    print("\n" + MISSING_TESTS_BANNER + "\n")
    print("\n".join(sorted(notfound)))
    print("\n" + ADDITIONAL_DATA_BANNER + "\n")
    print("\n".join(sorted(matches)))
    return 0

def regression_git(args, logger):
    base_results = {}
    target_results = {}

    tag_name = "{branch}/{commit_number}-g{commit}/{tag_number}"
    repo = GitRepo(args.repo)

    revs = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch)

    if args.branch2:
        revs2 = gitarchive.get_test_revs(logger, repo, tag_name, branch=args.branch2)
        if not len(revs2):
            logger.error("No revisions found to compare against")
            return 1
        if not len(revs):
            logger.error("No revision to report on found")
            return 1
    else:
        if len(revs) < 2:
            logger.error("Only %d tester revisions found, unable to generate report" % len(revs))
            return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            logger.warning("Ignoring --commit-number as --commit was specified")
        index1 = gitarchive.rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = gitarchive.rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.branch2:
        revs2.append(revs[index1])
        index1 = len(revs2) - 1
        revs = revs2

    if args.commit2:
        if args.commit_number2:
            logger.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = gitarchive.rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = gitarchive.rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
            # Find the closest matching commit number for comparison
            # In future we could check the commit is a common ancestor and
            # continue back if not, but this is good enough for now
            while index2 > 0 and revs[index2].commit_number > revs[index1].commit_number:
                index2 = index2 - 1
        else:
            logger.error("Unable to determine the other commit, use "
                         "--commit2 or --commit-number2 to specify it")
            return 1

    logger.info("Comparing:\n%s\nto\n%s\n" % (revs[index1], revs[index2]))

    base_results = resultutils.git_get_result(repo, revs[index1][2])
    target_results = resultutils.git_get_result(repo, revs[index2][2])

    regression_common(args, logger, base_results, target_results)

    return 0

def register_commands(subparsers):
    """Register subcommands from this plugin"""

    parser_build = subparsers.add_parser('regression', help='regression file/directory analysis',
                                         description='regression analysis comparing the base set of results to the target results',
                                         group='analysis')
    parser_build.set_defaults(func=regression)
    parser_build.add_argument('base_result',
                              help='base result file/directory/URL for the comparison')
    parser_build.add_argument('target_result',
                              help='target result file/directory/URL to compare with')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) filter the base results to this result ID')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) filter the target results to this result ID')

    parser_build = subparsers.add_parser('regression-git', help='regression git analysis',
                                         description='regression analysis comparing base result set to target '
                                                     'result set',
                                         group='analysis')
    parser_build.set_defaults(func=regression_git)
    parser_build.add_argument('repo',
                              help='the git repository containing the data')
    parser_build.add_argument('-b', '--base-result-id', default='',
                              help='(optional) default select regression based on configurations unless base result '
                                   'id was provided')
    parser_build.add_argument('-t', '--target-result-id', default='',
                              help='(optional) default select regression based on configurations unless target result '
                                   'id was provided')

    parser_build.add_argument('--branch', '-B', default='master', help="Branch to find commit in")
    parser_build.add_argument('--branch2', help="Branch to find comparison revisions in")
    parser_build.add_argument('--commit', help="Revision to search for")
    parser_build.add_argument('--commit-number', help="Revision number to search for, redundant if --commit is specified")
    parser_build.add_argument('--commit2', help="Revision to compare with")
    parser_build.add_argument('--commit-number2', help="Revision number to compare with, redundant if --commit2 is specified")
    parser_build.add_argument('-l', '--limit', default=REGRESSIONS_DISPLAY_LIMIT, help="Maximum number of changes to display per test. Can be set to 0 to print all changes")
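To show how the pieces above fit together outside the resulttool command-line wrapper, here is a minimal sketch (not part of the file above) that drives the 'regression' code path directly and prints the grouped report to stdout. The two testresults.json paths and the logger name are hypothetical, and the sketch assumes poky's scripts/lib directory is on sys.path so the resulttool modules can be imported.

import argparse
import logging

import resulttool.regression as regression

# Build the same namespace the 'resulttool regression' subcommand would produce
# from its positional arguments and options.
args = argparse.Namespace(
    base_result="base-results/testresults.json",      # hypothetical base results path
    target_result="target-results/testresults.json",  # hypothetical target results path
    base_result_id="",
    target_result_id="",
    limit=10,  # show at most 10 changes per regression group
)
logging.basicConfig(level=logging.INFO)
regression.regression(args, logging.getLogger("resulttool"))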