sstate-cache-management: Rewrite in python
This (should be) a drop-in replacement for sstate-cache-management.sh.

(From OE-Core rev: 2fa1b25d7485bfbb92bcc33067beb6751218b36a)

Signed-off-by: Alex Kiernan <alex.kiernan@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>

parent 2d8e080231
commit b723fcaac5

scripts/sstate-cache-management.py (new executable file, 329 lines)

@@ -0,0 +1,329 @@
#!/usr/bin/env python3
#
# Copyright OpenEmbedded Contributors
#
# SPDX-License-Identifier: MIT
#

import argparse
import os
import re
import sys

from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path

if sys.version_info < (3, 8, 0):
    raise RuntimeError("Sorry, python 3.8.0 or later is required for this script.")

SSTATE_PREFIX = "sstate:"
SSTATE_EXTENSION = ".tar.zst"
# SSTATE_EXTENSION = ".tgz"
# .siginfo.done files are mentioned in the original script?
SSTATE_SUFFIXES = (
    SSTATE_EXTENSION,
    f"{SSTATE_EXTENSION}.siginfo",
    f"{SSTATE_EXTENSION}.done",
)

RE_SSTATE_PKGSPEC = re.compile(
    rf"""sstate:(?P<pn>[^:]*):
         (?P<package_target>[^:]*):
         (?P<pv>[^:]*):
         (?P<pr>[^:]*):
         (?P<sstate_pkgarch>[^:]*):
         (?P<sstate_version>[^_]*):
         (?P<bb_unihash>[^_]*)_
         (?P<bb_task>[^:]*)
         (?P<ext>({"|".join([re.escape(s) for s in SSTATE_SUFFIXES])}))$""",
    re.X,
)
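
# A hypothetical name this pattern is meant to match (all field values here
# are illustrative, not from a real cache):
#   sstate:zlib:core2-64-poky-linux:1.3.1:r0:core2-64:10:0123abcd_populate_sysroot.tar.zst
# which would yield pn="zlib", bb_task="populate_sysroot" and ext=".tar.zst".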


# Really we'd like something like a Path subclass which implements a stat
# cache here, unfortunately there's no good way to do that transparently
# (yet); see:
#
# https://github.com/python/cpython/issues/70219
# https://discuss.python.org/t/make-pathlib-extensible/3428/77
@dataclass
class SstateEntry:
    """Class for keeping track of an entry in sstate-cache."""

    path: Path
    match: re.Match
    stat_result: os.stat_result = None

    def __hash__(self):
        return self.path.__hash__()

    def __getattr__(self, name):
        return self.match.group(name)
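
# Note that __getattr__ delegates unknown attribute lookups to the regex
# match, so entry.pn, entry.bb_task, entry.ext and friends read the named
# groups of RE_SSTATE_PKGSPEC without storing each field separately.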


# this is what's in the original script; as far as I can tell, it's an
# implementation artefact which we don't need?
def find_archs():
    # all_archs
    builder_arch = os.uname().machine

    # FIXME
    layer_paths = [Path("../..")]

    tune_archs = set()
    re_tune = re.compile(r'AVAILTUNES .*=.*"(.*)"')
    for path in layer_paths:
        for tunefile in [
            p for p in path.glob("meta*/conf/machine/include/**/*") if p.is_file()
        ]:
            with open(tunefile) as f:
                for line in f:
                    m = re_tune.match(line)
                    if m:
                        tune_archs.update(m.group(1).split())

    # all_machines
    machine_archs = set()
    for path in layer_paths:
        for machine_file in path.glob("meta*/conf/machine/*.conf"):
            machine_archs.add(machine_file.parts[-1][:-5])

    extra_archs = set()
    all_archs = (
        set(
            arch.replace("-", "_")
            for arch in machine_archs | tune_archs | set(["allarch", builder_arch])
        )
        | extra_archs
    )

    print(all_archs)


# again, not needed?
def find_tasks(paths):
    print(set([p.bb_task for p in paths]))


def collect_sstate_paths(args):
    def scandir(path, paths):
        # Assume everything is a directory; by not checking we avoid needing an
        # additional stat which is potentially a synchronous roundtrip over NFS
        try:
            for p in path.iterdir():
                filename = p.parts[-1]
                if filename.startswith(SSTATE_PREFIX):
                    if filename.endswith(SSTATE_SUFFIXES):
                        m = RE_SSTATE_PKGSPEC.match(p.parts[-1])
                        assert m
                        paths.add(SstateEntry(p, m))
                    # ignore other things (includes things like lockfiles)
                else:
                    scandir(p, paths)

        except NotADirectoryError:
            pass

    paths = set()
    # TODO: parallelise scandir
    scandir(Path(args.cache_dir), paths)

    def path_stat(p):
        p.stat_result = p.path.lstat()

    if args.remove_duplicated:
        # This is probably slightly performance negative on a local filesystem
        # when we interact with the GIL; over NFS it's a massive win.
        with ThreadPoolExecutor(max_workers=args.jobs) as executor:
            executor.map(path_stat, paths)

    return paths
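
# For illustration (directory layout hypothetical): with a cache organised as
#   sstate-cache/<hash-prefix>/sstate:zlib:...tar.zst
# the recursive scan above yields one SstateEntry per sstate file, regardless
# of how the directories fan out.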


def remove_by_stamps(args, paths):
    all_sums = set()
    for stamps_dir in args.stamps_dir:
        stamps_path = Path(stamps_dir)
        assert stamps_path.is_dir()
        re_sigdata = re.compile(r"do_.*\.sigdata\.([^.]*)")
        all_sums |= set(
            [
                re_sigdata.search(x.parts[-1]).group(1)
                for x in stamps_path.glob("*/*/*.do_*.sigdata.*")
            ]
        )
        re_setscene = re.compile(r"do_.*_setscene\.([^.]*)")
        all_sums |= set(
            [
                re_setscene.search(x.parts[-1]).group(1)
                for x in stamps_path.glob("*/*/*.do_*_setscene.*")
            ]
        )
    return [p for p in paths if p.bb_unihash not in all_sums]
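
# For illustration (stamp name hypothetical): a stamp file such as
#   core2-64-poky-linux/zlib/1.3.1-r0.do_compile.sigdata.0123abcd
# contributes "0123abcd" to all_sums, so any cache entry whose bb_unihash is
# 0123abcd survives the sweep.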


def remove_duplicated(args, paths):
    # Skip populate_lic as it produces duplicates in a normal build
    #
    # 9ae16469e707 sstate-cache-management: skip populate_lic archives when removing duplicates
    valid_paths = [p for p in paths if p.bb_task != "populate_lic"]

    keep = dict()
    remove = list()
    for p in valid_paths:
        sstate_sig = ":".join([p.pn, p.sstate_pkgarch, p.bb_task, p.ext])
        if sstate_sig not in keep:
            keep[sstate_sig] = p
        elif p.stat_result.st_mtime > keep[sstate_sig].stat_result.st_mtime:
            remove.append(keep[sstate_sig])
            keep[sstate_sig] = p
        else:
            remove.append(p)

    return remove
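
# For illustration (entries hypothetical): two archives sharing the key
#   zlib:core2-64:populate_sysroot:.tar.zst
# but carrying different unihashes count as duplicates; the one with the
# older mtime lands on the remove list and the newest is kept.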


def remove_orphans(args, paths):
    remove = list()
    pathsigs = defaultdict(list)
    for p in paths:
        sstate_sig = ":".join([p.pn, p.sstate_pkgarch, p.bb_task])
        pathsigs[sstate_sig].append(p)
    for k, v in pathsigs.items():
        if len([p for p in v if p.ext == SSTATE_EXTENSION]) == 0:
            remove.extend(v)
    return remove
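
# For illustration (name hypothetical): a lone
#   sstate:zlib:...:0123abcd_populate_sysroot.tar.zst.siginfo
# with no sibling .tar.zst archive for the same pn/arch/task is treated as
# orphaned and removed.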


def parse_arguments():
    parser = argparse.ArgumentParser(description="sstate cache management utility.")

    parser.add_argument(
        "--cache-dir",
        default=os.environ.get("SSTATE_CACHE_DIR"),
        help="""Specify the sstate cache directory; the environment variable
        SSTATE_CACHE_DIR is used if it is not specified.""",
    )

    # parser.add_argument(
    #     "--extra-archs",
    #     help="""Specify a list of architectures which should be tested; this
    #     list will be extended with the native arch, allarch and empty arch.
    #     The script won't try to generate the list of available archs from
    #     AVAILTUNES in tune files.""",
    # )

    # parser.add_argument(
    #     "--extra-layer",
    #     help="""Specify the layer which will be used for searching the archs;
    #     it will search the meta and meta-* layers in the top dir by
    #     default, and will search meta, meta-*, <layer1>, <layer2>,
    #     ...<layern> when specified. Use "," as the separator.
    #
    #     This is useless for --stamps-dir or when --extra-archs is used.""",
    # )

    parser.add_argument(
        "-d",
        "--remove-duplicated",
        action="store_true",
        help="""Remove duplicated sstate cache files of one package; only the
        newest one will be kept. The duplicated sstate cache files of one
        package must have the same arch, which means sstate cache files
        with multiple archs are not considered duplicates.

        Conflicts with --stamps-dir.""",
    )

    parser.add_argument(
        "--remove-orphans",
        action="store_true",
        help=f"""Remove orphan siginfo files from the sstate cache, i.e. those
        where there is no {SSTATE_EXTENSION} file but there are associated
        tracking files.""",
    )

    parser.add_argument(
        "--stamps-dir",
        action="append",
        help="""Specify the build directory's stamps directories; the sstate
        cache files which ARE USED by these build directories will be KEPT,
        other sstate cache files in cache-dir will be removed. Can be
        specified multiple times for several directories.

        Conflicts with --remove-duplicated.""",
    )

    parser.add_argument(
        "-j", "--jobs", default=8, type=int, help="Run JOBS jobs in parallel."
    )

    # parser.add_argument(
    #     "-L",
    #     "--follow-symlink",
    #     action="store_true",
    #     help="Remove both the symbolic link and the destination file, default: no.",
    # )

    parser.add_argument(
        "-y",
        "--yes",
        action="store_true",
        help="""Automatic yes to prompts; assume "yes" as answer to all prompts
        and run non-interactively.""",
    )

    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Explain what is being done."
    )

    parser.add_argument(
        "-D",
        "--debug",
        action="count",
        default=0,
        help="Show debug info, repeat for more debug info.",
    )

    args = parser.parse_args()
    if args.cache_dir is None or (
        not args.remove_duplicated and not args.stamps_dir and not args.remove_orphans
    ):
        parser.print_usage()
        sys.exit(1)

    return args
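
# Typical invocations, given the options above (paths illustrative):
#   sstate-cache-management.py --cache-dir=sstate-cache --remove-duplicated -v
#   sstate-cache-management.py --cache-dir=sstate-cache --stamps-dir=tmp/stamps -y
#   sstate-cache-management.py --cache-dir=sstate-cache --remove-orphans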


def main():
    args = parse_arguments()

    paths = collect_sstate_paths(args)
    if args.remove_duplicated:
        remove = remove_duplicated(args, paths)
    elif args.stamps_dir:
        remove = remove_by_stamps(args, paths)
    else:
        remove = list()

    if args.remove_orphans:
        remove = set(remove) | set(remove_orphans(args, paths))

    if args.debug >= 1:
        print("\n".join([str(p.path) for p in remove]))
    print(f"{len(remove)} out of {len(paths)} files will be removed!")
    if not args.yes:
        print("Do you want to continue (y/n)?")
        confirm = input() in ("y", "Y")
    else:
        confirm = True
    if confirm:
        # TODO: parallelise remove
        for p in remove:
            p.path.unlink()


if __name__ == "__main__":
    main()