mirror of
git://git.yoctoproject.org/poky.git
synced 2025-07-19 21:09:03 +02:00
bitbake: git-make-shallow: add script to make a git repo shallow
This script will be used by the git fetcher to create shallow mirror tarballs. usage: git-make-shallow [-h] [--ref REF] [--shrink] REVISION [REVISION ...] Remove the history of the specified revisions, then optionally filter the available refs to those specified. positional arguments: REVISION a git revision/commit optional arguments: -h, --help show this help message and exit --ref REF, -r REF remove all but the specified refs (cumulative) --shrink, -s shrink the git repository by repacking and pruning While git does provide the ability to clone at a specific depth, and fetch all remote refs at a particular depth, the depth is across all branches/tags, and doesn't provide the flexibility we need, hence this script. Refs (branches+tags) can be filtered, as the process of history removal scales up rapidly with the number of refs. Even the existing `git fetch --depth=` is extremely slow on an upstream kernel repository with all the branches and tags kept. This uses the same underlying mechanism to implement the history removal which git itself uses (.git/shallow), and the results, when configured similarly, are in line with the results git itself produces with `fetch --depth`. (Bitbake rev: 0254020f0e1911c0eaf99111b91828d2a74a4ee1) Signed-off-by: Christopher Larson <chris_larson@mentor.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
parent
ab4e578b86
commit
2a60c40637
165
bitbake/bin/git-make-shallow
Executable file
165
bitbake/bin/git-make-shallow
Executable file
|
@ -0,0 +1,165 @@
|
|||
#!/usr/bin/env python3
|
||||
"""git-make-shallow: make the current git repository shallow
|
||||
|
||||
Remove the history of the specified revisions, then optionally filter the
|
||||
available refs to those specified.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import errno
|
||||
import itertools
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
version = 1.0
|
||||
|
||||
|
||||
def main():
|
||||
if sys.version_info < (3, 4, 0):
|
||||
sys.exit('Python 3.4 or greater is required')
|
||||
|
||||
git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
|
||||
shallow_file = os.path.join(git_dir, 'shallow')
|
||||
if os.path.exists(shallow_file):
|
||||
try:
|
||||
check_output(['git', 'fetch', '--unshallow'])
|
||||
except subprocess.CalledProcessError:
|
||||
try:
|
||||
os.unlink(shallow_file)
|
||||
except OSError as exc:
|
||||
if exc.errno != errno.ENOENT:
|
||||
raise
|
||||
|
||||
args = process_args()
|
||||
revs = check_output(['git', 'rev-list'] + args.revisions).splitlines()
|
||||
|
||||
make_shallow(shallow_file, args.revisions, args.refs)
|
||||
|
||||
ref_revs = check_output(['git', 'rev-list'] + args.refs).splitlines()
|
||||
remaining_history = set(revs) & set(ref_revs)
|
||||
for rev in remaining_history:
|
||||
if check_output(['git', 'rev-parse', '{}^@'.format(rev)]):
|
||||
sys.exit('Error: %s was not made shallow' % rev)
|
||||
|
||||
filter_refs(args.refs)
|
||||
|
||||
if args.shrink:
|
||||
shrink_repo(git_dir)
|
||||
subprocess.check_call(['git', 'fsck', '--unreachable'])
|
||||
|
||||
|
||||
def process_args():
|
||||
# TODO: add argument to automatically keep local-only refs, since they
|
||||
# can't be easily restored with a git fetch.
|
||||
parser = argparse.ArgumentParser(description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
|
||||
parser.add_argument('--ref', '-r', metavar='REF', action='append', dest='refs', help='remove all but the specified refs (cumulative)')
|
||||
parser.add_argument('--shrink', '-s', action='store_true', help='shrink the git repository by repacking and pruning')
|
||||
parser.add_argument('revisions', metavar='REVISION', nargs='+', help='a git revision/commit')
|
||||
if len(sys.argv) < 2:
|
||||
parser.print_help()
|
||||
sys.exit(2)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.refs:
|
||||
args.refs = check_output(['git', 'rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
|
||||
else:
|
||||
args.refs = get_all_refs(lambda r, t, tt: t == 'commit' or tt == 'commit')
|
||||
|
||||
args.refs = list(filter(lambda r: not r.endswith('/HEAD'), args.refs))
|
||||
args.revisions = check_output(['git', 'rev-parse'] + ['%s^{}' % i for i in args.revisions]).splitlines()
|
||||
return args
|
||||
|
||||
|
||||
def check_output(cmd, input=None):
|
||||
return subprocess.check_output(cmd, universal_newlines=True, input=input)
|
||||
|
||||
|
||||
def make_shallow(shallow_file, revisions, refs):
|
||||
"""Remove the history of the specified revisions."""
|
||||
for rev in follow_history_intersections(revisions, refs):
|
||||
print("Processing %s" % rev)
|
||||
with open(shallow_file, 'a') as f:
|
||||
f.write(rev + '\n')
|
||||
|
||||
|
||||
def get_all_refs(ref_filter=None):
|
||||
"""Return all the existing refs in this repository, optionally filtering the refs."""
|
||||
ref_output = check_output(['git', 'for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])
|
||||
ref_split = [tuple(iter_extend(l.rsplit('\t'), 3)) for l in ref_output.splitlines()]
|
||||
if ref_filter:
|
||||
ref_split = (e for e in ref_split if ref_filter(*e))
|
||||
refs = [r[0] for r in ref_split]
|
||||
return refs
|
||||
|
||||
|
||||
def iter_extend(iterable, length, obj=None):
|
||||
"""Ensure that iterable is the specified length by extending with obj."""
|
||||
return itertools.islice(itertools.chain(iterable, itertools.repeat(obj)), length)
|
||||
|
||||
|
||||
def filter_refs(refs):
|
||||
"""Remove all but the specified refs from the git repository."""
|
||||
all_refs = get_all_refs()
|
||||
to_remove = set(all_refs) - set(refs)
|
||||
if to_remove:
|
||||
check_output(['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref'],
|
||||
input=''.join(l + '\0' for l in to_remove))
|
||||
|
||||
|
||||
def follow_history_intersections(revisions, refs):
|
||||
"""Determine all the points where the history of the specified revisions intersects the specified refs."""
|
||||
queue = collections.deque(revisions)
|
||||
seen = set()
|
||||
|
||||
for rev in iter_except(queue.popleft, IndexError):
|
||||
if rev in seen:
|
||||
continue
|
||||
|
||||
parents = check_output(['git', 'rev-parse', '%s^@' % rev]).splitlines()
|
||||
|
||||
yield rev
|
||||
seen.add(rev)
|
||||
|
||||
if not parents:
|
||||
continue
|
||||
|
||||
check_refs = check_output(['git', 'merge-base', '--independent'] + sorted(refs)).splitlines()
|
||||
for parent in parents:
|
||||
for ref in check_refs:
|
||||
print("Checking %s vs %s" % (parent, ref))
|
||||
try:
|
||||
merge_base = check_output(['git', 'merge-base', parent, ref]).rstrip()
|
||||
except subprocess.CalledProcessError:
|
||||
continue
|
||||
else:
|
||||
queue.append(merge_base)
|
||||
|
||||
|
||||
def iter_except(func, exception, start=None):
|
||||
"""Yield a function repeatedly until it raises an exception."""
|
||||
try:
|
||||
if start is not None:
|
||||
yield start()
|
||||
while True:
|
||||
yield func()
|
||||
except exception:
|
||||
pass
|
||||
|
||||
|
||||
def shrink_repo(git_dir):
|
||||
"""Shrink the newly shallow repository, removing the unreachable objects."""
|
||||
subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
|
||||
subprocess.check_call(['git', 'repack', '-ad'])
|
||||
try:
|
||||
os.unlink(os.path.join(git_dir, 'objects', 'info', 'alternates'))
|
||||
except OSError as exc:
|
||||
if exc.errno != errno.ENOENT:
|
||||
raise
|
||||
subprocess.check_call(['git', 'prune', '--expire', 'now'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -852,3 +852,130 @@ class FetchCheckStatusTest(FetcherTest):
|
|||
self.assertTrue(ret, msg="URI %s, can't check status" % (u))
|
||||
|
||||
connection_cache.close_connections()
|
||||
|
||||
|
||||
class GitMakeShallowTest(FetcherTest):
|
||||
bitbake_dir = os.path.join(os.path.dirname(os.path.join(__file__)), '..', '..', '..')
|
||||
make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')
|
||||
|
||||
def setUp(self):
|
||||
FetcherTest.setUp(self)
|
||||
self.gitdir = os.path.join(self.tempdir, 'gitshallow')
|
||||
bb.utils.mkdirhier(self.gitdir)
|
||||
bb.process.run('git init', cwd=self.gitdir)
|
||||
|
||||
def assertRefs(self, expected_refs):
|
||||
actual_refs = self.git(['for-each-ref', '--format=%(refname)']).splitlines()
|
||||
full_expected = self.git(['rev-parse', '--symbolic-full-name'] + expected_refs).splitlines()
|
||||
self.assertEqual(sorted(full_expected), sorted(actual_refs))
|
||||
|
||||
def assertRevCount(self, expected_count, args=None):
|
||||
if args is None:
|
||||
args = ['HEAD']
|
||||
revs = self.git(['rev-list'] + args)
|
||||
actual_count = len(revs.splitlines())
|
||||
self.assertEqual(expected_count, actual_count, msg='Object count `%d` is not the expected `%d`' % (actual_count, expected_count))
|
||||
|
||||
def git(self, cmd):
|
||||
if isinstance(cmd, str):
|
||||
cmd = 'git ' + cmd
|
||||
else:
|
||||
cmd = ['git'] + cmd
|
||||
return bb.process.run(cmd, cwd=self.gitdir)[0]
|
||||
|
||||
def make_shallow(self, args=None):
|
||||
if args is None:
|
||||
args = ['HEAD']
|
||||
return bb.process.run([self.make_shallow_path] + args, cwd=self.gitdir)
|
||||
|
||||
def add_empty_file(self, path, msg=None):
|
||||
if msg is None:
|
||||
msg = path
|
||||
open(os.path.join(self.gitdir, path), 'w').close()
|
||||
self.git(['add', path])
|
||||
self.git(['commit', '-m', msg, path])
|
||||
|
||||
def test_make_shallow_single_branch_no_merge(self):
|
||||
self.add_empty_file('a')
|
||||
self.add_empty_file('b')
|
||||
self.assertRevCount(2)
|
||||
self.make_shallow()
|
||||
self.assertRevCount(1)
|
||||
|
||||
def test_make_shallow_single_branch_one_merge(self):
|
||||
self.add_empty_file('a')
|
||||
self.add_empty_file('b')
|
||||
self.git('checkout -b a_branch')
|
||||
self.add_empty_file('c')
|
||||
self.git('checkout master')
|
||||
self.add_empty_file('d')
|
||||
self.git('merge --no-ff --no-edit a_branch')
|
||||
self.git('branch -d a_branch')
|
||||
self.add_empty_file('e')
|
||||
self.assertRevCount(6)
|
||||
self.make_shallow(['HEAD~2'])
|
||||
self.assertRevCount(5)
|
||||
|
||||
def test_make_shallow_at_merge(self):
|
||||
self.add_empty_file('a')
|
||||
self.git('checkout -b a_branch')
|
||||
self.add_empty_file('b')
|
||||
self.git('checkout master')
|
||||
self.git('merge --no-ff --no-edit a_branch')
|
||||
self.git('branch -d a_branch')
|
||||
self.assertRevCount(3)
|
||||
self.make_shallow()
|
||||
self.assertRevCount(1)
|
||||
|
||||
def test_make_shallow_annotated_tag(self):
|
||||
self.add_empty_file('a')
|
||||
self.add_empty_file('b')
|
||||
self.git('tag -a -m a_tag a_tag')
|
||||
self.assertRevCount(2)
|
||||
self.make_shallow(['a_tag'])
|
||||
self.assertRevCount(1)
|
||||
|
||||
def test_make_shallow_multi_ref(self):
|
||||
self.add_empty_file('a')
|
||||
self.add_empty_file('b')
|
||||
self.git('checkout -b a_branch')
|
||||
self.add_empty_file('c')
|
||||
self.git('checkout master')
|
||||
self.add_empty_file('d')
|
||||
self.git('checkout -b a_branch_2')
|
||||
self.add_empty_file('a_tag')
|
||||
self.git('tag a_tag')
|
||||
self.git('checkout master')
|
||||
self.git('branch -D a_branch_2')
|
||||
self.add_empty_file('e')
|
||||
self.assertRevCount(6, ['--all'])
|
||||
self.make_shallow()
|
||||
self.assertRevCount(5, ['--all'])
|
||||
|
||||
def test_make_shallow_multi_ref_trim(self):
|
||||
self.add_empty_file('a')
|
||||
self.git('checkout -b a_branch')
|
||||
self.add_empty_file('c')
|
||||
self.git('checkout master')
|
||||
self.assertRevCount(1)
|
||||
self.assertRevCount(2, ['--all'])
|
||||
self.assertRefs(['master', 'a_branch'])
|
||||
self.make_shallow(['-r', 'master', 'HEAD'])
|
||||
self.assertRevCount(1, ['--all'])
|
||||
self.assertRefs(['master'])
|
||||
|
||||
def test_make_shallow_noop(self):
|
||||
self.add_empty_file('a')
|
||||
self.assertRevCount(1)
|
||||
self.make_shallow()
|
||||
self.assertRevCount(1)
|
||||
|
||||
if os.environ.get("BB_SKIP_NETTESTS") == "yes":
|
||||
print("Unset BB_SKIP_NETTESTS to run network tests")
|
||||
else:
|
||||
def test_make_shallow_bitbake(self):
|
||||
self.git('remote add origin https://github.com/openembedded/bitbake')
|
||||
self.git('fetch --tags origin')
|
||||
orig_revs = len(self.git('rev-list --all').splitlines())
|
||||
self.make_shallow(['refs/tags/1.10.0'])
|
||||
self.assertRevCount(orig_revs - 1746, ['--all'])
|
||||
|
|
Loading…
Reference in New Issue
Block a user