bitbake: fetch/git: add support for shallow mirror tarballs

This adds support to the git fetcher for fetching, using, and generating
mirror tarballs of shallow git repositories. The external git-make-shallow
script is used for shallow mirror tarball creation.

This implements support for shallow mirror tarballs, not shallow clones.
Supporting shallow clones directly is not really doable for us, as we'd need
to hardcode the depth between branch HEAD and the SRCREV, and that depth would
change as the branch is updated.

When BB_GIT_SHALLOW is enabled, we will always attempt to fetch a shallow
mirror tarball. If the shallow mirror tarball cannot be fetched, we will try
to fetch the full mirror tarball and use that. If a shallow tarball is to be
used, it will be unpacked directly at `do_unpack` time, rather than extracting
it to DL_DIR at `do_fetch` time and cloning from there, to keep things simple.
There's no value in keeping a shallow repository in DL_DIR, and dealing with
the state for when to convert the clonedir to/from shallow is not worthwhile.

To clarify when shallow is used vs a real repository, a current clone is
preferred to either tarball, a shallow tarball is preferred to an out of date
clone, and a missing clone will use either tarball (attempting the shallow one
first).

All referenced branches are truncated to SRCREV (that is, commits *after*
SRCREV, up to and including the branch HEAD, are removed) to further shrink
the repository. By
default, the shallow construction process removes all unused refs
(branches/tags) from the repository, other than those referenced by the URL.

Example usage:

    BB_GIT_SHALLOW ?= "1"

    # Keep only the top commit
    BB_GIT_SHALLOW_DEPTH ?= "1"

    # This defaults to enabled if both BB_GIT_SHALLOW and
    # BB_GENERATE_MIRROR_TARBALLS are enabled
    BB_GENERATE_SHALLOW_TARBALLS ?= "1"

(Bitbake rev: 5ed7d85fda7c671be10ec24d7981b87a7d0d3366)

Signed-off-by: Christopher Larson <chris_larson@mentor.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Christopher Larson 2017-05-13 02:46:28 +05:00 committed by Richard Purdie
parent 2a60c40637
commit 27d56982c7
2 changed files with 418 additions and 16 deletions

View File

@ -73,8 +73,9 @@ Supported SRC_URI options are:
import errno import errno
import os import os
import re import re
import subprocess
import tempfile
import bb import bb
import errno
import bb.progress import bb.progress
from bb.fetch2 import FetchMethod from bb.fetch2 import FetchMethod
from bb.fetch2 import runfetchcmd from bb.fetch2 import runfetchcmd
@ -172,6 +173,11 @@ class Git(FetchMethod):
branches = ud.parm.get("branch", "master").split(',') branches = ud.parm.get("branch", "master").split(',')
if len(branches) != len(ud.names): if len(branches) != len(ud.names):
raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url) raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url)
ud.cloneflags = "-s -n"
if ud.bareclone:
ud.cloneflags += " --mirror"
ud.branches = {} ud.branches = {}
for pos, name in enumerate(ud.names): for pos, name in enumerate(ud.names):
branch = branches[pos] branch = branches[pos]
@ -183,7 +189,9 @@ class Git(FetchMethod):
ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0" ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0"
ud.write_tarballs = ((d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0") != "0") or ud.rebaseable write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
ud.write_shallow_tarballs = (d.getVar("BB_GENERATE_SHALLOW_TARBALLS") or write_tarballs) != "0"
ud.setup_revisions(d) ud.setup_revisions(d)
@ -205,14 +213,49 @@ class Git(FetchMethod):
if ud.rebaseable: if ud.rebaseable:
for name in ud.names: for name in ud.names:
gitsrcname = gitsrcname + '_' + ud.revisions[name] gitsrcname = gitsrcname + '_' + ud.revisions[name]
mirrortarball = 'git2_%s.tar.gz' % gitsrcname
ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball)
ud.mirrortarballs = [mirrortarball]
gitdir = d.getVar("GITDIR") or (d.getVar("DL_DIR") + "/git2/")
ud.clonedir = os.path.join(gitdir, gitsrcname)
dl_dir = d.getVar("DL_DIR")
gitdir = d.getVar("GITDIR") or (dl_dir + "/git2/")
ud.clonedir = os.path.join(gitdir, gitsrcname)
ud.localfile = ud.clonedir ud.localfile = ud.clonedir
mirrortarball = 'git2_%s.tar.gz' % gitsrcname
ud.fullmirror = os.path.join(dl_dir, mirrortarball)
ud.mirrortarballs = [mirrortarball]
ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
if ud.shallow:
ud.shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH")
if ud.shallow_depth is not None:
try:
ud.shallow_depth = int(ud.shallow_depth or 0)
except ValueError:
raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % ud.shallow_depth)
else:
if not ud.shallow_depth:
ud.shallow = False
elif ud.shallow_depth < 0:
raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % ud.shallow_depth)
else:
ud.shallow_depth = 1
if ud.shallow:
tarballname = gitsrcname
if ud.bareclone:
tarballname = "%s_bare" % tarballname
for name, revision in sorted(ud.revisions.items()):
tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
if not ud.nobranch:
tarballname = "%s-%s" % (tarballname, ud.branches[name])
tarballname = "%s-%s" % (tarballname, ud.shallow_depth)
fetcher = self.__class__.__name__.lower()
ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
ud.fullshallow = os.path.join(dl_dir, ud.shallowtarball)
ud.mirrortarballs.insert(0, ud.shallowtarball)
def localpath(self, ud, d): def localpath(self, ud, d):
return ud.clonedir return ud.clonedir
@ -222,6 +265,8 @@ class Git(FetchMethod):
for name in ud.names: for name in ud.names:
if not self._contains_ref(ud, d, name, ud.clonedir): if not self._contains_ref(ud, d, name, ud.clonedir):
return True return True
if ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow):
return True
if ud.write_tarballs and not os.path.exists(ud.fullmirror): if ud.write_tarballs and not os.path.exists(ud.fullmirror):
return True return True
return False return False
@ -238,8 +283,16 @@ class Git(FetchMethod):
def download(self, ud, d): def download(self, ud, d):
"""Fetch url""" """Fetch url"""
# If the checkout doesn't exist and the mirror tarball does, extract it no_clone = not os.path.exists(ud.clonedir)
if not os.path.exists(ud.clonedir) and os.path.exists(ud.fullmirror): need_update = no_clone or self.need_update(ud, d)
# A current clone is preferred to either tarball, a shallow tarball is
# preferred to an out of date clone, and a missing clone will use
# either tarball.
if ud.shallow and os.path.exists(ud.fullshallow) and need_update:
ud.localpath = ud.fullshallow
return
elif os.path.exists(ud.fullmirror) and no_clone:
bb.utils.mkdirhier(ud.clonedir) bb.utils.mkdirhier(ud.clonedir)
runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir) runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)
@ -285,9 +338,21 @@ class Git(FetchMethod):
raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name])) raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name]))
def build_mirror_data(self, ud, d): def build_mirror_data(self, ud, d):
# Generate a mirror tarball if needed if ud.shallow and ud.write_shallow_tarballs:
if ud.write_tarballs and not os.path.exists(ud.fullmirror): if not os.path.exists(ud.fullshallow):
# it's possible that this symlink points to read-only filesystem with PREMIRROR if os.path.islink(ud.fullshallow):
os.unlink(ud.fullshallow)
tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
shallowclone = os.path.join(tempdir, 'git')
try:
self.clone_shallow_local(ud, shallowclone, d)
logger.info("Creating tarball of git repository")
runfetchcmd("tar -czf %s ." % ud.fullshallow, d, workdir=shallowclone)
runfetchcmd("touch %s.done" % ud.fullshallow, d)
finally:
bb.utils.remove(tempdir, recurse=True)
elif ud.write_tarballs and not os.path.exists(ud.fullmirror):
if os.path.islink(ud.fullmirror): if os.path.islink(ud.fullmirror):
os.unlink(ud.fullmirror) os.unlink(ud.fullmirror)
@ -295,6 +360,43 @@ class Git(FetchMethod):
runfetchcmd("tar -czf %s ." % ud.fullmirror, d, workdir=ud.clonedir) runfetchcmd("tar -czf %s ." % ud.fullmirror, d, workdir=ud.clonedir)
runfetchcmd("touch %s.done" % ud.fullmirror, d) runfetchcmd("touch %s.done" % ud.fullmirror, d)
def clone_shallow_local(self, ud, dest, d):
    """Clone the repo and make it shallow.

    The upstream url of the new clone isn't set at this time, as it'll be
    set correctly when unpacked.

    ud   -- fetch data for the url; this reads basecmd, cloneflags,
            clonedir, names, revisions, branches, shallow_depth, nobranch
            and bareclone from it
    dest -- path at which the new, shallow clone is created
    d    -- datastore handed through to runfetchcmd
    """
    # Local import: only this method needs shlex.
    import shlex

    runfetchcmd("%s clone %s %s %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, dest), d)

    to_parse, shallow_branches = [], []
    for name in ud.names:
        revision = ud.revisions[name]
        # "<rev>~<depth - 1>" names the oldest commit to keep for this
        # revision; "^{}" peels the result in case it resolves to a tag.
        to_parse.append('%s~%d^{}' % (revision, ud.shallow_depth - 1))

        # For nobranch, we need a ref, otherwise the commits will be
        # removed, and for non-nobranch, we truncate the branch to our
        # srcrev, to avoid keeping unnecessary history beyond that.
        branch = ud.branches[name]
        if ud.nobranch:
            ref = "refs/shallow/%s" % name
        elif ud.bareclone:
            ref = "refs/heads/%s" % branch
        else:
            ref = "refs/remotes/origin/%s" % branch

        shallow_branches.append(ref)
        runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)

    # Map srcrev+depths to revisions
    shallow_revisions = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest).splitlines()

    # Make the repository shallow
    shallow_cmd = ['git', 'make-shallow', '-s']
    for ref in shallow_branches:
        shallow_cmd.extend(['-r', ref])
    shallow_cmd.extend(shallow_revisions)

    # The command is handed over as a single string, so quote every argument
    # with POSIX shell rules. subprocess.list2cmdline implements the Windows
    # quoting rules and leaves shell metacharacters in ref names unescaped.
    runfetchcmd(" ".join(shlex.quote(arg) for arg in shallow_cmd), d, workdir=dest)
def unpack(self, ud, destdir, d): def unpack(self, ud, destdir, d):
""" unpack the downloaded src to destdir""" """ unpack the downloaded src to destdir"""
@ -311,11 +413,12 @@ class Git(FetchMethod):
if os.path.exists(destdir): if os.path.exists(destdir):
bb.utils.prunedir(destdir) bb.utils.prunedir(destdir)
cloneflags = "-s -n" if ud.shallow and (not os.path.exists(ud.clonedir) or self.need_update(ud, d)):
if ud.bareclone: bb.utils.mkdirhier(destdir)
cloneflags += " --mirror" runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir)
else:
runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, cloneflags, ud.clonedir, destdir), d)
repourl = self._get_repo_url(ud) repourl = self._get_repo_url(ud)
runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, repourl), d, workdir=destdir) runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, repourl), d, workdir=destdir)
if not ud.nocheckout: if not ud.nocheckout:

View File

@ -979,3 +979,302 @@ class GitMakeShallowTest(FetcherTest):
orig_revs = len(self.git('rev-list --all').splitlines()) orig_revs = len(self.git('rev-list --all').splitlines())
self.make_shallow(['refs/tags/1.10.0']) self.make_shallow(['refs/tags/1.10.0'])
self.assertRevCount(orig_revs - 1746, ['--all']) self.assertRevCount(orig_revs - 1746, ['--all'])
class GitShallowTest(FetcherTest):
    """Tests for fetching, generating and unpacking shallow git mirror tarballs.

    Each test builds a small local git repository in ``self.srcdir``, points
    SRC_URI at it via the ``file`` protocol, and enables BB_GIT_SHALLOW with
    shallow tarball generation turned on.
    """

    def setUp(self):
        """Create an empty source repository and configure the datastore."""
        FetcherTest.setUp(self)
        self.gitdir = os.path.join(self.tempdir, 'git')
        self.srcdir = os.path.join(self.tempdir, 'gitsource')

        bb.utils.mkdirhier(self.srcdir)
        self.git('init', cwd=self.srcdir)
        self.d.setVar('WORKDIR', self.tempdir)
        self.d.setVar('S', self.gitdir)
        self.d.delVar('PREMIRRORS')
        self.d.delVar('MIRRORS')

        uri = 'git://%s;protocol=file;subdir=${S}' % self.srcdir
        self.d.setVar('SRC_URI', uri)
        self.d.setVar('SRCREV', '${AUTOREV}')
        self.d.setVar('AUTOREV', '${@bb.fetch2.get_autorev(d)}')

        self.d.setVar('BB_GIT_SHALLOW', '1')
        self.d.setVar('BB_GENERATE_MIRROR_TARBALLS', '0')
        self.d.setVar('BB_GENERATE_SHALLOW_TARBALLS', '1')

    def assertRefs(self, expected_refs, cwd=None):
        """Assert that the repository at cwd holds exactly expected_refs.

        expected_refs may be short names; they are resolved to full ref
        names with ``git rev-parse --symbolic-full-name`` before comparing.
        cwd defaults to the unpacked repository (self.gitdir).
        """
        if cwd is None:
            cwd = self.gitdir
        actual_refs = self.git(['for-each-ref', '--format=%(refname)'], cwd=cwd).splitlines()
        full_expected = self.git(['rev-parse', '--symbolic-full-name'] + expected_refs, cwd=cwd).splitlines()
        self.assertEqual(sorted(set(full_expected)), sorted(set(actual_refs)))

    def assertRevCount(self, expected_count, args=None, cwd=None):
        """Assert that ``git rev-list <args>`` lists expected_count commits.

        args defaults to ['HEAD'] and cwd to the unpacked repository.
        """
        if args is None:
            args = ['HEAD']
        if cwd is None:
            cwd = self.gitdir
        revs = self.git(['rev-list'] + args, cwd=cwd)
        actual_count = len(revs.splitlines())
        self.assertEqual(expected_count, actual_count, msg='Object count `%d` is not the expected `%d`' % (actual_count, expected_count))

    def git(self, cmd, cwd=None):
        """Run a git command and return its standard output.

        cmd is either a string (prefixed with 'git ') or an argument list
        (prefixed with 'git'). cwd defaults to the unpacked repository.
        """
        if isinstance(cmd, str):
            cmd = 'git ' + cmd
        else:
            cmd = ['git'] + cmd
        if cwd is None:
            cwd = self.gitdir
        return bb.process.run(cmd, cwd=cwd)[0]

    def add_empty_file(self, path, msg=None):
        """Create an empty file in the source repository and commit it.

        The commit message defaults to the file path.
        """
        if msg is None:
            msg = path
        open(os.path.join(self.srcdir, path), 'w').close()
        self.git(['add', path], self.srcdir)
        self.git(['commit', '-m', msg, path], self.srcdir)

    def fetch(self, uri=None):
        """Download uri and return (fetcher, urldata).

        With no uri, the first entry of SRC_URI is fetched using self.d.
        Otherwise a copy of the datastore is used with SRC_URI set to uri.
        """
        if uri is None:
            uris = self.d.getVar('SRC_URI').split()
            uri = uris[0]
            d = self.d
        else:
            d = self.d.createCopy()
            d.setVar('SRC_URI', uri)
            uri = d.expand(uri)
            uris = [uri]

        fetcher = bb.fetch2.Fetch(uris, d)
        fetcher.download()
        ud = fetcher.ud[uri]
        return fetcher, ud

    def fetch_and_unpack(self, uri=None):
        """Download uri, unpack it into WORKDIR and check that S exists."""
        fetcher, ud = self.fetch(uri)
        fetcher.unpack(self.d.getVar('WORKDIR'))
        self.assertTrue(os.path.exists(self.d.getVar('S')))
        return fetcher, ud

    def fetch_shallow(self, uri=None, disabled=False, keepclone=False):
        """Fetch a uri, generating a shallow tarball, then unpack using it

        disabled  -- when true, shallow support is expected to be off: no
                     shallow tarball is required and the final unpacked
                     repository must not be shallow
        keepclone -- currently unused
        """
        fetcher, ud = self.fetch_and_unpack(uri)
        self.assertTrue(os.path.exists(ud.clonedir), 'Git clone in DLDIR (%s) does not exist for uri %s' % (ud.clonedir, uri))

        # Confirm that the shallow tarball was generated
        if not disabled:
            self.assertTrue(os.path.exists(os.path.join(self.dldir, ud.mirrortarballs[0])))

        # Remove the unpacked tree and the clone, so that the next fetch and
        # unpack cannot be satisfied from the clone in DL_DIR
        bb.utils.remove(self.gitdir, recurse=True)
        bb.utils.remove(ud.clonedir, recurse=True)

        # Confirm that the shallow tarball is used when no git clone or git
        # mirror tarball is available
        fetcher, ud = self.fetch_and_unpack(uri)
        shallow_file = os.path.join(self.gitdir, '.git', 'shallow')
        if not disabled:
            self.assertTrue(os.path.exists(shallow_file), 'Unpacked git repository at %s is not shallow' % self.gitdir)
        else:
            self.assertFalse(os.path.exists(shallow_file), 'Unpacked git repository at %s is shallow' % self.gitdir)
        return fetcher, ud

    def test_shallow_disabled(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.assertRevCount(2, cwd=self.srcdir)

        self.d.setVar('BB_GIT_SHALLOW', '0')
        self.fetch_shallow(disabled=True)
        self.assertRevCount(2)

    def test_shallow_nobranch(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.assertRevCount(2, cwd=self.srcdir)

        srcrev = self.git('rev-parse HEAD', cwd=self.srcdir).strip()
        self.d.setVar('SRCREV', srcrev)
        uri = self.d.getVar('SRC_URI').split()[0]
        uri = '%s;nobranch=1;bare=1' % uri

        self.fetch_shallow(uri)
        self.assertRevCount(1)

        # shallow refs are used to ensure the srcrev sticks around when we
        # have no other branches referencing it
        self.assertRefs(['refs/shallow/default'])

    def test_shallow_default_depth_1(self):
        # Create initial git repo
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.assertRevCount(2, cwd=self.srcdir)

        self.fetch_shallow()
        self.assertRevCount(1)

    def test_shallow_depth_0_disables(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.assertRevCount(2, cwd=self.srcdir)

        self.d.setVar('BB_GIT_SHALLOW_DEPTH', '0')
        self.fetch_shallow(disabled=True)
        self.assertRevCount(2)

    def test_current_shallow_out_of_date_clone(self):
        # Create initial git repo
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.add_empty_file('c')
        self.assertRevCount(3, cwd=self.srcdir)

        # Clone and generate mirror tarball
        fetcher, ud = self.fetch()

        # Ensure we have a current mirror tarball, but an out of date clone
        self.git('update-ref refs/heads/master refs/heads/master~1', cwd=ud.clonedir)
        self.assertRevCount(2, cwd=ud.clonedir)

        # Fetch and unpack, from the current tarball, not the out of date clone
        bb.utils.remove(self.gitdir, recurse=True)
        fetcher, ud = self.fetch()
        fetcher.unpack(self.d.getVar('WORKDIR'))
        self.assertRevCount(1)

    def test_shallow_single_branch_no_merge(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.assertRevCount(2, cwd=self.srcdir)

        self.fetch_shallow()
        self.assertRevCount(1)
        self.assertTrue(os.path.exists(os.path.join(self.gitdir, 'a')))
        self.assertTrue(os.path.exists(os.path.join(self.gitdir, 'b')))

    def test_shallow_no_dangling(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.assertRevCount(2, cwd=self.srcdir)

        self.fetch_shallow()
        self.assertRevCount(1)
        self.assertFalse(self.git('fsck --dangling'))

    def test_shallow_srcrev_branch_truncation(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        b_commit = self.git('rev-parse HEAD', cwd=self.srcdir).rstrip()
        self.add_empty_file('c')
        self.assertRevCount(3, cwd=self.srcdir)

        self.d.setVar('SRCREV', b_commit)
        self.fetch_shallow()

        # The 'c' commit was removed entirely, and 'a' was removed from history
        self.assertRevCount(1, ['--all'])
        self.assertEqual(self.git('rev-parse HEAD').strip(), b_commit)
        self.assertTrue(os.path.exists(os.path.join(self.gitdir, 'a')))
        self.assertTrue(os.path.exists(os.path.join(self.gitdir, 'b')))
        self.assertFalse(os.path.exists(os.path.join(self.gitdir, 'c')))

    def test_shallow_ref_pruning(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.git('branch a_branch', cwd=self.srcdir)
        self.assertRefs(['master', 'a_branch'], cwd=self.srcdir)
        self.assertRevCount(2, cwd=self.srcdir)

        self.fetch_shallow()

        self.assertRefs(['master', 'origin/master'])
        self.assertRevCount(1)

    def test_shallow_multi_one_uri(self):
        # Create initial git repo
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.git('checkout -b a_branch', cwd=self.srcdir)
        self.add_empty_file('c')
        self.add_empty_file('d')
        self.git('checkout master', cwd=self.srcdir)
        self.add_empty_file('e')
        self.git('merge --no-ff --no-edit a_branch', cwd=self.srcdir)
        self.add_empty_file('f')
        self.assertRevCount(7, cwd=self.srcdir)

        uri = self.d.getVar('SRC_URI').split()[0]
        uri = '%s;branch=master,a_branch;name=master,a_branch' % uri

        self.d.setVar('BB_GIT_SHALLOW_DEPTH', '2')
        self.d.setVar('SRCREV_master', '${AUTOREV}')
        self.d.setVar('SRCREV_a_branch', '${AUTOREV}')

        self.fetch_shallow(uri)

        self.assertRevCount(3, ['--all'])
        self.assertRefs(['master', 'origin/master', 'origin/a_branch'])

    def test_shallow_clone_preferred_over_shallow(self):
        self.add_empty_file('a')
        self.add_empty_file('b')

        # Fetch once to generate the shallow tarball
        fetcher, ud = self.fetch()
        self.assertTrue(os.path.exists(os.path.join(self.dldir, ud.mirrortarballs[0])))

        # Fetch and unpack with both the clonedir and shallow tarball available
        bb.utils.remove(self.gitdir, recurse=True)
        fetcher, ud = self.fetch_and_unpack()

        # The unpacked tree should *not* be shallow
        self.assertRevCount(2)
        self.assertFalse(os.path.exists(os.path.join(self.gitdir, '.git', 'shallow')))

    def test_shallow_mirrors(self):
        self.add_empty_file('a')
        self.add_empty_file('b')

        # Fetch once to generate the shallow tarball
        fetcher, ud = self.fetch()
        mirrortarball = ud.mirrortarballs[0]
        self.assertTrue(os.path.exists(os.path.join(self.dldir, mirrortarball)))

        # Set up the mirror
        mirrordir = os.path.join(self.tempdir, 'mirror')
        bb.utils.mkdirhier(mirrordir)
        self.d.setVar('PREMIRRORS', 'git://.*/.* file://%s/\n' % mirrordir)

        os.rename(os.path.join(self.dldir, mirrortarball),
                  os.path.join(mirrordir, mirrortarball))

        # Fetch from the mirror
        bb.utils.remove(self.dldir, recurse=True)
        bb.utils.remove(self.gitdir, recurse=True)
        self.fetch_and_unpack()
        self.assertRevCount(1)

    def test_shallow_invalid_depth(self):
        self.add_empty_file('a')
        self.add_empty_file('b')

        self.d.setVar('BB_GIT_SHALLOW_DEPTH', '-12')
        with self.assertRaises(bb.fetch2.FetchError):
            self.fetch()

    # The network test is only defined when network tests are not skipped.
    if os.environ.get("BB_SKIP_NETTESTS") == "yes":
        print("Unset BB_SKIP_NETTESTS to run network tests")
    else:
        def test_bitbake(self):
            self.git('remote add --mirror=fetch origin git://github.com/openembedded/bitbake', cwd=self.srcdir)
            self.git('config core.bare true', cwd=self.srcdir)
            self.git('fetch --tags', cwd=self.srcdir)

            self.d.setVar('BB_GIT_SHALLOW_DEPTH', '100')

            self.fetch_shallow()

            orig_revs = len(self.git('rev-list master', cwd=self.srcdir).splitlines())
            revs = len(self.git('rev-list master').splitlines())
            self.assertNotEqual(orig_revs, revs)
            self.assertRefs(['master', 'origin/master'])