go-mod-fetcher: fix shallow clone handling, duplicates, and discovery workflow

oe-go-mod-fetcher.py:
  - Remove BB_GIT_SHALLOW_EXTRA_REFS generation - every listed ref must be
    present in ALL repositories, which isn't the case for module
    dependencies. Instead, use the tag= parameter in individual SRC_URI entries.
  - Add tag=<tagname> to SRC_URI when ref is a tag, allowing BitBake's
    shallow clone to include the necessary tag (with BB_GIT_SHALLOW=1)
  - Remove premature _ref_points_to_commit() check that was clearing
    ref_hints before repos were fetched, preventing tag= from being added
  - Fix pseudo-version verification: only use shallow fetch for actual
    tags (refs/tags/...), not branch refs. Pseudo-versions with branch
    refs (refs/heads/...) now correctly use unshallow path to reach
    historical commits that aren't fetchable with depth=1

  oe-go-mod-fetcher-hybrid.py:
  - Fix duplicate SRC_URI entries when multiple modules share the same
    git repo/commit (e.g., errdefs and errdefs/pkg). Track added vcs_hashes
    to skip duplicates.
  - Add --discovery-cache option to calculate module sizes from discovery
    cache .zip files, enabling size recommendations during discover_and_generate

  go-mod-discovery.bbclass:
  - Add automatic hybrid mode recommendations after generate_modules,
    showing module sizes and suggested --git prefixes for conversion
  - Add GO_MOD_DISCOVERY_SKIP_VERIFY variable to skip commit verification
    on retries (useful after fixing verification issues)
  - Pass --discovery-cache to hybrid script for accurate size calculations

Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
This commit is contained in:
Bruce Ashfield 2026-01-04 11:40:15 -05:00
parent d743412952
commit dbf720ccb0
3 changed files with 135 additions and 24 deletions

View File

@ -117,6 +117,11 @@ GO_MOD_DISCOVERY_GIT_REF ?= "${SRCREV}"
# Recipe directory for generated .inc files - defaults to recipe's directory
GO_MOD_DISCOVERY_RECIPEDIR ?= "${FILE_DIRNAME}"
# Skip commit verification during generation (use cached results only)
# Set to "1" to skip verification on retries after initial discovery
# Usage: GO_MOD_DISCOVERY_SKIP_VERIFY = "1" in local.conf or recipe
GO_MOD_DISCOVERY_SKIP_VERIFY ?= ""
# Empty default for TAGS if not set by recipe (avoids undefined variable errors)
TAGS ?= ""
@ -384,11 +389,19 @@ Or run 'bitbake ${PN} -c show_upgrade_commands' to see manual options."
bbfatal "Could not find oe-go-mod-fetcher.py in any layer"
fi
# Build fetcher command with optional flags
SKIP_VERIFY_FLAG=""
if [ "${GO_MOD_DISCOVERY_SKIP_VERIFY}" = "1" ]; then
echo "NOTE: Skipping commit verification (GO_MOD_DISCOVERY_SKIP_VERIFY=1)"
SKIP_VERIFY_FLAG="--skip-verify"
fi
python3 "${FETCHER_SCRIPT}" \
--discovered-modules "${GO_MOD_DISCOVERY_MODULES_JSON}" \
--git-repo "${GO_MOD_DISCOVERY_GIT_REPO}" \
--git-ref "${GO_MOD_DISCOVERY_GIT_REF}" \
--recipedir "${GO_MOD_DISCOVERY_RECIPEDIR}"
--recipedir "${GO_MOD_DISCOVERY_RECIPEDIR}" \
${SKIP_VERIFY_FLAG}
if [ $? -eq 0 ]; then
echo ""
@ -411,6 +424,70 @@ addtask generate_modules
do_generate_modules[nostamp] = "1"
do_generate_modules[vardeps] += "GO_MOD_DISCOVERY_MODULES_JSON GO_MOD_DISCOVERY_GIT_REPO \
GO_MOD_DISCOVERY_GIT_REF GO_MOD_DISCOVERY_RECIPEDIR"
do_generate_modules[postfuncs] = "do_show_hybrid_recommendation"
# Show hybrid conversion recommendations after VCS generation
python do_show_hybrid_recommendation() {
"""
Show recommendations for converting to hybrid gomod:// + git:// mode.
Runs automatically after generate_modules completes.

Invokes oe-go-mod-fetcher-hybrid.py with --recommend and relays the
script's stdout through bb.plain(). Returns silently when go-mod-git.inc,
the meta-virtualization layer, or the script itself cannot be found.
"""
import subprocess
from pathlib import Path
# Nothing to recommend unless a generated go-mod-git.inc exists in the recipe dir
recipedir = d.getVar('GO_MOD_DISCOVERY_RECIPEDIR')
git_inc = Path(recipedir) / 'go-mod-git.inc'
if not git_inc.exists():
return
# Find the hybrid script: use the first BBLAYERS entry whose path
# contains 'meta-virtualization' and look under its scripts/ directory
layerdir = None
for layer in d.getVar('BBLAYERS').split():
if 'meta-virtualization' in layer:
layerdir = layer
break
if not layerdir:
return
scriptpath = Path(layerdir) / "scripts" / "oe-go-mod-fetcher-hybrid.py"
if not scriptpath.exists():
return
# Banner printed before the script output
bb.plain("")
bb.plain("=" * 70)
bb.plain("HYBRID MODE RECOMMENDATION")
bb.plain("=" * 70)
# Base command; size sources are appended below only when they exist on disk
cmd = ['python3', str(scriptpath), '--recipedir', recipedir, '--recommend']
# Try to find module sizes from discovery cache or vcs_cache
discovery_dir = d.getVar('GO_MOD_DISCOVERY_DIR')
workdir = d.getVar('WORKDIR')
# Check discovery cache first (has .zip files with accurate sizes)
if discovery_dir:
# Path is <GO_MOD_DISCOVERY_DIR>/cache/cache/download
discovery_cache = Path(discovery_dir) / 'cache' / 'cache' / 'download'
if discovery_cache.exists():
cmd.extend(['--discovery-cache', str(discovery_cache)])
# Also check vcs_cache if it exists (from a previous build)
if workdir:
vcs_cache = Path(workdir) / 'sources' / 'vcs_cache'
if vcs_cache.exists():
# The script receives WORKDIR itself, not the vcs_cache path
cmd.extend(['--workdir', workdir])
# Run the script with a 120 second timeout. Only stdout is relayed; the
# return code and stderr are not inspected here.
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
if result.stdout:
for line in result.stdout.splitlines():
bb.plain(line)
bb.plain("")
bb.plain("")
# Best effort: any failure (including a timeout) is reported as a note
except Exception as e:
bb.note(f"Could not run hybrid recommendation: {e}")
}
# =============================================================================
# TASK 4: do_discover_and_generate - All-in-one convenience task
@ -443,7 +520,7 @@ addtask discover_and_generate after do_unpack
do_discover_and_generate[depends] = "${PN}:do_prepare_recipe_sysroot"
do_discover_and_generate[network] = "1"
do_discover_and_generate[nostamp] = "1"
do_discover_and_generate[postfuncs] = "do_discover_modules do_extract_modules do_generate_modules"
do_discover_and_generate[postfuncs] = "do_discover_modules do_extract_modules do_generate_modules do_show_hybrid_recommendation"
# =============================================================================
# TASK: do_clean_discovery - Clean the persistent cache

View File

@ -241,6 +241,35 @@ def get_repo_sizes(vcs_info: dict, workdir: Optional[Path] = None) -> dict[str,
return sizes
def _escape_go_cache_component(text: str) -> str:
    """Apply Go module-cache case escaping: 'A' -> '!a', other chars unchanged."""
    return ''.join(f'!{ch.lower()}' if 'A' <= ch <= 'Z' else ch for ch in text)


def get_discovery_sizes(modules: list[dict], discovery_cache: Optional[Path] = None) -> dict[str, int]:
    """Get sizes of modules from discovery cache .zip files.

    Args:
        modules: Module dicts; each needs non-empty 'module', 'version' and
            'vcs_hash' entries to be considered (others are skipped).
        discovery_cache: Directory laid out as <module>/@v/<version>.zip.
            When None or non-existent, an empty dict is returned.

    Returns:
        Mapping of vcs_hash -> total size in bytes of the matching .zip
        files. Sizes of modules sharing a vcs_hash are accumulated.
    """
    sizes: dict[str, int] = {}
    if discovery_cache is None or not discovery_cache.exists():
        return sizes

    for mod in modules:
        module_path = mod.get('module', '')
        version = mod.get('version', '')
        vcs_hash = mod.get('vcs_hash', '')
        if not module_path or not version or not vcs_hash:
            continue

        # Build path to .zip file: discovery_cache/<module>/@v/<version>.zip
        raw_zip = discovery_cache / module_path / '@v' / f'{version}.zip'
        candidates = [raw_zip]

        # Go's module cache stores paths and versions case-escaped
        # (e.g. github.com/Azure -> github.com/!azure), so a module with
        # uppercase letters is only found under its escaped name.
        escaped_zip = (
            discovery_cache
            / _escape_go_cache_component(module_path)
            / '@v'
            / f'{_escape_go_cache_component(version)}.zip'
        )
        if escaped_zip != raw_zip:
            candidates.append(escaped_zip)

        for zip_path in candidates:
            try:
                size = zip_path.stat().st_size
            except OSError:
                # Missing or unreadable: try the next candidate, if any
                continue
            # Accumulate size by vcs_hash (same repo may have multiple modules)
            sizes[vcs_hash] = sizes.get(vcs_hash, 0) + size
            break

    return sizes
def format_size(size_bytes: int) -> str:
"""Format bytes as human readable."""
for unit in ['B', 'KB', 'MB', 'GB']:
@ -604,6 +633,9 @@ def main():
parser.add_argument('--workdir', type=Path, default=None,
help='BitBake workdir containing vcs_cache (for size calculations)')
parser.add_argument('--discovery-cache', type=Path, default=None,
help='Discovery cache directory containing module .zip files (for size calculations)')
# Actions
parser.add_argument('--list', action='store_true',
help='List all modules with sizes')
@ -650,12 +682,20 @@ def main():
vcs_info = parse_go_mod_git_inc(git_inc)
print(f" Found {len(vcs_info)} VCS entries")
# Get sizes if workdir provided
# Get sizes from discovery cache and/or workdir
sizes = {}
if args.discovery_cache:
print(f"Calculating sizes from discovery cache {args.discovery_cache}...")
sizes = get_discovery_sizes(modules, args.discovery_cache)
print(f" Got sizes for {len(sizes)} modules from discovery cache")
if args.workdir:
print(f"Calculating sizes from {args.workdir}...")
sizes = get_repo_sizes(vcs_info, args.workdir)
print(f" Got sizes for {len(sizes)} repos")
vcs_sizes = get_repo_sizes(vcs_info, args.workdir)
print(f" Got sizes for {len(vcs_sizes)} repos from vcs_cache")
# Merge vcs_sizes into sizes (vcs_cache sizes override discovery if both exist)
for k, v in vcs_sizes.items():
sizes[k] = v
# Handle actions
if args.list:

View File

@ -815,7 +815,10 @@ def verify_commit_accessible(vcs_url: str, commit: str, ref_hint: str = "", vers
# Strategy depends on whether this is a tagged version or pseudo-version
commit_fetched = commit_present # If already present, no need to fetch
if ref_hint and not commit_present:
# Only use shallow fetch for actual tags - pseudo-versions with branch refs need unshallow
is_tag_ref = ref_hint and ref_hint.startswith('refs/tags/')
if is_tag_ref and not commit_present:
# Tagged version: try shallow fetch of the specific commit (only if not already present)
try:
fetch_cmd = ["git", "fetch", "--depth=1", "origin", commit]
@ -3924,9 +3927,9 @@ def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[
else:
commit_sha = repo_info['commits'][commit_hash]['commit_sha']
# Trust the ref_hint from discovery - it will be validated/corrected during
# the verification pass if needed (e.g., force-pushed tags are auto-corrected)
ref_hint = module.get('vcs_ref', '')
if ref_hint and not _ref_points_to_commit(vcs_url, ref_hint, commit_hash):
ref_hint = ''
entry = repo_info['commits'][commit_hash]
entry['modules'].append(module)
@ -4080,7 +4083,9 @@ def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[
# For branches, use the branch name directly
if ref_hint.startswith('refs/tags/'):
# Tags: BitBake can fetch tagged commits with nobranch=1
branch_param = ';nobranch=1'
# Add tag= so shallow clones include this tag (with BB_GIT_SHALLOW=1 in recipe)
tag_name = ref_hint[10:] # Strip "refs/tags/"
branch_param = f';nobranch=1;tag={tag_name}'
elif ref_hint.startswith('refs/heads/'):
# Branches: use the actual branch name
branch_name = ref_hint[11:] # Strip "refs/heads/"
@ -4161,21 +4166,10 @@ def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[
f.write(f'SRC_URI += "{entry}"\n')
f.write('\n')
# Collect all tag references for shallow cloning
# BB_GIT_SHALLOW_EXTRA_REFS ensures these refs are included in shallow clones
tag_refs = set()
for module in modules:
vcs_ref = module.get('vcs_ref', '')
if vcs_ref and 'refs/tags/' in vcs_ref:
tag_refs.add(vcs_ref)
if tag_refs:
f.write("# Tag references for shallow cloning\n")
f.write("# Ensures shallow clones include all necessary tags\n")
f.write("BB_GIT_SHALLOW_EXTRA_REFS = \"\\\n")
for tag_ref in sorted(tag_refs):
f.write(f" {tag_ref} \\\n")
f.write('"\n')
# Note: BB_GIT_SHALLOW_EXTRA_REFS is NOT used here because those refs must be
# present in ALL repositories, which isn't the case for module dependencies.
# Instead, we use tag= in individual SRC_URI entries when the ref is a tag.
# The recipe should set BB_GIT_SHALLOW = "1" to enable shallow clones globally.
# Note: SRCREV_* variables are not needed since rev= is embedded directly in SRC_URI