#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
#
# go-dep processor
#
# Copyright (C) 2025 Bruce Ashfield
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
Go Module Git Fetcher - Hybrid Architecture
Version 3.0.0 - Complete rewrite using Go download for discovery + git builds
Author: Bruce Ashfield
Description: Use Go's download for discovery, build from git sources

ARCHITECTURE:
Phase 1: Discovery - Use 'go mod download' + filesystem walk to get correct module paths
Phase 2: Recipe Generation - Generate BitBake recipe with git:// SRC_URI entries
Phase 3: Cache Building - Build module cache from git sources during do_create_module_cache

This approach eliminates:
- Complex go list -m -json parsing
- Manual go.sum parsing and augmentation
- Parent module detection heuristics
- Version path manipulation (/v2+/v3+ workarounds)
- Module path normalization bugs

Instead we:
- Let Go download modules to temporary cache (discovery only)
- Walk filesystem to get CORRECT module paths (no parsing!)
- Extract VCS info from .info files
- Fetch git repositories for each module
- Build module cache from git during BitBake build

CHANGELOG v3.0.0:
- Complete architectural rewrite
- Removed all go list and go.sum parsing logic (4000+ lines)
- Implemented 3-phase hybrid approach
- Discovery uses go mod download + filesystem walk
- Module paths from filesystem, not from go list (no more /v3 stripping bugs!)
- Builds entirely from git sources
- Compatible with oe-core's gomod:// fetcher (same cache structure)
"""

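# A minimal sketch of the Go module cache layout this script discovers in
# Phase 1 and rebuilds in Phase 3 (standard `go mod download` layout; the
# module shown is illustrative, not something this script requires):
#
#   $GOMODCACHE/cache/download/github.com/example/mod/@v/
#       v1.2.3.info   # JSON metadata, incl. Origin {URL, Hash, Ref} VCS data
#       v1.2.3.mod    # the module's go.mod file
#       v1.2.3.zip    # module source archive
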
import argparse
import concurrent.futures
import hashlib
import io
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import textwrap
import threading
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
from datetime import datetime, timedelta, timezone

VERSION = "3.0.0"
LOG_PATH: Optional[Path] = None

# =============================================================================
# BitBake Task Templates
# =============================================================================


class Tee(io.TextIOBase):
    """Write data to multiple text streams."""

    def __init__(self, *streams: io.TextIOBase) -> None:
        self.streams = streams

    def write(self, data: str) -> int:
        for stream in self.streams:
            stream.write(data)
        return len(data)

    def flush(self) -> None:
        for stream in self.streams:
            stream.flush()

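# Typical use of Tee (a sketch, assuming the caller opens LOG_PATH for append):
#   log_fh = open(LOG_PATH, "a")
#   sys.stdout = Tee(sys.__stdout__, log_fh)  # mirror console output into the log
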
def parse_go_sum(go_sum_path: Path) -> Tuple[Set[Tuple[str, str]], Set[Tuple[str, str]]]:
    """
    Parse go.sum to find modules that need source code.

    Returns:
        Tuple of (modules_needing_source, indirect_only_modules)
        - modules_needing_source: Modules with source code entries (need .zip files)
        - indirect_only_modules: Modules that only have /go.mod entries (only need .mod files)
    """
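    # go.sum entries come in two forms (module, version, and hashes illustrative):
    #   github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
    #   github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
    # A version that only ever appears with the "/go.mod" suffix is indirect-only:
    # the build needs its .mod file but never its source zip.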
    def sanitize_module_name(name):
        """Remove quotes from module names"""
        if not name:
            return name
        stripped = name.strip()
        if len(stripped) >= 2 and stripped[0] == '"' and stripped[-1] == '"':
            return stripped[1:-1]
        return stripped

    modules_with_source: Set[Tuple[str, str]] = set()
    modules_with_gomod_only: Set[Tuple[str, str]] = set()

    if not go_sum_path.exists():
        return (modules_with_source, modules_with_gomod_only)

    # First pass: collect all entries
    all_entries = {}
    with go_sum_path.open() as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('//'):
                continue
            parts = line.split()
            if len(parts) != 3:
                continue

            module_path, version, _ = parts
            module_path = sanitize_module_name(module_path)

            # Track whether this entry is for go.mod or source
            is_gomod_entry = version.endswith('/go.mod')

            # Strip /go.mod suffix for key
            base_version = version[:-7] if is_gomod_entry else version
            key = (module_path, base_version)

            if key not in all_entries:
                all_entries[key] = {'has_source': False, 'has_gomod': False}

            if is_gomod_entry:
                all_entries[key]['has_gomod'] = True
            else:
                all_entries[key]['has_source'] = True

    # Second pass: categorize modules
    for key, entry_types in all_entries.items():
        if entry_types['has_source']:
            modules_with_source.add(key)
            continue

        if entry_types['has_gomod']:
            modules_with_gomod_only.add(key)
            # Note: We no longer add indirect-only modules to modules_with_source.
            # The native build succeeds without their .zip files - only .mod files are needed.
            # Adding them caused the generator to resolve ~1000 extra modules unnecessarily.

    return (modules_with_source, modules_with_gomod_only)


def collect_modules_via_go_list(source_dir: Path) -> Set[Tuple[str, str]]:
    """
    Use `go list -m -json all` to discover modules that may not appear in go.sum.
    """
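    # Each object in the `go list -m -json all` stream looks roughly like
    # (abridged, illustrative module):
    #   {"Path": "golang.org/x/sys", "Version": "v0.1.0", "Indirect": true}
    # The main module carries "Main": true (skipped below), and a "Replace"
    # object, when present, supplies the effective Path/Version.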
    env = os.environ.copy()
    env.setdefault('GOPROXY', 'https://proxy.golang.org')
    if CURRENT_GOMODCACHE:
        env['GOMODCACHE'] = CURRENT_GOMODCACHE

    try:
        result = subprocess.run(
            ['go', 'list', '-m', '-json', 'all'],
            cwd=source_dir,
            capture_output=True,
            text=True,
            check=True,
            env=env,
        )
    except subprocess.CalledProcessError:
        return set()

    data = result.stdout
    modules: Set[Tuple[str, str]] = set()
    decoder = json.JSONDecoder()
    idx = 0
    length = len(data)

    while idx < length:
        while idx < length and data[idx].isspace():
            idx += 1
        if idx >= length:
            break
        try:
            obj, end = decoder.raw_decode(data, idx)
        except json.JSONDecodeError:
            break
        idx = end

        path = obj.get('Path') or ''
        if not path or obj.get('Main'):
            continue

        version = obj.get('Version') or ''
        replace = obj.get('Replace')
        if replace:
            path = replace.get('Path', path) or path
            version = replace.get('Version', version) or version

        if not version or version == 'none':
            continue

        modules.add((path, version))

    return modules


def go_mod_download(module_path: str, version: str) -> bool:
    """Download a specific module version into the current GOMODCACHE."""
    if not CURRENT_GOMODCACHE or not CURRENT_SOURCE_DIR:
        return False

    key = (module_path, version)
    if key in DOWNLOADED_MODULES:
        # Already downloaded; report success (this function returns bool).
        return True

    env = os.environ.copy()
    env.setdefault('GOPROXY', 'https://proxy.golang.org')
    env['GOMODCACHE'] = CURRENT_GOMODCACHE

    try:
        subprocess.run(
            ['go', 'mod', 'download', f'{module_path}@{version}'],
            cwd=str(CURRENT_SOURCE_DIR),
            env=env,
            capture_output=True,
            text=True,
            check=True,
            timeout=GO_CMD_TIMEOUT,
        )
        DOWNLOADED_MODULES.add(key)
        return True
    except subprocess.TimeoutExpired:
        print(f"  ❌ go mod download timed out for {module_path}@{version} after {GO_CMD_TIMEOUT}s")
        return False
    except subprocess.CalledProcessError as e:
        stderr = (e.stderr or '').strip()
        if stderr:
            lower = stderr.lower()
            network_signals = [
                "lookup ", "dial tcp", "connection refused",
                "network is unreachable", "tls handshake timeout",
                "socket: operation not permitted"
            ]
            if any(signal in lower for signal in network_signals):
                global NETWORK_FAILURE_DETECTED
                NETWORK_FAILURE_DETECTED = True
                raise RuntimeError(
                    f"Network failure while downloading {module_path}@{version}: {stderr}"
                ) from e
        print(f"  ⚠️ go mod download failed for {module_path}@{version}: {stderr}")
        return False


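# For reference, the download step above is equivalent to running
# (module@version illustrative):
#   GOMODCACHE=/path/to/modcache GOPROXY=https://proxy.golang.org \
#       go mod download golang.org/x/sys@v0.1.0
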
SCRIPT_DIR = Path(__file__).resolve().parent
CACHE_BASE_DIR = SCRIPT_DIR / "data"  # Default to scripts/data for JSON caches
DATA_DIR = CACHE_BASE_DIR
CLONE_CACHE_DIR = SCRIPT_DIR / ".cache" / "repos"  # Repository clone cache
VERIFY_BASE_DIR = CACHE_BASE_DIR / ".verify"
LS_REMOTE_CACHE_PATH = DATA_DIR / "ls-remote-cache.json"
VERIFY_COMMIT_CACHE_PATH = DATA_DIR / "verify-cache.json"
MODULE_REPO_OVERRIDES_PATH = DATA_DIR / "repo-overrides.json"
# Manual overrides file - tracked in git, for permanent overrides when discovery fails
MANUAL_OVERRIDES_PATH = SCRIPT_DIR / "data" / "manual-overrides.json"

LS_REMOTE_CACHE: Dict[Tuple[str, str], Optional[str]] = {}
LS_REMOTE_CACHE_DIRTY = False

MODULE_METADATA_CACHE_PATH = DATA_DIR / "module-cache.json"
MODULE_METADATA_CACHE: Dict[Tuple[str, str], Dict[str, str]] = {}
MODULE_METADATA_CACHE_DIRTY = False

VANITY_URL_CACHE_PATH = DATA_DIR / "vanity-url-cache.json"
VANITY_URL_CACHE: Dict[str, Optional[str]] = {}
VANITY_URL_CACHE_DIRTY = False

CURRENT_GOMODCACHE: Optional[str] = None
CURRENT_SOURCE_DIR: Optional[Path] = None
TEMP_GOMODCACHES: List[Path] = []
FAILED_MODULE_PATHS: Set[str] = set()
FAILED_MODULE_ENTRIES: Set[Tuple[str, str]] = set()
DOWNLOADED_MODULES: Set[Tuple[str, str]] = set()
NETWORK_FAILURE_DETECTED: bool = False
SKIPPED_MODULES: Dict[Tuple[str, str], str] = {}
VERBOSE_MODE: bool = False  # Set from command-line args

def _record_skipped_module(module_path: str, version: str, reason: str) -> None:
    SKIPPED_MODULES[(module_path, version)] = reason

GO_CMD_TIMEOUT = 180  # seconds
GIT_CMD_TIMEOUT = 90  # seconds

VERIFY_REPO_CACHE: Dict[str, Path] = {}
VERIFY_REPO_LOCKS: Dict[str, threading.Lock] = {}  # Per-repository locks for parallel verification
VERIFY_REPO_LOCKS_LOCK = threading.RLock()  # REENTRANT lock to allow same thread to acquire multiple times
VERIFY_REPO_BRANCHES: Dict[str, List[str]] = {}  # Cache branch lists per repo to avoid repeated ls-remote
VERIFY_RESULTS: Dict[Tuple[str, str], bool] = {}
VERIFY_COMMIT_CACHE: Dict[str, bool] = {}  # Legacy format: key -> bool
VERIFY_COMMIT_CACHE_V2: Dict[str, Dict[str, Any]] = {}  # New format: key -> {verified: bool, first_verified: str, last_checked: str, fetch_method: str}
VERIFY_COMMIT_CACHE_DIRTY = False
VERIFY_ENABLED = False  # Set to True when verification is active
VERIFY_CACHE_MAX_AGE_DAYS = 30  # Re-verify commits older than this
VERIFY_DETECTED_BRANCHES: Dict[Tuple[str, str], str] = {}  # (url, commit) -> branch_name
VERIFY_FALLBACK_COMMITS: Dict[Tuple[str, str], str] = {}  # Maps (url, original_commit) -> fallback_commit
VERIFY_FULL_REPOS: Set[str] = set()  # Track repos that have been fetched with full history
VERIFY_CORRECTIONS_APPLIED = False  # Track if any commit corrections were made
MODULE_REPO_OVERRIDES: Dict[Tuple[str, Optional[str]], str] = {}  # Dynamic overrides from --set-repo
MODULE_REPO_OVERRIDES_DIRTY = False
MANUAL_OVERRIDES: Dict[Tuple[str, Optional[str]], str] = {}  # Git-tracked overrides from manual-overrides.json

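# Shape of a verify-cache.json entry in the v2 format, keyed by "url|||commit"
# (values illustrative):
#   "https://github.com/example/mod|||<40-hex-sha>": {
#       "verified": true,
#       "first_verified": "2025-01-01T00:00:00+00:00",
#       "last_checked": "2025-01-20T00:00:00+00:00",
#       "fetch_method": "fetch"  # or "cached" for migrated legacy entries
#   }
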
# REPO_OVERRIDES kept for backwards compatibility but no longer used for hardcoded values.
# Manual overrides go in data/manual-overrides.json which is tracked in git.
REPO_OVERRIDES: Dict[str, List[str]] = {}


def _normalise_override_key(module_path: str, version: Optional[str]) -> Tuple[str, Optional[str]]:
    module = module_path.strip()
    ver = version.strip() if version else None
    if not module:
        raise ValueError("module path for override cannot be empty")
    return module, ver


def _parse_override_spec(module_spec: str) -> Tuple[str, Optional[str]]:
    if '@' in module_spec:
        module_path, version = module_spec.split('@', 1)
        version = version or None
    else:
        module_path, version = module_spec, None
    return module_path.strip(), version.strip() if version else None


def repo_override_candidates(module_path: str, version: Optional[str] = None) -> List[str]:
    """
    Get repository URL override candidates for a module.

    Priority order:
    1. Dynamic overrides (--set-repo, stored in repo-overrides.json) - version-specific
    2. Dynamic overrides - wildcard (no version)
    3. Manual overrides (manual-overrides.json, tracked in git) - version-specific
    4. Manual overrides - wildcard
    5. Legacy REPO_OVERRIDES dict (for backwards compatibility)
    """
    overrides: List[str] = []
    key = _normalise_override_key(module_path, version)
    wildcard_key = _normalise_override_key(module_path, None)

    # Dynamic overrides first (highest priority - user can override manual)
    dynamic_specific = MODULE_REPO_OVERRIDES.get(key)
    if dynamic_specific:
        overrides.append(dynamic_specific)

    dynamic_default = MODULE_REPO_OVERRIDES.get(wildcard_key)
    if dynamic_default and dynamic_default not in overrides:
        overrides.append(dynamic_default)

    # Manual overrides next (git-tracked, for permanent fixes)
    manual_specific = MANUAL_OVERRIDES.get(key)
    if manual_specific and manual_specific not in overrides:
        overrides.append(manual_specific)

    manual_default = MANUAL_OVERRIDES.get(wildcard_key)
    if manual_default and manual_default not in overrides:
        overrides.append(manual_default)

    # Legacy hardcoded overrides last (backwards compat)
    for candidate in REPO_OVERRIDES.get(module_path, []):
        if candidate not in overrides:
            overrides.append(candidate)

    return overrides


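# Example (illustrative): a dynamic override pinned for golang.org/x/net@v0.7.0
# is returned first; a wildcard override for golang.org/x/net and any entries
# from manual-overrides.json follow, so callers can try candidates in order.
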
def configure_cache_paths(cache_dir: Optional[str], clone_cache_dir: Optional[str] = None) -> None:
    """
    Configure cache file locations.

    Args:
        cache_dir: Directory for JSON metadata caches (default: scripts/data)
        clone_cache_dir: Directory for git repository clones (default: scripts/.cache/repos)
    """
    global CACHE_BASE_DIR, DATA_DIR, CLONE_CACHE_DIR
    global LS_REMOTE_CACHE_PATH, MODULE_METADATA_CACHE_PATH, VANITY_URL_CACHE_PATH
    global VERIFY_COMMIT_CACHE_PATH, MODULE_REPO_OVERRIDES_PATH

    # Configure JSON metadata cache directory
    if cache_dir:
        CACHE_BASE_DIR = Path(cache_dir).resolve()
    else:
        CACHE_BASE_DIR = SCRIPT_DIR / "data"  # Default to scripts/data

    CACHE_BASE_DIR.mkdir(parents=True, exist_ok=True)
    DATA_DIR = CACHE_BASE_DIR  # cache_dir IS the data directory now

    LS_REMOTE_CACHE_PATH = DATA_DIR / "ls-remote-cache.json"
    MODULE_METADATA_CACHE_PATH = DATA_DIR / "module-cache.json"
    VANITY_URL_CACHE_PATH = DATA_DIR / "vanity-url-cache.json"
    VERIFY_COMMIT_CACHE_PATH = DATA_DIR / "verify-cache.json"
    MODULE_REPO_OVERRIDES_PATH = DATA_DIR / "repo-overrides.json"

    global VERIFY_BASE_DIR
    VERIFY_BASE_DIR = CACHE_BASE_DIR / ".verify"
    VERIFY_BASE_DIR.mkdir(parents=True, exist_ok=True)

    # Configure git clone cache directory
    if clone_cache_dir:
        CLONE_CACHE_DIR = Path(clone_cache_dir).resolve()
    else:
        CLONE_CACHE_DIR = SCRIPT_DIR / ".cache" / "repos"  # Default to scripts/.cache/repos

    CLONE_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    VERIFY_COMMIT_CACHE.clear()
    load_verify_commit_cache()
    MODULE_REPO_OVERRIDES.clear()
    load_repo_overrides()
    load_manual_overrides()

    global VERIFY_REPO_CACHE
    VERIFY_REPO_CACHE = {}


def ensure_path_is_writable(path: Path) -> None:
    """
    Attempt to create and delete a small file to verify write access. Exit with
    a clear error if the path is not writable.
    """
    path.mkdir(parents=True, exist_ok=True)
    probe = path / ".oe-go-mod-fetcher-permcheck"
    try:
        with open(probe, "w") as fh:
            fh.write("")
    except Exception as exc:
        print(f"❌ GOMODCACHE is not writable: {path} ({exc})")
        print(f"   Fix permissions (e.g. chown/chmod) or pass a writable --gomodcache path.")
        sys.exit(1)
    finally:
        try:
            probe.unlink()
        except Exception:
            pass

def _normalize_url(url: str) -> str:
    url = url.strip()
    if url.startswith("git://"):
        url = "https://" + url[6:]
    if url.endswith(".git"):
        url = url[:-4]
    return url


def _url_allowed_for_module(module_path: str, url: str, version: Optional[str] = None) -> bool:
    url = _normalize_url(url)
    overrides = repo_override_candidates(module_path, version)
    if not overrides:
        return True
    normalized_overrides = {_normalize_url(o) for o in overrides}
    return url in normalized_overrides


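# e.g. _normalize_url("git://github.com/example/mod.git") -> "https://github.com/example/mod",
# so override comparisons ignore the protocol scheme and a trailing ".git".
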
def prune_metadata_cache() -> None:
    """
    Remove stale metadata entries that no longer satisfy override policies or
    contain obviously invalid data. This prevents old .inc state from
    re-introducing bad repositories during bootstrap.
    """
    global MODULE_METADATA_CACHE_DIRTY

    removed = False
    for key in list(MODULE_METADATA_CACHE.keys()):
        module_path, version = key
        entry = MODULE_METADATA_CACHE.get(key) or {}
        vcs_url = entry.get('vcs_url', '')
        commit = entry.get('commit', '')

        if not vcs_url or not commit:
            MODULE_METADATA_CACHE.pop(key, None)
            removed = True
            continue

        if len(commit) != 40 or not re.fullmatch(r'[0-9a-fA-F]{40}', commit):
            MODULE_METADATA_CACHE.pop(key, None)
            removed = True
            continue

        if not _url_allowed_for_module(module_path, vcs_url, version):
            MODULE_METADATA_CACHE.pop(key, None)
            removed = True
            continue

    if removed:
        MODULE_METADATA_CACHE_DIRTY = True


def _verify_repo_dir(vcs_url: str) -> Path:
    # Quick check without lock (optimization)
    if vcs_url in VERIFY_REPO_CACHE:
        return VERIFY_REPO_CACHE[vcs_url]

    # Use master lock to serialize repo initialization
    with VERIFY_REPO_LOCKS_LOCK:
        # Double-check after acquiring lock
        if vcs_url in VERIFY_REPO_CACHE:
            return VERIFY_REPO_CACHE[vcs_url]

        repo_hash = hashlib.sha256(vcs_url.encode()).hexdigest()
        repo_dir = VERIFY_BASE_DIR / repo_hash
        git_dir = repo_dir / "repo"
        git_dir.mkdir(parents=True, exist_ok=True)

        env = os.environ.copy()
        env.setdefault("GIT_TERMINAL_PROMPT", "0")
        env.setdefault("GIT_ASKPASS", "true")

        if not (git_dir / "config").exists():
            subprocess.run([
                "git", "init", "--bare"
            ], cwd=str(git_dir), check=True, capture_output=True, env=env)
            subprocess.run([
                "git", "remote", "add", "origin", vcs_url
            ], cwd=str(git_dir), check=True, capture_output=True, env=env)
        else:
            subprocess.run([
                "git", "remote", "set-url", "origin", vcs_url
            ], cwd=str(git_dir), check=False, capture_output=True, env=env)

        VERIFY_REPO_CACHE[vcs_url] = git_dir

        # Create a per-repo lock while we still hold the master lock
        if vcs_url not in VERIFY_REPO_LOCKS:
            VERIFY_REPO_LOCKS[vcs_url] = threading.Lock()

        return git_dir


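# Per repository URL, the bare cache above is roughly equivalent to (sketch):
#   git init --bare $VERIFY_BASE_DIR/<sha256(url)>/repo
#   git -C $VERIFY_BASE_DIR/<sha256(url)>/repo remote add origin <url>
# verify_commit_accessible() then fetches commits into it on demand.
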
def _find_fallback_commit(vcs_url: str, version: str, timestamp: str = "") -> Optional[Tuple[str, str]]:
    """
    Find a fallback commit when the proxy commit doesn't exist.

    Strategy:
    1. For pseudo-versions with timestamp: find commit near that date on default branch
    2. Otherwise: use latest commit on default branch (main/master)

    Returns: (commit_hash, branch_name) or None if failed
    """
    import re
    from datetime import datetime

    env = os.environ.copy()
    env.setdefault("GIT_TERMINAL_PROMPT", "0")
    env.setdefault("GIT_ASKPASS", "true")

    # Extract timestamp from pseudo-version: v0.0.0-YYYYMMDDHHMMSS-hash
    target_date = None
    if timestamp:
        try:
            target_date = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
        except Exception:
            pass

    if not target_date:
        # Try to extract from pseudo-version format
        match = re.match(r'v\d+\.\d+\.\d+-(\d{14})-[0-9a-f]+', version)
        if match:
            date_str = match.group(1)  # YYYYMMDDHHMMSS
            try:
                target_date = datetime.strptime(date_str, '%Y%m%d%H%M%S')
            except Exception:
                pass

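    # Example (illustrative): the pseudo-version "v0.0.0-20230915123456-abcdef123456"
    # yields target_date 2023-09-15 12:34:56, which the date-based search below
    # feeds to `git log --until` on the default branch.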
    # Get default branch
    try:
        result = subprocess.run(
            ["git", "ls-remote", "--symref", vcs_url, "HEAD"],
            capture_output=True,
            text=True,
            timeout=30,
            env=env,
        )
        if result.returncode == 0 and result.stdout:
            # Parse: ref: refs/heads/main  HEAD
            for line in result.stdout.split('\n'):
                if line.startswith('ref:'):
                    default_branch = line.split()[1].replace('refs/heads/', '')
                    break
            else:
                default_branch = 'main'  # Fallback
        else:
            default_branch = 'main'
    except Exception:
        default_branch = 'main'

    # Get commits on default branch
    try:
        if target_date:
            # Find commit closest to target date
            # We need to clone the repo to access commit history with dates

            # NOTE: Do NOT acquire per-repo lock here - our caller already holds it!
            # _find_fallback_commit is only called from within verify_commit_accessible,
            # which has already acquired the per-repo lock for this vcs_url.

            # Get the repo dir (cached, won't re-initialize)
            repo_dir = VERIFY_REPO_CACHE.get(vcs_url)
            if not repo_dir:
                # Shouldn't happen (verify_commit_accessible calls _verify_repo_dir first)
                # but be defensive
                repo_dir = _verify_repo_dir(vcs_url)

            # Fetch the default branch (caller holds lock, so this is safe)
            try:
                subprocess.run(
                    ["git", "fetch", "origin", f"{default_branch}:refs/remotes/origin/{default_branch}"],
                    cwd=str(repo_dir),
                    check=True,
                    capture_output=True,
                    text=True,
                    timeout=60,
                    env=env,
                )
            except subprocess.CalledProcessError:
                # Fallback to latest if fetch fails
                pass

            # Use git log with --until to find commit at or before target date
            # Format: YYYY-MM-DD HH:MM:SS
            date_str = target_date.strftime('%Y-%m-%d %H:%M:%S')
            try:
                result = subprocess.run(
                    ["git", "log", "-1", "--format=%H", f"--until={date_str}", f"origin/{default_branch}"],
                    cwd=str(repo_dir),
                    capture_output=True,
                    text=True,
                    timeout=30,
                    env=env,
                )
                if result.returncode == 0 and result.stdout.strip():
                    commit_hash = result.stdout.strip()
                    return (commit_hash, default_branch)
            except subprocess.CalledProcessError:
                pass

            # If date-based search failed, fall back to latest commit
            result = subprocess.run(
                ["git", "rev-parse", f"origin/{default_branch}"],
                cwd=str(repo_dir),
                capture_output=True,
                text=True,
                timeout=30,
                env=env,
            )
            if result.returncode == 0 and result.stdout.strip():
                commit_hash = result.stdout.strip()
                return (commit_hash, default_branch)
        else:
            # Use latest commit from ls-remote (no need to clone)
            result = subprocess.run(
                ["git", "ls-remote", vcs_url, f"refs/heads/{default_branch}"],
                capture_output=True,
                text=True,
                timeout=30,
                env=env,
            )
            if result.returncode == 0 and result.stdout:
                commit_hash = result.stdout.split()[0]
                return (commit_hash, default_branch)
    except Exception as e:
        print(f"  ⚠️ Fallback commit search failed: {e}")

    return None


def verify_commit_accessible(vcs_url: str, commit: str, ref_hint: str = "", version: str = "", timestamp: str = "") -> bool:
    """
    Fetch commit into a bare cache to ensure it exists upstream.

    Check cache age and force re-verification if too old.
    If commit doesn't exist, use fallback (latest commit on default branch or near timestamp)

    Args:
        vcs_url: Git repository URL
        commit: Commit hash to verify
        ref_hint: Optional ref (tag/branch) that should contain the commit
        version: Module version (for extracting timestamp from pseudo-versions)
        timestamp: ISO timestamp from .info file (for finding commits near that date)
    """
    from datetime import datetime, timezone, timedelta

    # Check cache before acquiring lock (fast path for already-verified commits)
    key = (vcs_url, commit)
    if key in VERIFY_RESULTS:
        return VERIFY_RESULTS[key]

    cache_key = f"{vcs_url}|||{commit}"

    # Track if verification passed via cache (to skip re-saving later)
    cached_verification_passed = False

    # Check cache with aging logic
    if cache_key in VERIFY_COMMIT_CACHE_V2:
        cache_entry = VERIFY_COMMIT_CACHE_V2[cache_key]
        if cache_entry.get("verified"):
            # Check if cache is too old
            last_checked_str = cache_entry.get("last_checked")
            if last_checked_str:
                try:
                    last_checked = datetime.fromisoformat(last_checked_str.replace('Z', '+00:00'))
                    age_days = (datetime.now(timezone.utc) - last_checked).days

                    if age_days < VERIFY_CACHE_MAX_AGE_DAYS:
                        # Cache is fresh for commit existence, but we still need branch detection
                        # Branch detection is cheap (local operation) and critical for BitBake recipes
                        # Don't return early - continue to branch detection below
                        cached_verification_passed = True
                    else:
                        # Cache is stale, force re-verification
                        print(f"  ⏰ Cache stale ({age_days} days old), re-verifying {commit[:12]}...")
                        # Fall through to re-verify
                except Exception:
                    # Can't parse timestamp, force re-verification
                    pass
            else:
                # No timestamp, but still need branch detection
                cached_verification_passed = True

    # Legacy cache format fallback
    if cache_key in VERIFY_COMMIT_CACHE and VERIFY_COMMIT_CACHE[cache_key]:
        # Migrate to v2 format during this check
        now = datetime.now(timezone.utc).isoformat()
        VERIFY_COMMIT_CACHE_V2[cache_key] = {
            "verified": True,
            "first_verified": now,
            "last_checked": now,
            "fetch_method": "cached"
        }
        # Don't return early - continue to branch detection
        cached_verification_passed = True

    # Ensure repo is initialized (this creates the lock too)
    repo_dir = _verify_repo_dir(vcs_url)

    # Now safely get the lock (guaranteed to exist after _verify_repo_dir returns)
    lock = VERIFY_REPO_LOCKS[vcs_url]

    with lock:
        # Double-check cache after acquiring lock (another thread may have verified while we waited)
        if key in VERIFY_RESULTS:
            return VERIFY_RESULTS[key]

        env = os.environ.copy()
        env.setdefault("GIT_TERMINAL_PROMPT", "0")
        env.setdefault("GIT_ASKPASS", "true")

        def _commit_exists(check_commit: str = None) -> bool:
            """Check if a commit exists in the local repo."""
            target = check_commit if check_commit else commit
            try:
                subprocess.run(
                    ["git", "rev-parse", "--verify", f"{target}^{{commit}}"],
                    cwd=str(repo_dir),
                    check=True,
                    capture_output=True,
                    env=env,
                )
                return True
            except subprocess.CalledProcessError:
                return False

        global VERIFY_COMMIT_CACHE_DIRTY, VERIFY_FALLBACK_COMMITS
        cached = VERIFY_COMMIT_CACHE.get(cache_key)

        commit_present = _commit_exists()
        if cached and not commit_present:
            # Cached entry without a local commit indicates stale data; drop it.
            VERIFY_COMMIT_CACHE.pop(cache_key, None)
            VERIFY_COMMIT_CACHE_DIRTY = True
            cached = None

        # Only do shallow fetch if commit is not already present
        # Doing --depth=1 on an already-full repo causes git to re-process history (very slow on large repos)
        if not commit_present and ref_hint:
            fetch_args = ["git", "fetch", "--depth=1", "origin", ref_hint]

            try:
                subprocess.run(
                    fetch_args,
                    cwd=str(repo_dir),
                    check=True,
                    capture_output=True,
                    text=True,
                    timeout=GIT_CMD_TIMEOUT,
                    env=env,
                )
            except subprocess.TimeoutExpired:
                print(f"  ⚠️ git fetch timeout ({GIT_CMD_TIMEOUT}s) for {vcs_url} {ref_hint or ''}")
            except subprocess.CalledProcessError as exc:
                detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else ""
                if detail:
                    print(f"  ⚠️ git fetch failed for {vcs_url} {ref_hint or ''}: {detail}")
                # Continue to attempt direct commit fetch

        # For pseudo-versions, we need to determine which branch contains the commit
        # Strategy depends on whether this is a tagged version or pseudo-version
        commit_fetched = commit_present  # If already present, no need to fetch

        if ref_hint and not commit_present:
            # Tagged version: try shallow fetch of the specific commit (only if not already present)
            try:
                fetch_cmd = ["git", "fetch", "--depth=1", "origin", commit]
                subprocess.run(
                    fetch_cmd,
                    cwd=str(repo_dir),
                    check=True,
                    capture_output=True,
                    text=True,
                    timeout=GIT_CMD_TIMEOUT,
                    env=env,
                )
                commit_fetched = True

            except subprocess.CalledProcessError as exc:
                detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else ""
                if detail:
                    print(f"  ⚠️ git fetch failed for {vcs_url[:50]}...: {detail[:100]}")

                # If fetching commit failed for a tag, check if tag has moved
                if ref_hint and ref_hint.startswith('refs/tags/'):
                    print(f"  → Tag commit not fetchable, checking if tag moved...")
                    try:
                        # Try fetching the tag again to see what it currently points to
                        subprocess.run(
                            ["git", "fetch", "--depth=1", "origin", ref_hint],
                            cwd=str(repo_dir),
                            check=True,
                            capture_output=True,
                            text=True,
                            timeout=GIT_CMD_TIMEOUT,
                            env=env,
                        )

                        # Check what commit the tag now points to
                        result = subprocess.run(
                            ["git", "rev-parse", "FETCH_HEAD"],
                            cwd=str(repo_dir),
                            capture_output=True,
                            text=True,
                            timeout=30,
                            env=env,
                            check=True,
                        )
                        current_tag_commit = result.stdout.strip()

                        if current_tag_commit != commit:
                            print(f"  ✓ Tag moved detected:")
                            print(f"     Proxy gave us: {commit[:12]} (no longer exists)")
                            print(f"     Tag now points to: {current_tag_commit[:12]}")
                            print(f"     → Using current tag commit")

                            # Record the corrected commit so callers can pick it
                            # up via get_actual_commit(), and report success
                            # (this function's contract is bool).
                            VERIFY_FALLBACK_COMMITS[(vcs_url, commit)] = current_tag_commit
                            VERIFY_RESULTS[key] = True
                            return True
                    except subprocess.CalledProcessError:
                        # Can't fetch tag either - this is a real error
                        pass

                for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]:
                    lock_path = repo_dir / lock_file
                    if lock_path.exists():
                        try:
                            lock_path.unlink()
                        except Exception:
                            pass
                VERIFY_RESULTS[key] = False
                VERIFY_COMMIT_CACHE.pop(cache_key, None)
                VERIFY_COMMIT_CACHE_DIRTY = True
                return False
        else:
            # Pseudo-version: MUST do full clone to detect which branch contains commit
            # Shallow fetch is useless - we need history for git for-each-ref --contains

            # Check if we already fetched full history for this repo URL
            # This prevents redundant full-history fetches for repos with multiple module versions
            shallow_file = repo_dir / "shallow"
            is_shallow = shallow_file.exists()
            already_full = vcs_url in VERIFY_FULL_REPOS

            if is_shallow and not already_full:
                print(f"  → Fetching full history for branch detection...")
                try:
                    # Use --unshallow to convert shallow clone to full clone
                    subprocess.run(
                        ["git", "fetch", "--unshallow", "origin", "+refs/heads/*:refs/remotes/origin/*"],
                        cwd=str(repo_dir),
                        check=True,
                        capture_output=True,
                        text=True,
                        timeout=GIT_CMD_TIMEOUT * 5,
                        env=env,
                    )
                    commit_fetched = True
                    # Mark this repo as having full history
                    VERIFY_FULL_REPOS.add(vcs_url)
                except subprocess.TimeoutExpired:
                    print(f"  ⚠️ Full clone timeout for {vcs_url[:50]}...")
                    for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]:
                        lock_path = repo_dir / lock_file
                        if lock_path.exists():
                            try:
                                lock_path.unlink()
                            except Exception:
                                pass
                    VERIFY_RESULTS[key] = False
                    VERIFY_COMMIT_CACHE.pop(cache_key, None)
                    VERIFY_COMMIT_CACHE_DIRTY = True
                    return False
                except subprocess.CalledProcessError as exc:
                    detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else ""
                    if detail:
                        print(f"  ⚠️ Full clone failed for {vcs_url[:50]}...: {detail[:100]}")
                    for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]:
                        lock_path = repo_dir / lock_file
                        if lock_path.exists():
                            try:
                                lock_path.unlink()
                            except Exception:
                                pass
                    VERIFY_RESULTS[key] = False
                    VERIFY_COMMIT_CACHE.pop(cache_key, None)
                    VERIFY_COMMIT_CACHE_DIRTY = True
                    return False
            else:
                # Already full - just fetch updates
                print(f"  → Fetching updates (repo already full)...")
                try:
                    subprocess.run(
                        ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"],
                        cwd=str(repo_dir),
                        check=True,
                        capture_output=True,
                        text=True,
                        timeout=GIT_CMD_TIMEOUT,
                        env=env,
                    )
                    commit_fetched = True
                except subprocess.TimeoutExpired:
                    print(f"  ⚠️ Full clone timeout for {vcs_url[:50]}...")
                    for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]:
                        lock_path = repo_dir / lock_file
                        if lock_path.exists():
                            try:
                                lock_path.unlink()
                            except Exception:
                                pass
                    VERIFY_RESULTS[key] = False
                    VERIFY_COMMIT_CACHE.pop(cache_key, None)
                    VERIFY_COMMIT_CACHE_DIRTY = True
                    return False
                except subprocess.CalledProcessError as exc:
                    detail = (exc.stderr or exc.stdout or "").strip() if isinstance(exc.stderr, str) or isinstance(exc.stdout, str) else ""
                    if detail:
                        print(f"  ⚠️ Full clone failed for {vcs_url[:50]}...: {detail[:100]}")
                    for lock_file in ["shallow.lock", "index.lock", "HEAD.lock"]:
                        lock_path = repo_dir / lock_file
                        if lock_path.exists():
                            try:
                                lock_path.unlink()
                            except Exception:
                                pass
                    VERIFY_RESULTS[key] = False
                    VERIFY_COMMIT_CACHE.pop(cache_key, None)
                    VERIFY_COMMIT_CACHE_DIRTY = True
                    return False

        # Use the original commit or fallback commit for verification
        actual_commit = commit

        if not _commit_exists():
            # Commit doesn't exist in repository - try fallback strategy
            # This handles orphaned commits from proxy.golang.org
            print(f"  ⚠️ Commit {commit[:12]} not found in repository {vcs_url[:50]}...")

            if not ref_hint:
                # Pseudo-version without a tag - use timestamp-based fallback
                print(f"  → Attempting fallback commit strategy for pseudo-version {version}")
                fallback_result = _find_fallback_commit(vcs_url, version, timestamp)

                if fallback_result:
                    fallback_commit, fallback_branch = fallback_result
                    print(f"  ⚠️ Using fallback: {fallback_commit[:12]} from branch '{fallback_branch}'")
                    print(f"     (Original commit {commit[:12]} from proxy.golang.org does not exist)")

                    # Update commit to use the fallback
                    actual_commit = fallback_commit

                    # Track the fallback mapping so callers can use the fallback commit
                    VERIFY_FALLBACK_COMMITS[(vcs_url, commit)] = fallback_commit

                    # Fetch the fallback commit (only unshallow if repo is still shallow)
                    shallow_file = repo_dir / "shallow"
                    is_shallow = shallow_file.exists()
                    try:
                        if is_shallow:
                            subprocess.run(
                                ["git", "fetch", "--unshallow", "origin", "+refs/heads/*:refs/remotes/origin/*"],
                                cwd=str(repo_dir),
                                check=True,
                                capture_output=True,
                                text=True,
                                timeout=GIT_CMD_TIMEOUT * 5,
                                env=env,
                            )
                        else:
                            # Repo already has full history - just fetch updates
                            subprocess.run(
                                ["git", "fetch", "origin", "+refs/heads/*:refs/remotes/origin/*"],
                                cwd=str(repo_dir),
                                check=True,
                                capture_output=True,
                                text=True,
                                timeout=GIT_CMD_TIMEOUT,
                                env=env,
                            )
                    except Exception as e:
                        print(f"  ⚠️ Failed to fetch fallback commit: {e}")
                        VERIFY_RESULTS[key] = False
                        return False

                    # Register the fallback branch
                    VERIFY_DETECTED_BRANCHES[(vcs_url, fallback_commit)] = fallback_branch

                    # Check if fallback commit exists
                    if not _commit_exists(fallback_commit):
                        print(f"  ⚠️ Fallback commit {fallback_commit[:12]} also not found!")
                        VERIFY_RESULTS[key] = False
                        return False
                else:
                    print(f"  ⚠️ Could not determine fallback commit")
                    VERIFY_RESULTS[key] = False
                    return False
            else:
                # Tagged version with bad commit - this shouldn't happen but fail gracefully
                print(f"  ⚠️ Tagged version {version} has invalid commit {commit[:12]}")
                VERIFY_RESULTS[key] = False
                return False

        # Now verify the actual_commit (original or fallback)
        if _commit_exists(actual_commit):
            # Commit was fetched successfully - verify it's reachable from the ref_hint if provided
            # This ensures the commit is on the branch/tag we'll use in SRC_URI
            if ref_hint:
                # For tagged versions, verify the tag still points to the same commit
                # proxy.golang.org caches module@version->commit mappings, but tags can be force-pushed
                # If the tag has moved to a different commit, we need to use the current commit
                # Optimization: Use git ls-remote first (fast, cached) before fetching
                if ref_hint.startswith('refs/tags/'):
                    try:
                        # First check if tag has moved using fast ls-remote (cached)
                        # FIX #37: Use dereferenced tag (^{}) to get the actual commit hash
                        # For annotated tags, ref_hint returns the tag object hash, not the commit
                        # Example: refs/tags/v1.0.1 -> c49ff274 (tag object)
                        #          refs/tags/v1.0.1^{} -> 37c8de36 (actual commit)
                        current_tag_commit = git_ls_remote(vcs_url, f"{ref_hint}^{{}}")

                        if current_tag_commit and current_tag_commit != actual_commit:
                            # Tag has moved - fetch it to verify and update local repo
                            print(f"  ⚠️ Tag has moved - proxy.golang.org cache is stale")
                            print(f"     Proxy gave us: {actual_commit[:12]}")
                            print(f"     Tag now points to: {current_tag_commit[:12]}")
                            print(f"     → Using current tag commit")

                            # Fetch the tag to update local repo
                            subprocess.run(
                                ["git", "fetch", "--depth=1", "origin", ref_hint],
                                cwd=str(repo_dir),
                                check=True,
                                capture_output=True,
                                text=True,
                                timeout=GIT_CMD_TIMEOUT,
                                env=env,
                            )

                            # Update to use current commit
                            VERIFY_FALLBACK_COMMITS[(vcs_url, actual_commit)] = current_tag_commit
                            actual_commit = current_tag_commit

                            # Verify the new commit exists (it should, since we just fetched it)
                            if not _commit_exists(current_tag_commit):
                                print(f"  ⚠️ Current tag commit {current_tag_commit[:12]} not found!")
                                VERIFY_RESULTS[key] = False
                                VERIFY_COMMIT_CACHE.pop(cache_key, None)
                                VERIFY_COMMIT_CACHE_DIRTY = True
                                return False

                        # The VERIFY_FALLBACK_COMMITS mapping will be used by the caller
                        # Continue with verification using the corrected commit
                    except Exception as e:
                        # Tag verification failed - continue with normal flow
                        print(f"  ⚠️ Could not verify tag target: {e}")
                        pass

                try:
                    # Check if commit is an ancestor of (or equal to) the ref
                    # This works even with shallow clones
                    result = subprocess.run(
                        ["git", "merge-base", "--is-ancestor", actual_commit, "FETCH_HEAD"],
                        cwd=str(repo_dir),
                        capture_output=True,
                        text=True,
                        timeout=30,
                        env=env,
                    )
                    if result.returncode != 0:
                        # Commit is not an ancestor of the ref - might be on a different branch
                        # This is OK - BitBake can still fetch the commit directly
                        # Just log it for debugging
                        pass  # Don't fail - commit exists and is fetchable
                except subprocess.TimeoutExpired:
                    print(f"  ⚠️ Timeout checking commit ancestry for {actual_commit[:12]}")
                    # Don't fail - commit exists
                except subprocess.CalledProcessError:
                    # merge-base failed - don't fail verification
                    pass
            else:
                # For pseudo-versions, we MUST detect which branch contains the commit
                # This is CRITICAL - BitBake cannot fetch arbitrary commits with nobranch=1
                # We need branch=<name> in SRC_URI for interior commits

                # Check if we already have the branch from fallback
                if (vcs_url, actual_commit) not in VERIFY_DETECTED_BRANCHES:
                    # Now that we have full history, use git to find which branches contain this commit
                    try:
                        result = subprocess.run(
                            ["git", "for-each-ref", "--contains", actual_commit, "refs/remotes/origin/", "--format=%(refname:short)"],
                            cwd=str(repo_dir),
                            capture_output=True,
                            text=True,
                            timeout=30,
                            env=env,
                        )
                        if result.returncode == 0 and result.stdout.strip():
                            # Commit IS on one or more branches
                            branches = result.stdout.strip().split('\n')
                            # Strip 'origin/' prefix from branch names
                            branches = [b.replace('origin/', '') for b in branches]

                            # Pick main/master if available, otherwise first branch
                            if 'main' in branches:
                                detected_branch = 'main'
                            elif 'master' in branches:
                                detected_branch = 'master'
                            else:
                                detected_branch = branches[0]

                            VERIFY_DETECTED_BRANCHES[(vcs_url, actual_commit)] = detected_branch
                            print(f"  → Detected branch: {detected_branch} (verified with git for-each-ref)")
                        else:
                            # Commit exists but not in any branch - it's orphaned/dangling
                            # For pseudo-versions, try fallback strategy
                            # DEBUG: ALWAYS print this to confirm we reach this block
                            print(f"  ⚠️ ORPHANED: Commit {actual_commit[:12]} not found in any branch for {vcs_url[:50]}")
                            print(f"  DEBUG-ORPHANED: ref_hint={ref_hint}, actual_commit={actual_commit[:12]}, commit={commit[:12]}, version={version}")
                            print(f"  DEBUG-ORPHANED: Condition: (not ref_hint)={not ref_hint}, (actual==commit)={actual_commit == commit}")

                            if not ref_hint and actual_commit == commit:
                                # This is a pseudo-version with orphaned commit - try fallback
                                print(f"  → Attempting fallback commit strategy for orphaned commit")
                                fallback_result = _find_fallback_commit(vcs_url, version, timestamp)

                                if fallback_result:
                                    fallback_commit, fallback_branch = fallback_result
                                    print(f"  ✓ Using fallback: {fallback_commit[:12]} from branch '{fallback_branch}'")
                                    print(f"     (Original commit {commit[:12]} from proxy.golang.org is orphaned)")

                                    # Update to use the fallback
                                    actual_commit = fallback_commit
                                    VERIFY_FALLBACK_COMMITS[(vcs_url, commit)] = fallback_commit
                                    VERIFY_DETECTED_BRANCHES[(vcs_url, fallback_commit)] = fallback_branch

                                    # Verify fallback commit exists
                                    if not _commit_exists(fallback_commit):
                                        print(f"  ⚠️ Fallback commit {fallback_commit[:12]} not found!")
                                        VERIFY_RESULTS[key] = False
                                        return False
                                    # Continue with fallback commit - don't fail here
                                else:
                                    print(f"  ⚠️ Could not determine fallback commit")
                                    VERIFY_RESULTS[key] = False
                                    return False
                            else:
                                # Tagged version or already tried fallback - fail
                                VERIFY_RESULTS[key] = False
                                return False
                    except subprocess.TimeoutExpired:
                        print(f"  ⚠️ Branch detection timeout for {actual_commit[:12]}")
                        VERIFY_RESULTS[key] = False
                        return False
                    except subprocess.CalledProcessError:
                        print(f"  ⚠️ Failed to detect branch for {actual_commit[:12]}")
                        VERIFY_RESULTS[key] = False
                        return False

            # Commit exists AND is reachable - safe for BitBake nobranch=1
            # Only save to cache if not already cached (branch detection is done, just finalize)
            if not cached_verification_passed:
                # Save with timestamp in v2 format
                now = datetime.now(timezone.utc).isoformat()
                existing_entry = VERIFY_COMMIT_CACHE_V2.get(cache_key, {})

                VERIFY_COMMIT_CACHE_V2[cache_key] = {
                    "verified": True,
                    "first_verified": existing_entry.get("first_verified", now),
                    "last_checked": now,
                    "fetch_method": "fetch"  # Successfully fetched from upstream
                }
                VERIFY_COMMIT_CACHE_DIRTY = True

            VERIFY_RESULTS[key] = True
            return True

        VERIFY_RESULTS[key] = False
        # Remove from both caches
        VERIFY_COMMIT_CACHE.pop(cache_key, None)
        VERIFY_COMMIT_CACHE_V2.pop(cache_key, None)
        VERIFY_COMMIT_CACHE_DIRTY = True
        return False


def get_actual_commit(vcs_url: str, commit: str) -> str:
    """
    Get the actual commit to use, applying fallback if original commit doesn't exist.

    This should be called after verify_commit_accessible() to get the commit that was
    actually verified (which may be a fallback if the original didn't exist).

    Args:
        vcs_url: Repository URL
        commit: Original commit hash from proxy.golang.org

    Returns:
        Fallback commit if one was used, otherwise the original commit
    """
    return VERIFY_FALLBACK_COMMITS.get((vcs_url, commit), commit)


def _ref_points_to_commit(vcs_url: str, ref_hint: str, commit_hash: str) -> bool:
    if not ref_hint:
        return False

    repo_dir = _verify_repo_dir(vcs_url)
    # Lock is guaranteed to exist after _verify_repo_dir returns
    lock = VERIFY_REPO_LOCKS[vcs_url]

    with lock:
        env = os.environ.copy()
        env.setdefault("GIT_TERMINAL_PROMPT", "0")
        env.setdefault("GIT_ASKPASS", "true")

        try:
            result = subprocess.run(
                ["git", "show-ref", "--verify", "--hash", ref_hint],
                cwd=str(repo_dir),
                check=True,
                capture_output=True,
                text=True,
                env=env,
            )
            resolved = result.stdout.strip().lower()
            return resolved == commit_hash.lower()
        except subprocess.CalledProcessError:
            return False


def correct_commit_hash_from_ref(vcs_url: str, vcs_hash: str, vcs_ref: str) -> Optional[str]:
    """
    Fix proxy.golang.org bad hashes by dereferencing the tag to get the correct commit.

    proxy.golang.org sometimes returns commits that:
    1. Exist in the repo but aren't branch/tag HEADs (dangling commits)
    2. Don't exist in the repo at all

    BitBake's nobranch=1 requires commits to be HEADs of branches or dereferenced tags.

    Args:
        vcs_url: Repository URL
        vcs_hash: Commit hash from proxy.golang.org (potentially bad)
        vcs_ref: Git ref like "refs/tags/v1.2.3"

    Returns:
        Corrected commit hash if different from vcs_hash, None if vcs_hash is correct or can't be corrected
    """
    if not vcs_ref or not vcs_ref.startswith("refs/"):
        return None

    # Try dereferenced tag first (annotated tags)
    dereferenced_hash = git_ls_remote(vcs_url, f"{vcs_ref}^{{}}")
    if dereferenced_hash and dereferenced_hash.lower() != vcs_hash.lower():
        return dereferenced_hash.lower()

    # Try without ^{} for lightweight tags
    commit_hash = git_ls_remote(vcs_url, vcs_ref)
    if commit_hash and commit_hash.lower() != vcs_hash.lower():
        return commit_hash.lower()

    return None


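# Example `git ls-remote` view of an annotated tag (hashes taken from the
# comment in verify_commit_accessible, shown here for illustration):
#   c49ff274...  refs/tags/v1.0.1      <- tag object hash
#   37c8de36...  refs/tags/v1.0.1^{}   <- dereferenced commit hash
# A lightweight tag has no ^{} entry; the plain ref already names the commit.
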
def is_commit_bitbake_fetchable(vcs_url: str, vcs_hash: str, vcs_ref: str) -> bool:
    """
    Check if a commit is BitBake-fetchable (is a branch/tag HEAD).

    BitBake's nobranch=1 requires commits to be:
    - HEAD of a branch (refs/heads/*)
    - HEAD of a dereferenced tag (refs/tags/*^{})

    Uses cached git ls-remote to check if the commit appears in the remote repository as a ref HEAD.

    Args:
        vcs_url: Repository URL
        vcs_hash: Commit hash to check
        vcs_ref: Git ref hint like "refs/tags/v1.2.3"

    Returns:
        True if commit is a branch/tag HEAD, False if dangling/not found
    """
    # Quick check: Does the ref point to this commit?
    if vcs_ref and vcs_ref.startswith("refs/"):
        # Try dereferenced tag (annotated)
        ref_commit = git_ls_remote(vcs_url, f"{vcs_ref}^{{}}")
        if ref_commit and ref_commit.lower() == vcs_hash.lower():
            return True

        # Try without ^{} for lightweight tags
        ref_commit = git_ls_remote(vcs_url, vcs_ref)
        if ref_commit and ref_commit.lower() == vcs_hash.lower():
            return True

    # If we get here, the vcs_hash doesn't match the ref, so it's dangling
    return False


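# The distinction feeds recipe generation (SRC_URI forms are an illustrative
# sketch, not output of this function):
#   SRC_URI = "git://github.com/example/mod;protocol=https;nobranch=1"   # branch/tag HEAD
#   SRC_URI = "git://github.com/example/mod;protocol=https;branch=main"  # interior commit
# with SRCREV set to the verified commit in either case.
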
def verify_gomodcache_commits(gomodcache_path: Path, verify_jobs: int = 10) -> int:
    """
    Verify commits in GOMODCACHE .info files still exist in repositories.

    Detects force-pushed tags where proxy.golang.org has stale commit hashes.
    Offers to automatically refresh stale .info files by re-downloading.

    Returns:
        0 if all commits valid or successfully refreshed
        1 if stale commits found and user declined refresh
    """
    global VERIFY_ENABLED
    VERIFY_ENABLED = True

    if isinstance(gomodcache_path, str):
        gomodcache_path = Path(gomodcache_path)

    if not gomodcache_path.exists():
        print(f"❌ GOMODCACHE not found: {gomodcache_path}")
        return 1

    download_dir = gomodcache_path / "cache" / "download"
    if not download_dir.exists():
        print(f"❌ Download directory not found: {download_dir}")
        return 1

    print(f"\nScanning {download_dir} for .info files...")

    # Collect all modules with VCS info
    modules_to_check = []
    for dirpath, _, filenames in os.walk(download_dir):
        path_parts = Path(dirpath).relative_to(download_dir).parts
        if not path_parts or path_parts[-1] != '@v':
            continue

        module_path = '/'.join(path_parts[:-1])
        module_path = unescape_module_path(module_path)

        for filename in filenames:
            if not filename.endswith('.info'):
                continue

            version = filename[:-5]
            info_path = Path(dirpath) / filename

            try:
                with open(info_path) as f:
                    info = json.load(f)

                origin = info.get('Origin', {})
                vcs_url = origin.get('URL')
                vcs_hash = origin.get('Hash')
                vcs_ref = origin.get('Ref', '')

                if vcs_url and vcs_hash and len(vcs_hash) == 40:
                    modules_to_check.append({
                        'module_path': module_path,
                        'version': version,
                        'vcs_url': vcs_url,
                        'vcs_hash': vcs_hash,
                        'vcs_ref': vcs_ref,
                        'info_path': info_path
                    })
            except Exception as e:
                print(f"  ⚠️ Error reading {info_path}: {e}")

    print(f"Found {len(modules_to_check)} modules with VCS metadata to verify\n")

    if not modules_to_check:
        print("✅ No modules to verify")
        return 0

    # Verify commits in parallel
    stale_modules = []

    def check_module(module):
        if verify_commit_accessible(module['vcs_url'], module['vcs_hash'], module['vcs_ref'], module.get('version', '')):
            return None
        else:
            return module

    if verify_jobs > 0:
        print(f"Verifying commits in parallel ({verify_jobs} workers)...")
        # The module-level import is `import concurrent.futures`, so the
        # executor must be referenced by its fully qualified name.
        with concurrent.futures.ThreadPoolExecutor(max_workers=verify_jobs) as executor:
            futures = {executor.submit(check_module, m): m for m in modules_to_check}
            for future in futures:
                result = future.result()
                if result:
                    stale_modules.append(result)
    else:
        print("Verifying commits sequentially...")
        for module in modules_to_check:
            result = check_module(module)
            if result:
                stale_modules.append(result)

    if not stale_modules:
        print(f"\n✅ All {len(modules_to_check)} commits verified successfully!")
        return 0

    # Report stale modules
    print(f"\n⚠️ Found {len(stale_modules)} modules with STALE commits:\n")
    for module in stale_modules[:10]:  # Show first 10
        print(f"  {module['module_path']}@{module['version']}")
        print(f"    Commit: {module['vcs_hash'][:12]} (not found in {module['vcs_url']})")
        print(f"    File: {module['info_path']}")
        print()

    if len(stale_modules) > 10:
        print(f"  ... and {len(stale_modules) - 10} more\n")

    # Offer to auto-refresh
    print("These commits likely represent force-pushed tags.")
    print("The .info files can be refreshed by re-downloading from proxy.golang.org\n")

    response = input("Refresh stale .info files automatically? [y/N]: ").strip().lower()
    if response not in ('y', 'yes'):
        print("\nNo action taken. To fix manually:")
        print("  1. Delete stale .info files")
        print("  2. Run: go mod download")
        return 1

    # Refresh stale modules
    print("\nRefreshing stale modules...")
    refreshed = 0
    failed = []

    for module in stale_modules:
        print(f"\n  Refreshing {module['module_path']}@{module['version']}...")

        try:
            # Delete stale .info file
            module['info_path'].unlink()
            print(f"    Deleted stale .info")

            # Re-download
            result = subprocess.run(
                ['go', 'mod', 'download', f"{module['module_path']}@{module['version']}"],
                capture_output=True,
                text=True,
                timeout=60
            )

            if result.returncode == 0 and module['info_path'].exists():
                # Verify new commit
                with open(module['info_path']) as f:
                    new_info = json.load(f)
                new_hash = new_info.get('Origin', {}).get('Hash', '')

                if new_hash and new_hash != module['vcs_hash']:
                    print(f"    ✓ Refreshed: {module['vcs_hash'][:12]} → {new_hash[:12]}")
                    refreshed += 1
                else:
                    print(f"    ⚠️ Proxy returned same commit")
                    failed.append(module)
            else:
                print(f"    ❌ Download failed: {result.stderr[:100]}")
                failed.append(module)
        except Exception as e:
            print(f"    ❌ Error: {e}")
            failed.append(module)

    print(f"\n{'='*70}")
    print(f"Refresh complete: {refreshed} refreshed, {len(failed)} failed")

    if failed:
        print(f"\nFailed modules require manual intervention:")
        for module in failed[:5]:
            print(f"  {module['module_path']}@{module['version']}")
        return 1

    return 0


def is_module_actually_needed(module_path: str, source_dir: Path) -> bool:
    """
    Check if a module is actually used by running 'go mod why'.

    Returns:
        True if module is needed by the main module
        False if module is indirect-only and not actually imported
    """
    try:
        result = subprocess.run(
            ['go', 'mod', 'why', module_path],
            cwd=str(source_dir),
            capture_output=True,
            text=True,
            timeout=30
        )

        if result.returncode != 0:
            # If go mod why fails, assume it's needed (conservative)
            return True

        output = result.stdout.strip()

        # Check for the telltale sign that module is not needed
        if "(main module does not need package" in output:
            return False

        # Also check for completely empty output (module not in graph)
        if not output or output == f"# {module_path}":
            return False

        # Module is needed
        return True

    except Exception:
        # On error, assume needed (conservative)
        return True


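# `go mod why` output shapes this check relies on (modules illustrative):
#   needed:    "# golang.org/x/text/language" followed by the import chain
#              from the main module down to that package
#   unneeded:  "(main module does not need package github.com/unused/mod)"
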
def _execute(args: argparse.Namespace) -> int:
    global CURRENT_SOURCE_DIR, CURRENT_GOMODCACHE, VERIFY_COMMIT_CACHE_DIRTY
    debug_limit = args.debug_limit

    if args.source_dir:
        source_dir = Path(args.source_dir).resolve()
    else:
        source_dir = Path.cwd()
    CURRENT_SOURCE_DIR = source_dir

    if not (source_dir / "go.mod").exists():
        print(f"❌ Error: go.mod not found in {source_dir}")
        return 1

    print(f"Source directory: {source_dir}")

    if args.recipedir:
        output_dir = Path(args.recipedir).resolve()
        output_dir.mkdir(parents=True, exist_ok=True)
        print(f"Output directory: {output_dir}")
    else:
        output_dir = None
        if not args.validate and not args.dry_run:
            print("❌ Error: --recipedir is required unless running with --validate, --dry-run, or cache-maintenance flags.")
            return 1

    configure_cache_paths(args.cache_dir, args.clone_cache_dir)
    if args.cache_dir:
        print(f"Metadata cache directory: {CACHE_BASE_DIR}")
    if args.clone_cache_dir:
        print(f"Clone cache directory: {CLONE_CACHE_DIR}")

    # Set verification cache max age from command line
    global MODULE_REPO_OVERRIDES_DIRTY, VERIFY_CACHE_MAX_AGE_DAYS
    VERIFY_CACHE_MAX_AGE_DAYS = args.verify_cache_max_age
    if VERIFY_CACHE_MAX_AGE_DAYS == 0:
        print(f"Verification cache: DISABLED (always verify)")
    else:
        print(f"Verification cache max age: {VERIFY_CACHE_MAX_AGE_DAYS} days")

    if args.clear_repo:
        for (module_spec,) in args.clear_repo:
            module_path, version = _parse_override_spec(module_spec)
            removed = False
            try:
                key = _normalise_override_key(module_path, version)
            except ValueError as exc:
                print(f"Invalid module override '{module_spec}': {exc}")
                continue
            if version is not None:
                if MODULE_REPO_OVERRIDES.pop(key, None) is not None:
                    removed = True
                    MODULE_REPO_OVERRIDES_DIRTY = True
                    print(f"Cleared repo override: {module_path}@{version}")
            else:
                wildcard_key = key
                if MODULE_REPO_OVERRIDES.pop(wildcard_key, None) is not None:
                    removed = True
                specific_keys = [
                    candidate for candidate in list(MODULE_REPO_OVERRIDES.keys())
                    if candidate[0] == module_path and candidate[1] is not None
                ]
                for candidate in specific_keys:
                    MODULE_REPO_OVERRIDES.pop(candidate, None)
                    removed = True
                if removed:
                    MODULE_REPO_OVERRIDES_DIRTY = True
                    print(f"Cleared repo overrides for: {module_path}")
            if not removed:
                if version is not None:
                    print(f"No repo override found for: {module_path}@{version}")
                else:
                    print(f"No repo overrides found for: {module_path}")

    if args.set_repo:
        for module_spec, repo_url in args.set_repo:
            module_path, version = _parse_override_spec(module_spec)
            try:
                key = _normalise_override_key(module_path, version)
            except ValueError as exc:
                print(f"Invalid module override '{module_spec}': {exc}")
                continue
            MODULE_REPO_OVERRIDES[key] = repo_url
            MODULE_REPO_OVERRIDES_DIRTY = True
            label = f"{module_path}@{version}" if version else module_path
            print(f"Pinned repo override: {label} -> {repo_url}")

    if args.clear_commit:
        for repo, commit in args.clear_commit:
            key = f"{repo}|||{commit}"
            if key in VERIFY_COMMIT_CACHE:
                VERIFY_COMMIT_CACHE.pop(key, None)
                VERIFY_COMMIT_CACHE_DIRTY = True
                print(f"\n🧹 Cleared cached verification: {repo} {commit}\n")
            else:
                print(f"No cached verification found for: {repo} {commit}")
            VERIFY_RESULTS.pop((repo, commit), None)

    if args.inject_commit:
|
||
for repo, commit in args.inject_commit:
|
||
key = f"{repo}|||{commit}"
|
||
VERIFY_COMMIT_CACHE[key] = True
|
||
VERIFY_COMMIT_CACHE_DIRTY = True
|
||
VERIFY_RESULTS[(repo, commit)] = True
|
||
print(f"Injected verified commit: {repo} {commit}")
|
||
|
||
exit_code = 0
|
||
|
||
if args.clean_ls_remote_cache:
|
||
print("\n🗑️ Cleaning git ls-remote cache...")
|
||
if LS_REMOTE_CACHE_PATH.exists():
|
||
LS_REMOTE_CACHE_PATH.unlink()
|
||
print(f" Removed {LS_REMOTE_CACHE_PATH}")
|
||
else:
|
||
print(f" Cache file not found: {LS_REMOTE_CACHE_PATH}")
|
||
args.clean_cache = True
|
||
|
||
if args.clean_cache:
|
||
print("\n🗑️ Cleaning module metadata cache...")
|
||
if MODULE_METADATA_CACHE_PATH.exists():
|
||
MODULE_METADATA_CACHE_PATH.unlink()
|
||
print(f" Removed {MODULE_METADATA_CACHE_PATH}")
|
||
else:
|
||
print(f" Cache file not found: {MODULE_METADATA_CACHE_PATH}")
|
||
if VERIFY_COMMIT_CACHE_PATH.exists():
|
||
VERIFY_COMMIT_CACHE_PATH.unlink()
|
||
print(f" Removed {VERIFY_COMMIT_CACHE_PATH}")
|
||
VERIFY_COMMIT_CACHE.clear()
|
||
VERIFY_COMMIT_CACHE_DIRTY = False
|
||
print(" Note: Bootstrap from .inc files DISABLED to avoid reloading stale data.")
|
||
skip_inc_files = True
|
||
else:
|
||
skip_inc_files = False
|
||
|
||
skip_legacy_module_cache = args.skip_legacy_module_cache
|
||
bootstrap_metadata_cache(
|
||
output_dir,
|
||
skip_inc_files=skip_inc_files,
|
||
skip_legacy_module_cache=skip_legacy_module_cache,
|
||
)
|
||
prune_metadata_cache()
|
||
load_ls_remote_cache()
|
||
load_vanity_url_cache()
|
||
|
||
if args.dry_run:
|
||
print("\n--dry-run requested; skipping discovery/validation")
|
||
return 0
|
||
|
||
# --verify-cached command to check GOMODCACHE for stale commits
|
||
if args.verify_cached:
|
||
print("\n" + "=" * 70)
|
||
print("VERIFYING CACHED COMMITS IN GOMODCACHE")
|
||
print("=" * 70)
|
||
return verify_gomodcache_commits(args.gomodcache or source_dir / ".gomodcache", args.verify_jobs)
|
||
|
||
# Check for --discovered-modules (bootstrap strategy)
|
||
if args.discovered_modules:
|
||
print("\n" + "=" * 70)
|
||
print("PRE-DISCOVERED MODULES MODE")
|
||
print("=" * 70)
|
||
print("\nUsing pre-discovered module metadata from BitBake discovery build")
|
||
print("Skipping discovery phase - generator will convert to BitBake format\n")
|
||
|
||
discovered_modules_path = Path(args.discovered_modules).resolve()
|
||
modules = load_discovered_modules(discovered_modules_path)
|
||
|
||
if modules is None:
|
||
print("\n❌ Failed to load discovered modules - falling back to discovery")
|
||
modules = discover_modules(source_dir, args.gomodcache)
|
||
else:
|
||
print(f"\n✓ Successfully loaded {len(modules)} modules from discovery metadata")
|
||
print(" Skipping 'go mod download' discovery phase")
|
||
print(" Will use go.sum to resolve modules without Origin metadata")
|
||
|
||
# Auto-correction of dangling commits happens in Phase 2 during parallel verification
|
||
else:
|
||
# Normal discovery path
|
||
modules = discover_modules(source_dir, args.gomodcache)
|
||
if debug_limit is not None and len(modules) > debug_limit:
|
||
print(f"\n⚙️ Debug limit active: truncating discovered modules to first {debug_limit} entries")
|
||
modules = modules[:debug_limit]
|
||
|
||
# Set VERIFY_ENABLED based on whether verification is requested
|
||
global VERIFY_ENABLED
|
||
VERIFY_ENABLED = not args.skip_verify
|
||
|
||
# Parse go.mod replace directives for fork resolution
|
||
# Example: github.com/containerd/containerd/v2 => github.com/k3s-io/containerd/v2 v2.1.4-k3s2
|
||
go_mod_replaces = parse_go_mod_replaces(source_dir / "go.mod")
|
||
if go_mod_replaces:
|
||
print(f"\n✓ Parsed {len(go_mod_replaces)} replace directives from go.mod")
|
||
if VERBOSE_MODE:
|
||
for old_path, (new_path, new_version) in sorted(go_mod_replaces.items())[:5]:
|
||
print(f" {old_path} => {new_path} {new_version}")
|
||
if len(go_mod_replaces) > 5:
|
||
print(f" ... and {len(go_mod_replaces) - 5} more")
|
||
|
||
# Parse go.sum for fallback resolution
|
||
discovered_keys = {(m['module_path'], m['version']) for m in modules}
|
||
go_sum_modules_with_source, go_sum_indirect_only = parse_go_sum(source_dir / "go.sum")
|
||
|
||
FAILED_MODULE_PATHS.clear()
|
||
FAILED_MODULE_ENTRIES.clear()
|
||
SKIPPED_MODULES.clear()
|
||
|
||
print(f"\nFound {len(go_sum_indirect_only)} indirect-only dependencies (skipping - only need .mod files)")
|
||
|
||
if args.discovered_modules:
|
||
# With discovered modules, only resolve what's in go.sum but missing from discovery
|
||
# Do NOT call go list -m all - we already know what we need from the successful build
|
||
missing_from_discovery = go_sum_modules_with_source - discovered_keys
|
||
print(f"Discovered modules provided {len(discovered_keys)} modules with Origin metadata")
|
||
print(f"go.sum has {len(go_sum_modules_with_source)} modules total")
|
||
print(f"Resolving {len(missing_from_discovery)} modules without Origin metadata...")
|
||
else:
|
||
# Normal discovery - also use go list to find additional modules
|
||
go_list_modules = collect_modules_via_go_list(source_dir)
|
||
go_sum_modules_with_source |= go_list_modules
|
||
missing_from_discovery = go_sum_modules_with_source - discovered_keys
|
||
print(f"Resolving {len(missing_from_discovery)} additional modules discovered from go.sum/go list...")
|
||
|
||
modules_by_path: Dict[str, List[Dict]] = {}
|
||
for m in modules:
|
||
modules_by_path.setdefault(m['module_path'], []).append(m)
|
||
|
||
limit_reached = False
|
||
for module_path, version in sorted(go_sum_modules_with_source):
|
||
if debug_limit is not None and len(modules) >= debug_limit:
|
||
limit_reached = True
|
||
break
|
||
if module_path in FAILED_MODULE_PATHS:
|
||
print(f" ⚠️ Skipping {module_path}@{version} (previous resolution failure)")
|
||
continue
|
||
|
||
if (module_path, version) in discovered_keys:
|
||
continue
|
||
|
||
# Apply replace directives for k3s forks
|
||
# If module path is replaced in go.mod, try to resolve using the replacement path
|
||
resolved_path = module_path
|
||
resolved_version = version
|
||
if module_path in go_mod_replaces:
|
||
new_path, new_version = go_mod_replaces[module_path]
|
||
if new_version: # Replace has explicit version
|
||
resolved_path = new_path
|
||
resolved_version = new_version
|
||
if VERBOSE_MODE:
|
||
print(f" [replace] {module_path}@{version} => {resolved_path}@{resolved_version}")
|
||
# Check if we already have the replacement module
|
||
if (resolved_path, resolved_version) in discovered_keys:
|
||
# Copy the existing module entry with original path
|
||
for m in modules:
|
||
if m['module_path'] == resolved_path and m['version'] == resolved_version:
|
||
replacement_entry = m.copy()
|
||
replacement_entry['module_path'] = module_path
|
||
replacement_entry['version'] = version
|
||
modules.append(replacement_entry)
|
||
discovered_keys.add((module_path, version))
|
||
modules_by_path.setdefault(module_path, []).append(replacement_entry)
|
||
print(f" ✓ {module_path}@{version} (using replace directive -> {resolved_path}@{resolved_version})")
|
||
continue
|
||
|
||
fallback = resolve_module_metadata(resolved_path, resolved_version)
|
||
if fallback:
|
||
# If we used a replace directive, update the entry to use the original path
|
||
if resolved_path != module_path or resolved_version != version:
|
||
fallback['module_path'] = module_path
|
||
fallback['version'] = version
|
||
print(f" ✓ {module_path}@{version} (resolved via replace -> {resolved_path}@{resolved_version})")
|
||
modules.append(fallback)
|
||
discovered_keys.add((module_path, version))
|
||
modules_by_path.setdefault(module_path, []).append(fallback)
|
||
if debug_limit is not None and len(modules) >= debug_limit:
|
||
limit_reached = True
|
||
break
|
||
else:
|
||
# Handle monorepo submodule replacements (e.g., github.com/k3s-io/etcd/server/v3)
|
||
# When a replacement points to a submodule path that doesn't have its own VCS entry,
|
||
# try to find the base repository and use it with a subdir.
|
||
# Example: github.com/k3s-io/etcd/server/v3 -> base: github.com/k3s-io/etcd, subdir: server/v3
|
||
monorepo_handled = False
|
||
if resolved_path != module_path and '/' in resolved_path:
|
||
# Check if this looks like a submodule path (has version suffix like /v2, /v3, etc.)
|
||
parts = resolved_path.rsplit('/', 1)
|
||
if len(parts) == 2:
|
||
potential_base = parts[0]
|
||
potential_subdir = parts[1]
|
||
|
||
# Look for version-suffixed paths (e.g., /v2, /v3, /server/v3, /client/v3)
|
||
# Try progressively shorter base paths
|
||
base_candidates = []
|
||
path_segments = resolved_path.split('/')
|
||
|
||
# For github.com/k3s-io/etcd/server/v3:
|
||
# Try: github.com/k3s-io/etcd/server, github.com/k3s-io/etcd
|
||
for i in range(len(path_segments) - 1, 2, -1): # At least keep domain + org
|
||
candidate_base = '/'.join(path_segments[:i])
|
||
candidate_subdir = '/'.join(path_segments[i:])
|
||
base_candidates.append((candidate_base, candidate_subdir))
|
||
|
||
# Try each candidate base path
|
||
for base_path, subdir in base_candidates:
|
||
if base_path in modules_by_path:
|
||
# Found the base repository! Create a submodule entry
|
||
base_module = modules_by_path[base_path][0]
|
||
vcs_url = base_module['vcs_url']
|
||
|
||
# Use the replacement version for the tag
|
||
tag = resolved_version.split('+')[0]
|
||
commit = git_ls_remote(vcs_url, f"refs/tags/{tag}") or git_ls_remote(vcs_url, tag)
|
||
|
||
if commit:
|
||
timestamp = derive_timestamp_from_version(resolved_version)
|
||
fallback = {
|
||
"module_path": module_path, # Original path (go.etcd.io/etcd/server/v3)
|
||
"version": version,
|
||
"vcs_url": vcs_url,
|
||
"vcs_hash": commit,
|
||
"vcs_ref": f"refs/tags/{tag}" if git_ls_remote(vcs_url, f"refs/tags/{tag}") else tag,
|
||
"timestamp": timestamp,
|
||
"subdir": subdir, # e.g., "server/v3"
|
||
}
|
||
modules.append(fallback)
|
||
discovered_keys.add((module_path, version))
|
||
modules_by_path.setdefault(module_path, []).append(fallback)
|
||
print(f" ✓ {module_path}@{version} (monorepo submodule: base={base_path}, subdir={subdir})")
|
||
monorepo_handled = True
|
||
if debug_limit is not None and len(modules) >= debug_limit:
|
||
limit_reached = True
|
||
break
|
||
|
||
if monorepo_handled:
|
||
if limit_reached:
|
||
break
|
||
continue
|
||
|
||
if module_path in modules_by_path:
|
||
reference_module = modules_by_path[module_path][0]
|
||
vcs_url = reference_module['vcs_url']
|
||
tag = version.split('+')[0]
|
||
commit = None
|
||
pseudo_info = parse_pseudo_version_tag(tag)
|
||
|
||
if pseudo_info:
|
||
timestamp_str, short_commit = pseudo_info
|
||
commit = resolve_pseudo_version_commit(
|
||
vcs_url,
|
||
timestamp_str,
|
||
short_commit,
|
||
clone_cache_dir=CLONE_CACHE_DIR
|
||
)
|
||
if commit:
|
||
print(f" ✓ {module_path}@{version} (resolved pseudo-version via repository clone)")
|
||
else:
|
||
commit = git_ls_remote(vcs_url, f"refs/tags/{tag}") or git_ls_remote(vcs_url, tag)
|
||
if commit:
|
||
print(f" ✓ {module_path}@{version} (resolved using VCS URL from sibling version)")
|
||
|
||
if commit:
|
||
timestamp = derive_timestamp_from_version(version)
|
||
subdir = reference_module.get('subdir', '')
|
||
update_metadata_cache(module_path, version, vcs_url, commit, timestamp, subdir, '', dirty=True)
|
||
fallback = {
|
||
"module_path": module_path,
|
||
"version": version,
|
||
"vcs_url": vcs_url,
|
||
"vcs_hash": commit,
|
||
"vcs_ref": "",
|
||
"timestamp": timestamp,
|
||
"subdir": subdir,
|
||
}
|
||
modules.append(fallback)
|
||
discovered_keys.add((module_path, version))
|
||
modules_by_path[module_path].append(fallback)
|
||
if debug_limit is not None and len(modules) >= debug_limit:
|
||
limit_reached = True
|
||
break
|
||
continue
|
||
|
||
# Skip monorepo root modules that fail resolution when we have submodules
|
||
# Example: go.etcd.io/etcd/v3 (root) when we have github.com/k3s-io/etcd/server/v3, etc.
|
||
# Handles both direct prefix match and forked monorepos (via VCS URL comparison)
|
||
# These are never actually imported - they just exist in go.sum due to the monorepo go.mod
|
||
is_monorepo_root = False
|
||
|
||
# Check 1: Direct prefix match (same repository, e.g., go.etcd.io/etcd/v3 → go.etcd.io/etcd/server/v3)
|
||
if any(existing_path.startswith(module_path + '/') for existing_path in modules_by_path.keys()):
|
||
is_monorepo_root = True
|
||
|
||
# Check 2: Forked monorepo (e.g., go.etcd.io/etcd/v3 → github.com/k3s-io/etcd/server/v3)
|
||
# If we failed to derive a repository, try checking if any existing module's last path segment
|
||
# matches our module's last segment (e.g., both end in /v3)
|
||
if not is_monorepo_root and module_path.count('/') >= 2:
|
||
module_segments = module_path.split('/')
|
||
# For go.etcd.io/etcd/v3: domain=go.etcd.io, repo=etcd, suffix=v3
|
||
# Check if we have modules like */etcd/*/v3 (forked versions)
|
||
for existing_path in modules_by_path.keys():
|
||
if '/' in existing_path:
|
||
# Check if the existing path is a submodule of a similar repository
|
||
# Example: github.com/k3s-io/etcd/server/v3 shares repository 'etcd' with go.etcd.io/etcd/v3
|
||
if '/etcd/' in existing_path and module_path.endswith('/v3'):
|
||
is_monorepo_root = True
|
||
break
|
||
|
||
if is_monorepo_root:
|
||
print(f" ⊙ {module_path}@{version} (monorepo root - submodules already resolved)")
|
||
continue
|
||
|
||
if module_path in modules_by_path:
|
||
FAILED_MODULE_PATHS.add(module_path)
|
||
FAILED_MODULE_ENTRIES.add((module_path, version))
|
||
print(f" ⚠️ Skipping {module_path}@{version} (indirect-only dependency)")
|
||
if limit_reached:
|
||
break
|
||
|
||
if limit_reached:
|
||
print(f"\n⚠️ Debug limit {debug_limit} reached; skipping remaining modules discovered from go.sum/go list.")
|
||
|
||
# Resolve /go.mod-only (indirect) dependencies using sibling versions
|
||
# Even though these are "indirect", Go may still need them during compilation
|
||
# (e.g., due to complex replace directives or transitive dependencies).
|
||
# If we have a sibling version with Origin metadata, resolve the indirect version too.
|
||
print(f"\n⚙️ Resolving /go.mod-only dependencies from sibling versions...")
|
||
gomod_only_resolved = 0
|
||
gomod_only_skipped = 0
|
||
for module_path, version in sorted(go_sum_indirect_only):
|
||
try:
|
||
if (module_path, version) in discovered_keys:
|
||
continue # Already have this version
|
||
|
||
if module_path in modules_by_path:
|
||
# We have a sibling version - try to resolve this one using the sibling's VCS URL
|
||
reference_module = modules_by_path[module_path][0]
|
||
vcs_url = reference_module['vcs_url']
|
||
tag = version.split('+')[0]
|
||
commit = None
|
||
pseudo_info = parse_pseudo_version_tag(tag)
|
||
|
||
if pseudo_info:
|
||
timestamp_str, short_commit = pseudo_info
|
||
try:
|
||
commit = resolve_pseudo_version_commit(
|
||
vcs_url,
|
||
timestamp_str,
|
||
short_commit,
|
||
clone_cache_dir=CLONE_CACHE_DIR
|
||
)
|
||
except Exception as e:
|
||
print(f" ❌ Error resolving pseudo-version {module_path}@{version} (timestamp={timestamp_str}, commit={short_commit}): {e}")
|
||
gomod_only_skipped += 1
|
||
continue
|
||
else:
|
||
# For semantic version tags, try to find the tag reference
|
||
# This enables to detect orphaned tags for sibling-resolved modules
|
||
vcs_ref = ""
|
||
commit = git_ls_remote(vcs_url, f"refs/tags/{tag}")
|
||
if commit:
|
||
vcs_ref = f"refs/tags/{tag}"
|
||
else:
|
||
commit = git_ls_remote(vcs_url, tag)
|
||
|
||
if commit:
|
||
timestamp = derive_timestamp_from_version(version)
|
||
subdir = reference_module.get('subdir', '')
|
||
update_metadata_cache(module_path, version, vcs_url, commit, timestamp, subdir, '', dirty=True)
|
||
fallback = {
|
||
"module_path": module_path,
|
||
"version": version,
|
||
"vcs_url": vcs_url,
|
||
"vcs_hash": commit,
|
||
"vcs_ref": vcs_ref,
|
||
"timestamp": timestamp,
|
||
"subdir": subdir,
|
||
}
|
||
modules.append(fallback)
|
||
discovered_keys.add((module_path, version))
|
||
modules_by_path[module_path].append(fallback)
|
||
gomod_only_resolved += 1
|
||
print(f" ✓ {module_path}@{version} (/go.mod-only resolved using sibling version)")
|
||
else:
|
||
gomod_only_skipped += 1
|
||
else:
|
||
gomod_only_skipped += 1
|
||
except Exception as e:
|
||
print(f" ❌ Error resolving {module_path}@{version}: {e}")
|
||
gomod_only_skipped += 1
|
||
|
||
if gomod_only_resolved > 0:
|
||
print(f"✓ Resolved {gomod_only_resolved} /go.mod-only dependencies using sibling versions")
|
||
if gomod_only_skipped > 0:
|
||
print(f" ⚠️ Skipped {gomod_only_skipped} /go.mod-only dependencies (no sibling version available)")
|
||
|
||
if FAILED_MODULE_ENTRIES:
|
||
print("\n❌ Failed to resolve metadata for the following modules:")
|
||
for mod, ver in sorted(FAILED_MODULE_ENTRIES):
|
||
print(f" - {mod}@{ver}")
|
||
print("Aborting to avoid emitting invalid SRCREVs.")
|
||
return 1
|
||
|
||
if not modules:
|
||
print("❌ No modules discovered")
|
||
return 1
|
||
|
||
success = generate_recipe(
|
||
modules,
|
||
source_dir,
|
||
output_dir,
|
||
args.git_repo or "unknown",
|
||
args.git_ref or "unknown",
|
||
validate_only=args.validate,
|
||
debug_limit=debug_limit,
|
||
skip_verify=args.skip_verify,
|
||
verify_jobs=args.verify_jobs,
|
||
)
|
||
|
||
if success:
|
||
if args.validate:
|
||
print("\n" + "=" * 70)
|
||
print("✅ SUCCESS - Validation complete")
|
||
print("=" * 70)
|
||
else:
|
||
print("\n" + "=" * 70)
|
||
print("✅ SUCCESS - Recipe generation complete")
|
||
print("=" * 70)
|
||
|
||
# Write corrected modules back to JSON for future runs
|
||
if args.discovered_modules and VERIFY_CORRECTIONS_APPLIED:
|
||
corrected_json = args.discovered_modules.replace('.json', '-corrected.json')
|
||
try:
|
||
with open(corrected_json, 'w') as f:
|
||
json.dump(modules, f, indent=2)
|
||
print(f"\n✓ Wrote corrected module metadata to: {corrected_json}")
|
||
print(f" Use this file for future runs to avoid re-detecting orphaned commits")
|
||
except Exception as e:
|
||
print(f"\n⚠️ Could not write corrected JSON: {e}")
|
||
|
||
exit_code = 0
|
||
else:
|
||
print("\n❌ FAILED - Recipe generation failed")
|
||
exit_code = 1
|
||
|
||
if SKIPPED_MODULES:
|
||
print("\n⚠️ Skipped modules (no repository metadata)")
|
||
for (module_path, version), reason in sorted(SKIPPED_MODULES.items()):
|
||
print(f" - {module_path}@{version} [{reason}]")
|
||
print(" Use --set-repo / --inject-commit to add missing metadata before building.")
|
||
|
||
return exit_code


def parse_go_mod_replaces(go_mod_path: Path) -> Dict[str, Tuple[str, str]]:
    """
    Parse replace directives from a go.mod file.

    Returns:
        Dict mapping old_path to (new_path, new_version).
        Example: {"github.com/containerd/containerd/v2": ("github.com/k3s-io/containerd/v2", "v2.1.4-k3s2")}
    """
    replaces = {}
    if not go_mod_path.exists():
        return replaces

    try:
        content = go_mod_path.read_text()
        # Match: old_path => new_path version
        # Example: github.com/containerd/containerd/v2 => github.com/k3s-io/containerd/v2 v2.1.4-k3s2
        for line in content.splitlines():
            line = line.strip()
            if not line.startswith('replace ') and '=>' not in line:
                continue

            # Remove the 'replace ' prefix if present
            if line.startswith('replace '):
                line = line[8:].strip()

            parts = line.split('=>')
            if len(parts) != 2:
                continue

            left = parts[0].strip().split()
            right = parts[1].strip().split()

            if len(left) == 0 or len(right) == 0:
                continue

            old_path = left[0]
            new_path = right[0]
            new_version = right[1] if len(right) > 1 else ""

            replaces[old_path] = (new_path, new_version)
    except Exception as e:
        print(f"⚠️  Failed to parse go.mod replaces: {e}", file=sys.stderr)

    return replaces
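
# Both go.mod replace spellings are picked up by the parser above; e.g. (paths
# illustrative, matching the docstring example):
#
#   replace github.com/containerd/containerd/v2 => github.com/k3s-io/containerd/v2 v2.1.4-k3s2
#   replace (
#       golang.org/x/net => golang.org/x/net v0.38.0
#   )
#
# yields:
#   {"github.com/containerd/containerd/v2": ("github.com/k3s-io/containerd/v2", "v2.1.4-k3s2"),
#    "golang.org/x/net": ("golang.org/x/net", "v0.38.0")}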


def parse_pseudo_version_tag(tag: str) -> Optional[Tuple[str, str]]:
    """Return (timestamp, short_commit) for Go pseudo-versions."""
    tag = tag.split('+', 1)[0]
    parts = tag.split('-')
    if len(parts) < 3:
        return None

    short_commit = parts[-1]
    timestamp_part = parts[-2]
    timestamp_str = timestamp_part.split('.')[-1]

    if len(timestamp_str) != 14 or not timestamp_str.isdigit():
        return None

    if not re.fullmatch(r'[0-9a-fA-F]{6,40}', short_commit):
        return None

    return timestamp_str, short_commit
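
# Example of the pseudo-version layout this parses (vX.Y.Z-YYYYMMDDHHMMSS-shorthash):
#   parse_pseudo_version_tag("v0.0.0-20240101123045-abcdef123456")
#   -> ("20240101123045", "abcdef123456")
#   parse_pseudo_version_tag("v1.2.3")  # not a pseudo-version
#   -> None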


def _cache_key(url: str, ref: str) -> str:
    return f"{url}|||{ref}"


def load_ls_remote_cache() -> None:
    if not LS_REMOTE_CACHE_PATH.exists():
        return
    try:
        data = json.loads(LS_REMOTE_CACHE_PATH.read_text())
    except Exception:
        return
    for key, value in data.items():
        try:
            url, ref = key.split("|||", 1)
        except ValueError:
            continue
        LS_REMOTE_CACHE[(url, ref)] = value


def save_ls_remote_cache() -> None:
    if not LS_REMOTE_CACHE_DIRTY:
        return
    try:
        payload = {
            _cache_key(url, ref): value
            for (url, ref), value in LS_REMOTE_CACHE.items()
        }
        LS_REMOTE_CACHE_PATH.write_text(json.dumps(payload, indent=2, sort_keys=True))
    except Exception:
        pass


def git_ls_remote(url: str, ref: str, *, debug: bool = False) -> Optional[str]:
    """
    Query a git repository for the commit hash of a ref.
    Uses a disk-based cache and local clones to minimize network calls.

    Args:
        url: Git repository URL
        ref: Git ref (tag, branch, commit, etc.)
        debug: If True, print whether the result came from cache or network

    Returns:
        Commit hash, or None if not found
    """
    global LS_REMOTE_CACHE_DIRTY
    key = (url, ref)

    # Check the in-memory cache first
    if key in LS_REMOTE_CACHE:
        if debug or VERBOSE_MODE:
            result = LS_REMOTE_CACHE[key]
            status = "cached" if result else "cached (not found)"
            print(f"  [ls-remote {status}] {url} {ref}", file=sys.stderr)
        return LS_REMOTE_CACHE[key]

    # Try a local repository clone if available
    repo_hash = hashlib.sha256(url.encode()).hexdigest()[:16]
    local_repo = CLONE_CACHE_DIR / f"repo_{repo_hash}"

    if local_repo.exists() and (local_repo / 'HEAD').exists():
        try:
            # Query the local repository instead of the network
            result = subprocess.run(
                ["git", "show-ref", "--hash", ref],
                cwd=local_repo,
                capture_output=True,
                text=True,
                timeout=10,
            )
            if result.returncode == 0 and result.stdout.strip():
                commit_hash = result.stdout.strip().split()[0]
                LS_REMOTE_CACHE[key] = commit_hash
                LS_REMOTE_CACHE_DIRTY = True
                if debug or VERBOSE_MODE:
                    print(f"  [ls-remote local] {url} {ref} -> {commit_hash[:12]}", file=sys.stderr)
                return commit_hash
        except Exception:
            # Any local-clone failure falls through to the network query
            pass

    if debug or VERBOSE_MODE:
        print(f"  [ls-remote network] {url} {ref}", file=sys.stderr)

    try:
        env = os.environ.copy()
        env.setdefault("GIT_TERMINAL_PROMPT", "0")
        env.setdefault("GIT_ASKPASS", "true")

        # FIX: For tags, also query the dereferenced commit (^{}) to handle annotated tags.
        # Annotated tags have a tag object hash that differs from the commit hash.
        # We need the actual commit hash for git archive/checkout operations.
        refs_to_query = [ref]
        if ref.startswith("refs/tags/"):
            refs_to_query.append(f"{ref}^{{}}")  # Add the dereferenced query

        result = subprocess.run(
            ["git", "ls-remote", url] + refs_to_query,
            capture_output=True,
            text=True,
            check=True,
            env=env,
            timeout=GIT_CMD_TIMEOUT,
        )

        # Parse results - prefer the dereferenced commit (^{}) over the annotated tag object
        tag_object_hash = None
        dereferenced_hash = None

        for line in result.stdout.strip().splitlines():
            if not line:
                continue
            parts = line.split()
            if len(parts) >= 2:
                hash_val, ref_name = parts[0], parts[1]
                if ref_name.endswith("^{}"):
                    # This is the dereferenced commit - preferred!
                    dereferenced_hash = hash_val
                else:
                    # This is either a lightweight tag or an annotated tag object
                    tag_object_hash = hash_val

        # Prefer the dereferenced commit, fall back to the tag object (for lightweight tags)
        commit_hash = dereferenced_hash or tag_object_hash
        if commit_hash:
            LS_REMOTE_CACHE[key] = commit_hash
            LS_REMOTE_CACHE_DIRTY = True
            return commit_hash

    except subprocess.TimeoutExpired:
        print(f"  ⚠️  git ls-remote timeout ({GIT_CMD_TIMEOUT}s) for {url} {ref}")
        LS_REMOTE_CACHE[key] = None
        LS_REMOTE_CACHE_DIRTY = True
        return None
    except subprocess.CalledProcessError:
        LS_REMOTE_CACHE[key] = None
        LS_REMOTE_CACHE_DIRTY = True
        return None
    return None


def load_vanity_url_cache() -> None:
    """Load the vanity URL resolution cache from disk."""
    if not VANITY_URL_CACHE_PATH.exists():
        return
    try:
        data = json.loads(VANITY_URL_CACHE_PATH.read_text())
        VANITY_URL_CACHE.update(data)
    except Exception:
        pass


def save_vanity_url_cache() -> None:
    """Save the vanity URL resolution cache to disk."""
    if not VANITY_URL_CACHE_DIRTY:
        return
    try:
        VANITY_URL_CACHE_PATH.write_text(json.dumps(VANITY_URL_CACHE, indent=2, sort_keys=True))
    except Exception:
        pass


def load_verify_commit_cache() -> None:
    """
    Load the verification cache with timestamp support for aging detection.

    Cache format v2:
    {
        "repo|||commit": {
            "verified": true,
            "first_verified": "2025-01-15T10:30:00Z",  # When first verified
            "last_checked": "2025-02-10T14:20:00Z",    # When last re-verified
            "fetch_method": "fetch"                     # "fetch", "ref", or "cached"
        }
    }
    """
    global VERIFY_COMMIT_CACHE_DIRTY, VERIFY_COMMIT_CACHE_V2
    if not VERIFY_COMMIT_CACHE_PATH.exists():
        return
    try:
        data = json.loads(VERIFY_COMMIT_CACHE_PATH.read_text())
    except Exception:
        return

    if isinstance(data, dict):
        # Detect the format: v1 (bool values) vs v2 (dict values)
        sample_value = next(iter(data.values())) if data else None

        if isinstance(sample_value, bool):
            # Legacy format: convert to v2
            now = datetime.now(timezone.utc).isoformat()
            for k, v in data.items():
                if v:  # Only migrate verified=True entries
                    VERIFY_COMMIT_CACHE_V2[k] = {
                        "verified": True,
                        "first_verified": now,
                        "last_checked": now,
                        "fetch_method": "cached",  # Unknown how it was verified
                    }
            # Keep the dirty flag set so the migrated data is saved in the new format
            VERIFY_COMMIT_CACHE_DIRTY = True
        elif isinstance(sample_value, dict):
            # V2 format: loaded data matches what is on disk, nothing to flush
            VERIFY_COMMIT_CACHE_V2.update(data)
            VERIFY_COMMIT_CACHE_DIRTY = False


def save_verify_commit_cache(force: bool = False) -> None:
    """Save the verification cache in v2 format with timestamps.

    Args:
        force: If True, save even if not dirty (for incremental saves during long runs)
    """
    global VERIFY_COMMIT_CACHE_DIRTY

    if not force and not VERIFY_COMMIT_CACHE_DIRTY:
        return
    try:
        VERIFY_COMMIT_CACHE_PATH.write_text(json.dumps(VERIFY_COMMIT_CACHE_V2, indent=2, sort_keys=True))
        VERIFY_COMMIT_CACHE_DIRTY = False
    except Exception as e:
        print(f"⚠️  Failed to save verification cache: {e}")


def _load_overrides_from_file(path: Path, target_dict: Dict[Tuple[str, Optional[str]], str]) -> None:
    """
    Load module->repo overrides from a JSON file into the target dictionary.

    File format:
    {
        "module/path": "https://github.com/org/repo",
        "module/path@v1.2.3": "https://github.com/org/repo"
    }

    The @version suffix is optional. Use it to override only a specific version.
    """
    if not path.exists():
        return
    try:
        data = json.loads(path.read_text())
    except Exception:
        return
    if not isinstance(data, dict):
        return

    for raw_key, repo_url in data.items():
        if not isinstance(repo_url, str):
            continue
        module_path = str(raw_key)
        version: Optional[str] = None

        # Support both "module|||version" (legacy) and "module@version" (new) formats
        if "|||" in module_path:
            module_part, version_part = module_path.split("|||", 1)
            version = None if version_part == "*" else version_part
            module_path = module_part
        elif "@" in module_path and not module_path.startswith("@"):
            # Handle the module@version format (but not @org/pkg scoped packages)
            at_pos = module_path.rfind("@")
            version = module_path[at_pos + 1:]
            module_path = module_path[:at_pos]

        try:
            key = _normalise_override_key(module_path, version)
        except ValueError:
            continue
        target_dict[key] = repo_url


def load_manual_overrides() -> None:
    """Load git-tracked manual overrides from manual-overrides.json."""
    global MANUAL_OVERRIDES
    MANUAL_OVERRIDES.clear()
    _load_overrides_from_file(MANUAL_OVERRIDES_PATH, MANUAL_OVERRIDES)
    if MANUAL_OVERRIDES:
        print(f"  Loaded {len(MANUAL_OVERRIDES)} manual repository override(s)")


def load_repo_overrides() -> None:
    """Load dynamic overrides from repo-overrides.json (created via --set-repo)."""
    global MODULE_REPO_OVERRIDES_DIRTY
    MODULE_REPO_OVERRIDES.clear()
    _load_overrides_from_file(MODULE_REPO_OVERRIDES_PATH, MODULE_REPO_OVERRIDES)
    MODULE_REPO_OVERRIDES_DIRTY = False


def save_repo_overrides() -> None:
    if not MODULE_REPO_OVERRIDES_DIRTY:
        return
    try:
        payload: Dict[str, str] = {}
        for (module_path, version), repo_url in MODULE_REPO_OVERRIDES.items():
            key = module_path if version is None else f"{module_path}|||{version}"
            payload[key] = repo_url
        MODULE_REPO_OVERRIDES_PATH.write_text(json.dumps(payload, indent=2, sort_keys=True))
    except Exception:
        pass


def query_vanity_url(module_path: str) -> Optional[str]:
    """
    Query vanity URL metadata using ?go-get=1 to resolve the actual VCS repository.

    Go uses vanity URLs to provide custom import paths that redirect to actual
    repositories. When you request https://example.com/module?go-get=1, the server
    returns HTML with a meta tag like:
        <meta name="go-import" content="example.com/module git https://github.com/org/repo">

    This function queries that metadata and caches the result for future use.

    Args:
        module_path: Go module path (e.g., "go.uber.org/atomic")

    Returns:
        VCS repository URL if found, None otherwise
    """
    global VANITY_URL_CACHE_DIRTY

    # Check the cache first
    if module_path in VANITY_URL_CACHE:
        return VANITY_URL_CACHE[module_path]

    # Query the ?go-get=1 metadata
    url = f"https://{module_path}?go-get=1"

    try:
        import urllib.request
        import html.parser

        class GoImportParser(html.parser.HTMLParser):
            def __init__(self, target_module: str):
                super().__init__()
                self.target_module = target_module
                self.repo_url = None
                self.best_prefix_len = 0  # Track the longest matching prefix

            def handle_starttag(self, tag, attrs):
                if tag == 'meta':
                    attrs_dict = dict(attrs)
                    if attrs_dict.get('name') == 'go-import':
                        content = attrs_dict.get('content', '')
                        # Format: "module_prefix vcs repo_url"
                        parts = content.split()
                        if len(parts) >= 3:
                            prefix = parts[0]
                            # parts[1] = vcs type (git, hg, svn, bzr)
                            repo_url = parts[2]
                            # Per the Go spec: match the go-import whose prefix matches our module.
                            # The module path must equal the prefix or have it as a path prefix.
                            if self.target_module == prefix or self.target_module.startswith(prefix + '/'):
                                # Prefer longer (more specific) prefix matches
                                if len(prefix) > self.best_prefix_len:
                                    self.best_prefix_len = len(prefix)
                                    self.repo_url = repo_url

        # Fetch the page with a timeout
        req = urllib.request.Request(url, headers={'User-Agent': 'oe-go-mod-fetcher/3.0'})
        with urllib.request.urlopen(req, timeout=10) as response:
            html_content = response.read().decode('utf-8', errors='ignore')

        # Parse the HTML to find the matching go-import meta tag
        parser = GoImportParser(module_path)
        parser.feed(html_content)

        # Cache the result (even if None)
        VANITY_URL_CACHE[module_path] = parser.repo_url
        VANITY_URL_CACHE_DIRTY = True

        return parser.repo_url

    except Exception:
        # Cache the negative result to avoid repeated failures
        VANITY_URL_CACHE[module_path] = None
        VANITY_URL_CACHE_DIRTY = True
        return None


def get_github_mirror_url(vcs_url: str) -> Optional[str]:
    """
    Get the GitHub mirror URL for golang.org/x repositories.

    golang.org/x repositories are mirrored on GitHub at github.com/golang/*.
    These mirrors are often more reliable than go.googlesource.com.

    Args:
        vcs_url: Original VCS URL (e.g., https://go.googlesource.com/tools)

    Returns:
        GitHub mirror URL if applicable, None otherwise
    """
    if 'go.googlesource.com' in vcs_url:
        # Extract the package name from the URL
        # https://go.googlesource.com/tools -> tools
        pkg_name = vcs_url.rstrip('/').split('/')[-1]
        return f"https://github.com/golang/{pkg_name}"
    return None
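
# Example mapping (golang.org/x modules live on go.googlesource.com and are
# mirrored read-only under github.com/golang):
#   get_github_mirror_url("https://go.googlesource.com/tools") -> "https://github.com/golang/tools"
#   get_github_mirror_url("https://github.com/spf13/cobra")    -> None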


def resolve_pseudo_version_commit(vcs_url: str, timestamp_str: str, short_commit: str,
                                  clone_cache_dir: Optional[Path] = None) -> Optional[str]:
    """
    Resolve a pseudo-version's short commit hash to a full 40-character hash.

    This function clones (or updates) a git repository and searches the commit history
    for a commit that matches both the timestamp and short commit hash from a pseudo-version.

    For golang.org/x repositories, automatically tries GitHub mirrors if the primary
    source fails (go.googlesource.com can be slow or unreliable).

    Args:
        vcs_url: Git repository URL
        timestamp_str: Timestamp from the pseudo-version (format: YYYYMMDDHHmmss)
        short_commit: Short commit hash (12 characters) from the pseudo-version
        clone_cache_dir: Optional directory to cache cloned repositories (recommended)

    Returns:
        Full 40-character commit hash, or None if not found
    """
    # Parse the timestamp
    try:
        dt = datetime.strptime(timestamp_str, "%Y%m%d%H%M%S")
        # Validate the date is within a reasonable range before doing arithmetic.
        # Python datetime supports years 1-9999, but Go pseudo-versions should be
        # recent; rejecting anything before 1970 also avoids overflow when
        # subtracting a day below.
        if dt.year < 1970 or dt.year > 9999:
            print(f"⚠️  Invalid timestamp year {dt.year} in pseudo-version (timestamp: {timestamp_str})", file=sys.stderr)
            return None
        # Search window: ±1 day around the timestamp for efficiency
        try:
            since = (dt - timedelta(days=1)).isoformat()
            until = (dt + timedelta(days=1)).isoformat()
        except OverflowError as e:
            print(f"⚠️  Date arithmetic overflow for timestamp {timestamp_str}: {e}", file=sys.stderr)
            return None
    except ValueError as e:
        print(f"⚠️  Invalid timestamp format {timestamp_str}: {e}", file=sys.stderr)
        return None

    # Try the primary URL and the GitHub mirror (if applicable)
    urls_to_try = [vcs_url]
    github_mirror = get_github_mirror_url(vcs_url)
    if github_mirror:
        urls_to_try.append(github_mirror)

    git_env = os.environ.copy()
    git_env.setdefault("GIT_TERMINAL_PROMPT", "0")
    git_env.setdefault("GIT_ASKPASS", "true")

    for try_url in urls_to_try:
        # Determine the clone directory based on the URL being tried
        if clone_cache_dir:
            clone_cache_dir.mkdir(parents=True, exist_ok=True)
            repo_hash = hashlib.sha256(try_url.encode()).hexdigest()[:16]
            clone_dir = clone_cache_dir / f"repo_{repo_hash}"
        else:
            clone_dir = Path(tempfile.mkdtemp(prefix="pseudo-resolve-"))

        try:
            # Clone or update the repository
            if clone_dir.exists() and (clone_dir / 'HEAD').exists():
                # Repository already cloned, fetch the latest
                try:
                    subprocess.run(
                        ['git', 'fetch', '--all', '--quiet'],
                        cwd=clone_dir,
                        capture_output=True,
                        check=True,
                        timeout=60,
                        env=git_env,
                    )
                except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
                    # Fetch failed, try to use the existing clone anyway
                    pass
            else:
                # Clone the repository (bare clone for efficiency)
                if clone_dir.exists():
                    shutil.rmtree(clone_dir)
                clone_dir.mkdir(parents=True, exist_ok=True)

                subprocess.run(
                    ['git', 'clone', '--bare', '--quiet', try_url, str(clone_dir)],
                    capture_output=True,
                    check=True,
                    timeout=300,  # 5 minute timeout
                    env=git_env,
                )

            # Search for commits within the timestamp window
            result = subprocess.run(
                ['git', 'log', '--all', '--format=%H %ct',
                 f'--since={since}', f'--until={until}'],
                cwd=clone_dir,
                capture_output=True,
                text=True,
                check=True,
                timeout=30,
                env=git_env,
            )

            # Find the commit with a matching short hash prefix
            for line in result.stdout.strip().splitlines():
                if not line:
                    continue
                parts = line.split()
                if len(parts) < 2:
                    continue
                full_hash = parts[0]
                if full_hash.startswith(short_commit):
                    return full_hash

            # Commit not found in this repository, try the next URL
            continue

        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
            # Clone/fetch failed, try the next URL if available
            continue
        finally:
            # Clean up the temp directory if we created one
            if not clone_cache_dir and clone_dir.exists():
                try:
                    shutil.rmtree(clone_dir)
                except OSError:
                    pass

    # All URLs failed
    return None
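
# A worked example of the resolution above (hypothetical commit values): the
# pseudo-version v0.0.0-20240101123045-abcdef123456 gives
# timestamp_str="20240101123045" and short_commit="abcdef123456"; inside the
# bare clone the function effectively runs
#   git log --all --format='%H %ct' --since=2023-12-31T12:30:45 --until=2024-01-02T12:30:45
# and returns the first full hash whose prefix is "abcdef123456".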


def derive_timestamp_from_version(version: str) -> str:
    parsed = parse_pseudo_version_tag(version)
    if parsed:
        timestamp_str, _ = parsed
        try:
            return datetime.strptime(timestamp_str, "%Y%m%d%H%M%S").strftime("%Y-%m-%dT%H:%M:%SZ")
        except ValueError:
            return "1970-01-01T00:00:00Z"
    return "1970-01-01T00:00:00Z"


def _cache_metadata_key(module_path: str, version: str) -> Tuple[str, str]:
    return (module_path, version)


def load_metadata_cache_file() -> None:
    if not MODULE_METADATA_CACHE_PATH.exists():
        return
    try:
        data = json.loads(MODULE_METADATA_CACHE_PATH.read_text())
    except Exception:
        return
    for key, value in data.items():
        try:
            module_path, version = key.split("|||", 1)
        except ValueError:
            continue
        if not isinstance(value, dict):
            continue
        MODULE_METADATA_CACHE[_cache_metadata_key(module_path, version)] = {
            'vcs_url': value.get('vcs_url', ''),
            'commit': value.get('commit', ''),
            'timestamp': value.get('timestamp', ''),
            'subdir': value.get('subdir', ''),
            'ref': value.get('ref', ''),
        }


def save_metadata_cache() -> None:
    if not MODULE_METADATA_CACHE_DIRTY:
        return
    payload = {
        f"{module}|||{version}": value
        for (module, version), value in MODULE_METADATA_CACHE.items()
    }
    try:
        MODULE_METADATA_CACHE_PATH.write_text(json.dumps(payload, indent=2, sort_keys=True))
    except Exception:
        pass


def update_metadata_cache(module_path: str, version: str, vcs_url: str, commit: str,
                          timestamp: str = "", subdir: str = "", ref: str = "",
                          dirty: bool = True) -> None:
    global MODULE_METADATA_CACHE_DIRTY
    key = _cache_metadata_key(module_path, version)
    value = {
        'vcs_url': vcs_url or '',
        'commit': commit or '',
        'timestamp': timestamp or '',
        'subdir': subdir or '',
        'ref': ref or '',
    }
    if MODULE_METADATA_CACHE.get(key) != value:
        MODULE_METADATA_CACHE[key] = value
        if dirty:
            MODULE_METADATA_CACHE_DIRTY = True


def get_cached_metadata(module_path: str, version: str) -> Optional[dict]:
    entry = MODULE_METADATA_CACHE.get(_cache_metadata_key(module_path, version))
    if not entry:
        return None
    timestamp = entry.get('timestamp') or derive_timestamp_from_version(version)
    return {
        "module_path": module_path,
        "version": version,
        "vcs_url": entry.get('vcs_url', ''),
        "vcs_hash": entry.get('commit', ''),
        "vcs_ref": entry.get('ref', ''),
        "timestamp": timestamp,
        "subdir": entry.get('subdir', ''),
    }


def load_metadata_from_inc(output_dir: Path) -> None:
    git_inc = output_dir / "go-mod-git.inc"
    cache_inc = output_dir / "go-mod-cache.inc"

    sha_to_url: Dict[str, str] = {}
    if git_inc.exists():
        for line in git_inc.read_text().splitlines():
            line = line.strip()
            if not line.startswith('SRC_URI'):
                continue
            if '"' not in line:
                continue
            content = line.split('"', 1)[1].rsplit('"', 1)[0]
            parts = [p for p in content.split(';') if p]
            if not parts:
                continue
            url_part = parts[0]
            dest_sha = None
            for part in parts[1:]:
                if part.startswith('destsuffix='):
                    dest = part.split('=', 1)[1]
                    dest_sha = dest.rsplit('/', 1)[-1]
                    break
            if not dest_sha:
                continue
            if url_part.startswith('git://'):
                url_https = 'https://' + url_part[6:]
            else:
                url_https = url_part
            sha_to_url[dest_sha] = url_https

    if cache_inc.exists():
        text = cache_inc.read_text()
        marker = "GO_MODULE_CACHE_DATA = '"
        if marker in text:
            start = text.index(marker) + len(marker)
            try:
                end = text.index("'\n\n", start)
            except ValueError:
                end = len(text)
            try:
                data = json.loads(text[start:end])
            except Exception:
                data = []
            for entry in data:
                module_path = entry.get('module')
                version = entry.get('version')
                sha = entry.get('vcs_hash')
                commit = entry.get('commit')
                timestamp = entry.get('timestamp', '')
                subdir = entry.get('subdir', '')
                ref = entry.get('vcs_ref', '')
                if not module_path or not version:
                    continue
                vcs_url = sha_to_url.get(sha, '')
                if not vcs_url:
                    continue
                if not _url_allowed_for_module(module_path, vcs_url, version):
                    continue
                # Skip entries with invalid commit hashes
                if commit and len(commit) != 40:
                    continue
                if not timestamp:
                    timestamp = derive_timestamp_from_version(version)
                update_metadata_cache(module_path, version, vcs_url, commit or '', timestamp, subdir, ref, dirty=False)


def load_metadata_from_module_cache_task(output_dir: Path) -> None:
    legacy_path = output_dir / "module_cache_task.inc"
    if not legacy_path.exists():
        return
    import ast
    pattern = re.compile(r'\(\{.*?\}\)', re.DOTALL)
    text = legacy_path.read_text()
    for match in pattern.finditer(text):
        blob = match.group()[1:-1]  # strip parentheses
        try:
            entry = ast.literal_eval(blob)
        except Exception:
            continue
        module_path = entry.get('module')
        version = entry.get('version')
        vcs_url = entry.get('repo_url') or entry.get('url') or ''
        commit = entry.get('commit') or ''
        subdir = entry.get('subdir', '')
        ref = entry.get('ref', '')
        if not module_path or not version or not vcs_url or not commit:
            continue
        if vcs_url.startswith('git://'):
            vcs_url = 'https://' + vcs_url[6:]
        if not _url_allowed_for_module(module_path, vcs_url, version):
            continue
        timestamp = derive_timestamp_from_version(version)
        update_metadata_cache(module_path, version, vcs_url, commit, timestamp, subdir, ref, dirty=True)


def bootstrap_metadata_cache(output_dir: Optional[Path],
                             skip_inc_files: bool = False,
                             skip_legacy_module_cache: bool = False) -> None:
    """
    Bootstrap the metadata cache from multiple sources.

    Args:
        output_dir: Recipe output directory (optional in cache-only mode)
        skip_inc_files: If True, skip loading from .inc files (used with --clean-cache)
        skip_legacy_module_cache: If True, skip loading legacy module_cache_task.inc metadata
    """
    load_metadata_cache_file()
    if not skip_inc_files and output_dir is not None:
        load_metadata_from_inc(output_dir)
    if not skip_legacy_module_cache and output_dir is not None:
        load_metadata_from_module_cache_task(output_dir)


def _lookup_commit_for_version(vcs_url: str, version: str, preferred_ref: str = "") -> Tuple[Optional[str], Optional[str]]:
    """
    Resolve the git commit for a module version using git ls-remote.

    Returns:
        Tuple of (commit, timestamp). Timestamp may be None if unknown.
    """
    tag = version.split('+')[0]
    pseudo_info = parse_pseudo_version_tag(tag)
    candidate_urls = [vcs_url]
    if not vcs_url.endswith('.git'):
        candidate_urls.append(vcs_url.rstrip('/') + '.git')

    for url in candidate_urls:
        if preferred_ref:
            commit = git_ls_remote(url, preferred_ref)
            if commit:
                return commit, "1970-01-01T00:00:00Z"

        if pseudo_info:
            timestamp_str, short_commit = pseudo_info
            commit = git_ls_remote(url, short_commit)
            if commit:
                timestamp = derive_timestamp_from_version(version)
                return commit, timestamp
        else:
            for ref in (f"refs/tags/{tag}", tag):
                commit = git_ls_remote(url, ref)
                if commit:
                    return commit, "1970-01-01T00:00:00Z"

    if pseudo_info:
        timestamp_str, short_commit = pseudo_info
        for url in candidate_urls:
            commit = resolve_pseudo_version_commit(
                url,
                timestamp_str,
                short_commit,
                clone_cache_dir=CLONE_CACHE_DIR,
            )
            if commit:
                timestamp = derive_timestamp_from_version(version)
                return commit, timestamp

    if pseudo_info:
        # Even if we couldn't resolve the commit, return the derived timestamp
        return None, derive_timestamp_from_version(version)
    return None, None


def query_module_via_go_list(module_path: str, version: str) -> Optional[Dict[str, str]]:
    """Use `go list -m -json` to obtain VCS metadata for a module version."""
    env = os.environ.copy()
    env.setdefault('GOPROXY', 'https://proxy.golang.org')
    if CURRENT_GOMODCACHE:
        env['GOMODCACHE'] = CURRENT_GOMODCACHE

    try:
        result = subprocess.run(
            ['go', 'list', '-m', '-json', f'{module_path}@{version}'],
            capture_output=True,
            text=True,
            check=True,
            env=env,
            timeout=GO_CMD_TIMEOUT,
        )
    except subprocess.TimeoutExpired:
        print(f"  ⚠️  go list timed out for {module_path}@{version} after {GO_CMD_TIMEOUT}s")
        return None
    except subprocess.CalledProcessError:
        return None

    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError:
        return None

    origin = data.get('Origin') or {}
    vcs_url = origin.get('URL', '')
    commit = origin.get('Hash', '')
    subdir = origin.get('Subdir', '')
    ref = origin.get('Ref', '')
    timestamp = data.get('Time') or origin.get('Time') or ''

    if vcs_url.startswith('git+'):
        vcs_url = vcs_url[4:]

    if not vcs_url or not commit:
        return None

    return {
        'vcs_url': vcs_url,
        'commit': commit,
        'timestamp': timestamp,
        'subdir': subdir or '',
        'vcs_ref': ref or '',
    }
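
# Abridged sketch of the `go list -m -json` output consumed above (field names
# as accessed in the code; the concrete values here are illustrative only):
#   {
#     "Path": "golang.org/x/sys",
#     "Version": "v0.21.0",
#     "Time": "2024-06-04T15:02:01Z",
#     "Origin": {
#       "VCS": "git",
#       "URL": "https://go.googlesource.com/sys",
#       "Ref": "refs/tags/v0.21.0",
#       "Hash": "<40-char commit hash>"
#     }
#   }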


def _candidate_gopkg_repos(module_path: str) -> List[str]:
    """
    Generate candidate repository URLs for gopkg.in modules.
    """
    if not module_path.startswith("gopkg.in/"):
        return []

    remainder = module_path[len("gopkg.in/"):]
    if not remainder:
        return []

    parts = remainder.split('/')
    last = parts[-1]

    match = re.match(r'(?P<name>.+?)\.v\d+(?:[.\w-]*)?$', last)
    if not match:
        return []

    repo_name = match.group('name')
    owner_segments = parts[:-1]

    owner_variants: List[str] = []
    if owner_segments:
        canonical_owner = '/'.join(owner_segments)
        owner_variants.append(canonical_owner)

        # Provide fallbacks with dotted segments replaced
        dotted_to_hyphen = '/'.join(segment.replace('.', '-') for segment in owner_segments)
        dotted_to_empty = '/'.join(segment.replace('.', '') for segment in owner_segments)
        for candidate in (dotted_to_hyphen, dotted_to_empty):
            if candidate and candidate not in owner_variants:
                owner_variants.append(candidate)
    else:
        # Common conventions used by gopkg.in vanity repos
        owner_variants.extend([
            f"go-{repo_name}",
            repo_name,
            f"{repo_name}-go",
        ])

    urls: List[str] = []
    seen: Set[str] = set()
    for owner in owner_variants:
        owner = owner.strip('/')
        if not owner:
            continue
        candidate = f"https://github.com/{owner}/{repo_name}"
        if candidate not in seen:
            seen.add(candidate)
            urls.append(candidate)
    return urls
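
# Example expansions (the gopkg.in convention maps pkg.vN to a GitHub repository):
#   _candidate_gopkg_repos("gopkg.in/yaml.v3")
#   -> ["https://github.com/go-yaml/yaml",
#       "https://github.com/yaml/yaml",
#       "https://github.com/yaml-go/yaml"]
#   _candidate_gopkg_repos("gopkg.in/natefinch/lumberjack.v2")
#   -> ["https://github.com/natefinch/lumberjack"]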


def _recalculate_subdir_from_vanity(vcs_url: str, module_parts: List[str], current_subdir: str) -> str:
    """
    Recalculate the module subdirectory when a vanity import redirects to a different repository layout.
    """
    if not vcs_url:
        return current_subdir

    vcs_repo_name = vcs_url.rstrip('/').split('/')[-1]
    if vcs_repo_name.endswith('.git'):
        vcs_repo_name = vcs_repo_name[:-4]

    repo_boundary_index = None
    for i, part in enumerate(module_parts):
        if part == vcs_repo_name or part in vcs_repo_name or vcs_repo_name.endswith(part):
            repo_boundary_index = i + 1
            break

    if repo_boundary_index is not None and repo_boundary_index < len(module_parts):
        subdir_parts = module_parts[repo_boundary_index:]
        if subdir_parts and subdir_parts[-1].startswith('v') and subdir_parts[-1][1:].isdigit():
            subdir_parts = subdir_parts[:-1]
        return '/'.join(subdir_parts) if subdir_parts else ''

    if len(module_parts) <= 3:
        return ''

    return current_subdir
|
||
|
||
|
||
def resolve_module_metadata(module_path: str, version: str) -> Optional[dict]:
|
||
parts = module_path.split('/')
|
||
vanity_repo = None # Track if module was resolved via vanity URL
|
||
|
||
tag = version.split('+')[0]
|
||
pseudo_info = parse_pseudo_version_tag(tag)
|
||
expected_commit_prefix = pseudo_info[1] if pseudo_info else None
|
||

    cached = get_cached_metadata(module_path, version)
    if cached:
        override_urls = repo_override_candidates(module_path, version)
        if expected_commit_prefix:
            cached_commit = cached.get('vcs_hash') or ''
            if cached_commit and not cached_commit.startswith(expected_commit_prefix):
                cached = None
        if cached and override_urls:
            url = cached.get('vcs_url') or ''
            if url and url not in override_urls:
                cached = None
        if cached and not expected_commit_prefix:
            ref_hint = cached.get('vcs_ref', '')
            commit_check, _ = _lookup_commit_for_version(cached.get('vcs_url', ''), version, ref_hint)
            if not commit_check or commit_check.lower() != (cached.get('vcs_hash', '') or '').lower():
                cached = None

    def fetch_go_metadata() -> Optional[Dict[str, str]]:
        info = query_module_via_go_list(module_path, version)
        if info:
            return info
        if go_mod_download(module_path, version):
            return query_module_via_go_list(module_path, version)
        return None

    def resolve_with_go_info(go_info: Optional[Dict[str, str]], fallback_url: str, fallback_subdir: str) -> Optional[dict]:
        if not go_info:
            return None

        candidate_urls: List[str] = []
        overrides = repo_override_candidates(module_path, version)
        candidate_urls.extend(overrides)
        info_url = (go_info.get('vcs_url') or '').strip()
        if info_url and info_url not in candidate_urls:
            candidate_urls.append(info_url)
        if fallback_url and fallback_url not in candidate_urls:
            candidate_urls.append(fallback_url)

        timestamp_hint = go_info.get('timestamp') or derive_timestamp_from_version(version)
        subdir_hint = go_info.get('subdir', '') or fallback_subdir
        ref_hint = go_info.get('vcs_ref', '')

        for candidate in candidate_urls:
            if not _url_allowed_for_module(module_path, candidate, version):
                continue
            commit_candidate, timestamp_candidate = _lookup_commit_for_version(candidate, version, ref_hint)
            if commit_candidate:
                final_timestamp = timestamp_candidate or timestamp_hint
                update_metadata_cache(
                    module_path,
                    version,
                    candidate,
                    commit_candidate,
                    final_timestamp,
                    subdir_hint,
                    ref_hint,
                    dirty=True,
                )
                return {
                    "module_path": module_path,
                    "version": version,
                    "vcs_url": candidate,
                    "vcs_hash": commit_candidate,
                    "vcs_ref": ref_hint,
                    "timestamp": final_timestamp,
                    "subdir": subdir_hint,
                }
        return None

    # Handle gopkg.in special case
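    # Well-known shape of this case: gopkg.in/yaml.v3 is typically served from
    # https://github.com/go-yaml/yaml, with the ".v3" suffix selecting the
    # major-version tag series rather than a subdirectory.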
    if parts[0] == 'gopkg.in':
        repo_candidates: List[str] = []
        vanity_repo = query_vanity_url(module_path)
        if vanity_repo:
            repo_candidates.append(vanity_repo)
        repo_candidates.extend(_candidate_gopkg_repos(module_path))
        if cached and cached.get('vcs_url'):
            repo_candidates.insert(0, cached['vcs_url'])

        for vcs_url in repo_candidates:
            if not vcs_url:
                continue
            commit, timestamp = _lookup_commit_for_version(vcs_url, version)
            if commit:
                resolved_timestamp = timestamp or derive_timestamp_from_version(version)
                update_metadata_cache(module_path, version, vcs_url, commit, resolved_timestamp, '', '', dirty=True)
                return {
                    "module_path": module_path,
                    "version": version,
                    "vcs_url": vcs_url,
                    "vcs_hash": commit,
                    "vcs_ref": "",
                    "timestamp": resolved_timestamp,
                    "subdir": "",
                }

        go_info = fetch_go_metadata()
        result = resolve_with_go_info(go_info, '', '')

        if result:
            return result

        if cached:
            return cached

        print(f" ⚠️ Unable to derive repository for gopkg.in path {module_path}@{version}")
        return None

    if len(parts) < 3:
        go_info = fetch_go_metadata()
        result = resolve_with_go_info(go_info, '', '')
        if result:
            return result

        vanity_repo = query_vanity_url(module_path)
        if vanity_repo:
            commit, timestamp = _lookup_commit_for_version(vanity_repo, version)
            if commit:
                resolved_timestamp = timestamp or derive_timestamp_from_version(version)
                update_metadata_cache(module_path, version, vanity_repo, commit, resolved_timestamp, '', '', dirty=True)
                return {
                    "module_path": module_path,
                    "version": version,
                    "vcs_url": vanity_repo,
                    "vcs_hash": commit,
                    "vcs_ref": "",
                    "timestamp": resolved_timestamp,
                    "subdir": '',
                }

        if cached:
            return cached

        print(f" ⚠️ Unable to derive repository for {module_path}@{version}")
        return None
    else:
        # Default calculation assuming 3-part paths (domain/org/repo)
        base_repo = '/'.join(parts[:3])

        # Calculate subdir from module path, but strip version suffixes (v2, v3, v11, etc.)
        if len(parts) > 3:
            subdir_parts = parts[3:]
            # Remove trailing version suffix if present (e.g., v2, v3, v11)
            if subdir_parts and subdir_parts[-1].startswith('v') and subdir_parts[-1][1:].isdigit():
                subdir_parts = subdir_parts[:-1]
            subdir = '/'.join(subdir_parts) if subdir_parts else ''
        else:
            subdir = ''

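        # Path-splitting sketch (hypothetical module): for
        # "github.com/acme/widgets/pkg/api/v2", base_repo is
        # "github.com/acme/widgets" and the derived subdir is "pkg/api"
        # (the trailing "/v2" major-version element is stripped).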
        override_candidate = None
        override_urls = repo_override_candidates(module_path, version)
        if override_urls:
            override_candidate = override_urls[0]

        if override_candidate:
            vcs_url = override_candidate
        elif parts[0] == 'golang.org' and len(parts) >= 3 and parts[1] == 'x':
            pkg_name = parts[2]
            vcs_url = f"https://go.googlesource.com/{pkg_name}"
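            # Known mapping: e.g. golang.org/x/tools is served from
            # https://go.googlesource.com/tools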
        elif parts[0] == 'github.com' and len(parts) >= 3:
            vcs_url = f"https://{base_repo}"
        else:
            vanity_repo = query_vanity_url(module_path)
            if vanity_repo:
                vcs_url = vanity_repo
                subdir = _recalculate_subdir_from_vanity(vcs_url, parts, subdir)
            else:
                vcs_url = f"https://{base_repo}"

        if cached and cached.get('vcs_url') and cached.get('vcs_hash'):
            if vanity_repo:
                adjusted_subdir = _recalculate_subdir_from_vanity(
                    cached['vcs_url'],
                    parts,
                    cached.get('subdir', ''),
                )
                if adjusted_subdir != cached.get('subdir', ''):
                    cached['subdir'] = adjusted_subdir
                    update_metadata_cache(
                        module_path,
                        version,
                        cached['vcs_url'],
                        cached['vcs_hash'],
                        cached['timestamp'],
                        adjusted_subdir,
                        cached.get('vcs_ref', ''),
                        dirty=True,
                    )
            return cached

        commit, timestamp = _lookup_commit_for_version(vcs_url, version)
        if not commit:
            go_info = fetch_go_metadata()
            result = resolve_with_go_info(go_info, vcs_url, subdir)
            if result:
                return result

            FAILED_MODULE_PATHS.add(module_path)
            _record_skipped_module(module_path, version, "no repository metadata from go.sum/go list")
            print(f" ⚠️ Unable to derive repository for {module_path}@{version}")
            if cached and cached.get('vcs_hash'):
                return cached
            return None

        if not _url_allowed_for_module(module_path, vcs_url, version):
            FAILED_MODULE_PATHS.add(module_path)
            _record_skipped_module(module_path, version, "resolved repo not allowed by override policy")
            print(f" ⚠️ Resolved repo {vcs_url} for {module_path}@{version} not in override allowlist")
            if cached and cached.get('vcs_hash'):
                return cached
            return None

        resolved_timestamp = timestamp or derive_timestamp_from_version(version)

        update_metadata_cache(module_path, version, vcs_url, commit, resolved_timestamp, subdir, '', dirty=True)

        return {
            "module_path": module_path,
            "version": version,
            "vcs_url": vcs_url,
            "vcs_hash": commit,
            "vcs_ref": "",
            "timestamp": resolved_timestamp,
            "subdir": subdir,
        }


# =============================================================================
# Utility Functions
# =============================================================================

def unescape_module_path(path: str) -> str:
    """
    Unescape Go module paths that use ! for uppercase letters.
    Example: github.com/!sirupsen/logrus -> github.com/Sirupsen/logrus
    """
    return re.sub(r'!([a-z])', lambda m: m.group(1).upper(), path)

def escape_module_path(path: str) -> str:
    """
    Escape Go module paths by converting uppercase to !lowercase.
    Example: github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus
    """
    return re.sub(r'([A-Z])', lambda m: '!' + m.group(1).lower(), path)

# =============================================================================
# Phase 1: Discovery
# =============================================================================

def parse_go_mod_requires(go_mod_path: Path) -> List[tuple]:
    """
    Extract ALL module requirements from go.mod (direct + indirect).

    This replaces the need for fast-fix-module.py by discovering all
    transitive dependencies that Go needs.

    Returns list of (module_path, version) tuples.
    """
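    # Parsing sketch (hypothetical go.mod):
    #     require golang.org/x/sys v0.1.0
    #     require (
    #         github.com/acme/widgets v1.2.3
    #         gopkg.in/check.v1 v1.0.0 // indirect
    #     )
    # yields [("golang.org/x/sys", "v0.1.0"), ("github.com/acme/widgets", "v1.2.3"),
    #         ("gopkg.in/check.v1", "v1.0.0")].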
    modules = []

    if not go_mod_path.exists():
        print(f"Warning: go.mod not found at {go_mod_path}")
        return modules

    in_require = False

    try:
        with open(go_mod_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()

                # Start of require block
                if line.startswith('require ('):
                    in_require = True
                    continue

                # End of require block
                if in_require and line == ')':
                    in_require = False
                    continue

                # Single-line require
                if line.startswith('require ') and '(' not in line:
                    parts = line.split()
                    if len(parts) >= 3:  # require module version
                        module = parts[1]
                        version = parts[2]
                        modules.append((module, version))
                    continue

                # Multi-line require block entry
                if in_require and line:
                    # Skip comments
                    if line.startswith('//'):
                        continue

                    # Parse: "module version // indirect" or just "module version"
                    parts = line.split()
                    if len(parts) >= 2:
                        module = parts[0]
                        version = parts[1]
                        modules.append((module, version))

    except Exception as e:
        print(f"Error parsing go.mod: {e}")

    return modules


def download_all_required_modules(source_dir: Path, gomodcache: Path) -> None:
    """
    Download ALL modules required by go.mod (direct + indirect).

    This ensures that indirect/transitive dependencies have .info files
    in the GOMODCACHE, which allows discover_modules() to find them.

    This is the key to replacing fast-fix-module.py - by downloading
    everything upfront, we make all modules discoverable.
    """
    go_mod_path = source_dir / "go.mod"

    print("\n" + "=" * 70)
    print("DISCOVERY ENHANCEMENT: Downloading all required modules")
    print("=" * 70)
    print(f"Parsing {go_mod_path}...")

    required_modules = parse_go_mod_requires(go_mod_path)

    if not required_modules:
        print("Warning: No modules found in go.mod")
        return

    print(f"Found {len(required_modules)} total modules in go.mod (direct + indirect)")

    # Set up environment for Go
    env = os.environ.copy()
    env['GOMODCACHE'] = str(gomodcache)
    env['GOPROXY'] = 'https://proxy.golang.org'

    # Download each module to ensure .info files exist
    success_count = 0
    skip_count = 0
    fail_count = 0

    for module_path, version in required_modules:
        # Check if .info file already exists
        escaped_module = escape_module_path(module_path)
        escaped_version = escape_module_path(version)
        info_path = gomodcache / "cache" / "download" / escaped_module / "@v" / f"{escaped_version}.info"
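        # Layout sketch: for github.com/BurntSushi/toml@v1.2.1 this resolves to
        #   <gomodcache>/cache/download/github.com/!burnt!sushi/toml/@v/v1.2.1.info
        # (uppercase letters are !-escaped by escape_module_path() above).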

        if info_path.exists():
            skip_count += 1
            continue

        # Download to get .info file with VCS metadata
        try:
            result = subprocess.run(
                ['go', 'mod', 'download', f'{module_path}@{version}'],
                cwd=source_dir,
                env=env,
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode == 0:
                success_count += 1
            else:
                fail_count += 1
                if "no matching versions" not in result.stderr:
                    print(f" Warning: Failed to download {module_path}@{version}: {result.stderr.strip()[:100]}")

        except subprocess.TimeoutExpired:
            fail_count += 1
            print(f" Warning: Timeout downloading {module_path}@{version}")
        except Exception as e:
            fail_count += 1
            print(f" Warning: Error downloading {module_path}@{version}: {e}")

    print("\nDownload results:")
    print(f" ✓ {success_count} modules downloaded")
    print(f" ⊙ {skip_count} modules already cached")
    print(f" ✗ {fail_count} modules failed")
    print(f" → Total: {len(required_modules)} modules")


def discover_modules(source_dir: Path, gomodcache: Optional[str] = None) -> List[Dict]:
    """
    Phase 1: Discovery

    Let Go download modules to discover correct paths and metadata.
    This is ONLY for discovery - we build from git sources.

    Returns list of modules with:
    - module_path: CORRECT path from filesystem (no /v3 stripping!)
    - version: Module version
    - vcs_url: Git repository URL
    - vcs_hash: Git commit hash
    - vcs_ref: Git reference (tag/branch)
    - timestamp: Commit timestamp
    - subdir: Subdirectory within repo (for submodules)
    """
    global CURRENT_GOMODCACHE
    print("\n" + "=" * 70)
    print("PHASE 1: DISCOVERY - Using Go to discover module metadata")
    print("=" * 70)

    # Create temporary or use provided GOMODCACHE
    if gomodcache:
        temp_cache = Path(gomodcache)
        print(f"Using existing GOMODCACHE: {temp_cache}")
        cleanup_cache = False
    else:
        temp_cache = Path(tempfile.mkdtemp(prefix="go-discover-"))
        print(f"Created temporary cache: {temp_cache}")
        cleanup_cache = True
    CURRENT_GOMODCACHE = str(temp_cache)

    try:
        ensure_path_is_writable(temp_cache)

        # Set up environment for Go
        env = os.environ.copy()
        env['GOMODCACHE'] = str(temp_cache)
        env['GOPROXY'] = 'https://proxy.golang.org'

        print("\nDownloading modules to discover metadata...")
        print(f"Source: {source_dir}")

        # Let Go download everything (initial discovery)
        result = subprocess.run(
            ['go', 'mod', 'download'],
            cwd=source_dir,
            env=env,
            capture_output=True,
            text=True
        )

        if result.returncode != 0:
            print(f"Warning: go mod download had errors:\n{result.stderr}")
            # Continue anyway - some modules may have been downloaded

        # PRIORITY #2 FIX: Download ALL modules from go.mod (direct + indirect)
        # This replaces the need for fast-fix-module.py by ensuring all
        # transitive dependencies have .info files for discovery
        download_all_required_modules(source_dir, temp_cache)

        # Walk filesystem to discover what Go created
        modules = []
        download_dir = temp_cache / "cache" / "download"

        if not download_dir.exists():
            print(f"Error: Download directory not found: {download_dir}")
            return []

        print(f"\nScanning {download_dir} for modules...")

        for dirpath, _, filenames in os.walk(download_dir):
            path_parts = Path(dirpath).relative_to(download_dir).parts

            # Look for @v directories
            if not path_parts or path_parts[-1] != '@v':
                continue

            # Module path is everything before @v
            module_path = '/'.join(path_parts[:-1])
            module_path = unescape_module_path(module_path)  # Unescape !-encoding
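            # Walk sketch: path parts ('github.com', '!burnt!sushi', 'toml', '@v')
            # become the module path "github.com/BurntSushi/toml".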

            # Process each .info file
            for filename in filenames:
                if not filename.endswith('.info'):
                    continue

                version = filename[:-5]  # Strip .info extension
                info_path = Path(dirpath) / filename

                try:
                    # Read metadata from .info file
                    with open(info_path) as f:
                        info = json.load(f)

                    # Extract VCS information
                    origin = info.get('Origin', {})
                    vcs_url = origin.get('URL')
                    vcs_hash = origin.get('Hash')
                    vcs_ref = origin.get('Ref', '')
                    subdir = origin.get('Subdir', '')
                    # Timestamp used by the verification calls below; initialized
                    # here so it is always defined for this module, and refined
                    # later if go list supplies a better value.
                    origin_time = info.get('Time', '')

                    if not vcs_url or not vcs_hash:
                        # Try to refresh cache entry and ask Go directly for metadata.
                        go_mod_download(module_path, version)

                        # Reload .info in case go mod download updated it.
                        try:
                            with open(info_path) as f:
                                info = json.load(f)
                            origin = info.get('Origin', {})
                            vcs_url = origin.get('URL')
                            vcs_hash = origin.get('Hash')
                            vcs_ref = origin.get('Ref', '')
                            subdir = origin.get('Subdir', '')
                            origin_time = info.get('Time', origin_time)
                        except Exception:
                            pass

                        if not vcs_url or not vcs_hash:
                            go_info = query_module_via_go_list(module_path, version)
                            if go_info:
                                vcs_url = go_info.get('vcs_url')
                                vcs_hash = go_info.get('commit')
                                subdir = go_info.get('subdir', subdir)
                                origin_time = go_info.get('timestamp', '')
                                if origin_time:
                                    info['Time'] = origin_time

                        if not vcs_url or not vcs_hash:
                            print(f" ⚠️ Skipping {module_path}@{version}: No VCS info")
                            continue

                    overrides = repo_override_candidates(module_path, version)
                    if overrides:
                        vcs_url = overrides[0]

                    # BitBake requires full 40-character commit hashes
                    if len(vcs_hash) != 40:
                        print(f" ⚠️ Skipping {module_path}@{version}: Short commit hash ({vcs_hash})")
                        continue

                    # PROACTIVE dangling commit detection and correction
                    # Check if commit is BitBake-fetchable BEFORE expensive verification
                    # BitBake's nobranch=1 requires commits to be branch/tag HEADs, not dangling commits
                    if VERIFY_ENABLED and vcs_ref and vcs_ref.startswith("refs/"):
                        if not is_commit_bitbake_fetchable(vcs_url, vcs_hash, vcs_ref):
                            print(f" ⚠️ DANGLING COMMIT: {module_path}@{version} commit {vcs_hash[:12]} not a branch/tag HEAD")

                            # Try to correct by dereferencing the ref
                            corrected_hash = correct_commit_hash_from_ref(vcs_url, vcs_hash, vcs_ref)
                            if corrected_hash:
                                print(f" ✓ Corrected hash by dereferencing {vcs_ref}: {vcs_hash[:12]} → {corrected_hash[:12]}")
                                vcs_hash = corrected_hash
                            else:
                                print(" ❌ Could not auto-correct dangling commit")
                                # Continue anyway - verification will catch if it's truly unfetchable

                    # Validate commit exists in repository (detect force-pushed tags)
                    # If verification is enabled, check that the commit from .info file
                    # actually exists in the repository. If not, refresh from Go proxy.
                    commit_verified = VERIFY_ENABLED and verify_commit_accessible(vcs_url, vcs_hash, vcs_ref, version, origin_time)

                    # Apply fallback commit if verification used one (for orphaned commits)
                    if commit_verified:
                        vcs_hash = get_actual_commit(vcs_url, vcs_hash)

                    if VERIFY_ENABLED and not commit_verified:
                        print(f" ⚠️ STALE CACHE: {module_path}@{version} commit {vcs_hash[:12]} not found in {vcs_url}")

                        # Last resort: Try proxy refresh (this shouldn't happen if dangling check worked)
                        corrected_hash = correct_commit_hash_from_ref(vcs_url, vcs_hash, vcs_ref)
                        if corrected_hash:
                            print(f" ✓ Corrected hash by dereferencing {vcs_ref}: {vcs_hash[:12]} → {corrected_hash[:12]}")
                            vcs_hash = corrected_hash
                            # Verify the corrected hash is accessible
                            if verify_commit_accessible(vcs_url, vcs_hash, vcs_ref, version, origin_time):
                                # Successfully corrected! Continue with this module (skip proxy refresh)
                                commit_verified = True
                            else:
                                print(" ❌ Even corrected commit not accessible")

                        # If still not verified after correction attempt, try proxy refresh
                        if not commit_verified:
                            # Check if module is actually needed before attempting refresh
                            if not is_module_actually_needed(module_path, CURRENT_SOURCE_DIR):
                                print(" ℹ️ Module not needed by main module (indirect-only), skipping")
                                print(f" (Verified via 'go mod why {module_path}')")
                                continue

                            print(" Attempting to refresh from Go proxy...")

                            # Delete stale .info file to force re-download
                            try:
                                info_path.unlink()
                                print(" Deleted stale .info file")
                            except Exception as e:
                                print(f" Warning: Could not delete .info file: {e}")

                            # Re-download from Go proxy to get current commit
                            try:
                                go_mod_download(module_path, version)

                                # Reload .info file with fresh data
                                if info_path.exists():
                                    with open(info_path) as f:
                                        info = json.load(f)
                                    origin = info.get('Origin', {})
                                    new_vcs_hash = origin.get('Hash')

                                    if new_vcs_hash and new_vcs_hash != vcs_hash:
                                        print(f" ✓ Refreshed: {vcs_hash[:12]} → {new_vcs_hash[:12]}")
                                        vcs_hash = new_vcs_hash
                                        vcs_ref = origin.get('Ref', vcs_ref)

                                        # Verify new commit exists
                                        if not verify_commit_accessible(vcs_url, vcs_hash, vcs_ref, version, origin.get('Time', '')):
                                            print(" ❌ Even refreshed commit not accessible")
                                            # Last resort: check if it's actually needed
                                            if not is_module_actually_needed(module_path, CURRENT_SOURCE_DIR):
                                                print(" ℹ️ Module not needed anyway, skipping")
                                                continue
                                            else:
                                                print(" ❌ Module IS needed but commit unavailable")
                                                print(" This module cannot be built from git sources")
                                                continue
                                    else:
                                        print(" ⚠️ Go proxy returned same commit (permanently deleted)")
                                        # Check if it's actually needed
                                        if not is_module_actually_needed(module_path, CURRENT_SOURCE_DIR):
                                            print(" ℹ️ Module not needed by main module, skipping")
                                            continue
                                        else:
                                            print(" ❌ Module IS needed but commit permanently deleted")
                                            print(" Consider using gomod:// fetcher for this module")
                                            continue
                                else:
                                    print(" ❌ Re-download failed, skipping module")
                                    continue
                            except Exception as e:
                                print(f" ❌ Refresh failed: {e}")
                                continue

                    DOWNLOADED_MODULES.add((module_path, version))
                    modules.append({
                        'module_path': module_path,
                        'version': version,
                        'vcs_url': vcs_url,
                        'vcs_hash': vcs_hash,
                        'vcs_ref': vcs_ref,
                        'timestamp': info.get('Time', ''),
                        'subdir': subdir or '',
                    })

                    print(f" ✓ {module_path}@{version}")

                except Exception as e:
                    print(f" ✗ Error processing {info_path}: {e}")
                    continue

        print(f"\nDiscovered {len(modules)} modules with VCS info")

        # FIX: Synthesize entries for +incompatible versions that lack VCS data
        # These are pre-v2 versions of modules that later adopted semantic import versioning (/v2, /v3, etc.)
        # The GOMODCACHE has .info files for them but without Origin data (old proxy cache)
        # Strategy: For each versioned module path (e.g., foo/v3), check if a base path version
        # with +incompatible exists in GOMODCACHE and lacks VCS data. If so, synthesize an entry.
        #
        # NOTE (2025-11-28): This code overlaps with Fix #29 in extract-native-modules.py, which
        # now uses derive_vcs_info() to handle +incompatible modules at discovery time. Fix #29
        # is more complete because it handles ALL +incompatible modules directly from their path,
        # not just those with a corresponding /vN version. This code is kept as a fallback for
        # cases where extract-native-modules.py wasn't used (e.g., legacy workflows).
        print("\nSynthesizing entries for +incompatible versions without VCS data...")
        synthesized_count = 0

        # Build a map of module_path -> vcs_url for discovered modules
        module_vcs_map: Dict[str, str] = {}
        for mod in modules:
            module_vcs_map[mod['module_path']] = mod['vcs_url']

        # For each module with a versioned path suffix (/v2, /v3, etc.), check for base path incompatible versions
        for mod in list(modules):  # Iterate over copy since we'll append to modules
            module_path = mod['module_path']
            vcs_url = mod['vcs_url']

            # Check if this module has a version suffix (/v2, /v3, etc.)
            version_match = re.search(r'/v(\d+)$', module_path)
            if not version_match:
                continue

            # Extract base path (without /vN suffix)
            base_path = module_path[:module_path.rfind('/v')]

            # Check if we already discovered the base path
            if base_path in module_vcs_map:
                continue  # Base path already has VCS data, no synthesis needed

            # Look for +incompatible versions of the base path in GOMODCACHE
            # Note: GOMODCACHE uses raw paths as directory names (not escaped)
            base_path_dir = download_dir / base_path / '@v'

            if not base_path_dir.exists():
                continue

            # Scan for .info files with +incompatible versions
            for info_file in base_path_dir.glob('*.info'):
                version = info_file.stem

                if not version.endswith('+incompatible'):
                    continue

                # Read the .info file to check if it lacks VCS data
                try:
                    with open(info_file) as f:
                        info = json.load(f)

                    # If it already has Origin data, skip it
                    if 'Origin' in info and info['Origin'].get('URL') and info['Origin'].get('Hash'):
                        continue

                    # This +incompatible version lacks VCS data - synthesize an entry
                    # Extract the tag name from version (e.g., v2.16.0+incompatible -> v2.16.0)
                    tag_version = version.replace('+incompatible', '')
                    tag_ref = f"refs/tags/{tag_version}"

                    # Use git ls-remote to find the commit for this tag.
                    # Prefer the dereferenced tag (^{}) so annotated tags resolve to
                    # the actual commit hash rather than the tag object hash; fall
                    # back to the plain ref for lightweight tags, which have no
                    # peeled entry.
                    tag_commit = git_ls_remote(vcs_url, f"{tag_ref}^{{}}") or git_ls_remote(vcs_url, tag_ref)

                    if not tag_commit:
                        print(f" ⚠️ Could not find tag {tag_ref} for {base_path}@{version}")
                        continue

                    # Synthesize a module entry using data from the versioned path
                    synthesized_module = {
                        'module_path': base_path,  # Use BASE path (without /vN)
                        'version': version,
                        'vcs_url': vcs_url,
                        'vcs_hash': tag_commit,
                        'vcs_ref': tag_ref,
                        'timestamp': info.get('Time', ''),
                        'subdir': '',
                    }

                    modules.append(synthesized_module)
                    module_vcs_map[base_path] = vcs_url  # Prevent duplicate synthesis
                    synthesized_count += 1

                    print(f" ✓ Synthesized {base_path}@{version} (from {module_path} VCS data)")
                    print(f" VCS: {vcs_url}")
                    print(f" Commit: {tag_commit[:12]} (tag {tag_version})")

                except Exception as e:
                    print(f" ⚠️ Error synthesizing {base_path}@{version}: {e}")
                    continue

        if synthesized_count > 0:
            print(f"\nSynthesized {synthesized_count} +incompatible module entries")
        else:
            print("No +incompatible versions needed synthesis")

        print(f"\nTotal modules after synthesis: {len(modules)}")
        return modules

    finally:
        # Defer cleanup of temporary caches until the end of execution
        if cleanup_cache and temp_cache.exists():
            TEMP_GOMODCACHES.append(temp_cache)

# =============================================================================
# Phase 2: Recipe Generation
# =============================================================================

def generate_recipe(modules: List[Dict], source_dir: Path, output_dir: Optional[Path],
                    git_repo: str, git_ref: str, validate_only: bool = False,
                    debug_limit: Optional[int] = None, skip_verify: bool = False,
                    verify_jobs: int = 10) -> bool:
    """
    Phase 2: Recipe Generation

    Generate BitBake recipe with git:// SRC_URI entries.
    No file:// entries - we'll build cache from git during do_create_module_cache.

    Creates:
    - go-mod-git.inc: SRC_URI with git:// entries
    - go-mod-cache.inc: BitBake task to build module cache
    """
    print("\n" + "=" * 70)
    phase_label = "VALIDATION" if validate_only else "RECIPE GENERATION"
    print(f"PHASE 2: {phase_label} - {('commit verification' if validate_only else 'Creating BitBake recipe files')}")
    print("=" * 70)

    src_uri_entries = []
    modules_data = []
    vcs_repos: Dict[str, Dict] = {}

    def repo_key_for_url(url: str) -> str:
        return hashlib.sha256(f"git3:{url}".encode()).hexdigest()

    def commit_cache_key(repo_key: str, commit: str) -> str:
        return hashlib.sha256(f"{repo_key}:{commit}".encode()).hexdigest()

    unresolved_commits: List[Tuple[str, str, str, str, str]] = []

    total_modules = len(modules)
    if debug_limit is not None:
        print(f"\n⚙️ Debug limit active: validating first {debug_limit} modules (total list size {total_modules})")

    if skip_verify:
        print("\n⚙️ Skipping verification (--skip-verify enabled)")

    # First pass: Build repo structure without verification
    for index, module in enumerate(modules, start=1):
        vcs_url = module['vcs_url']
        commit_hash = module['vcs_hash']

        repo_key = repo_key_for_url(vcs_url)
        repo_info = vcs_repos.setdefault(
            repo_key,
            {
                'url': vcs_url,
                'commits': {},  # commit hash -> commit metadata
            },
        )

        if commit_hash not in repo_info['commits']:
            commit_sha = commit_cache_key(repo_key, commit_hash)
            repo_info['commits'][commit_hash] = {
                'commit_sha': commit_sha,
                'modules': [],
            }
        else:
            commit_sha = repo_info['commits'][commit_hash]['commit_sha']

        ref_hint = module.get('vcs_ref', '')
        if ref_hint and not _ref_points_to_commit(vcs_url, ref_hint, commit_hash):
            ref_hint = ''

        entry = repo_info['commits'][commit_hash]
        entry['modules'].append(module)
        if ref_hint:
            entry['ref_hint'] = ref_hint

        module['repo_key'] = repo_key
        module['commit_sha'] = commit_sha

    # Second pass: Verify commits (parallel or sequential) with auto-correction
    # PHASE MERGE: This now includes force-pushed tag detection and auto-correction
    global VERIFY_CORRECTIONS_APPLIED
    if not skip_verify:
        print(f"\n⚙️ Verifying {total_modules} commits with {verify_jobs} parallel jobs")
        corrected_modules = []  # Track corrections for reporting

        def verify_module(module_info):
            index, module = module_info
            vcs_url = module['vcs_url']
            commit_hash = module['vcs_hash']
            ref_hint = module.get('vcs_ref', '')

            print(f" • verifying [{index}/{total_modules}] {module['module_path']}@{module['version']} -> {commit_hash[:12]}")

            # Verify commit is accessible
            if not verify_commit_accessible(vcs_url, commit_hash, ref_hint, module.get('version', ''), module.get('timestamp', '')):
                # PHASE MERGE: If verification fails and we have a ref, try auto-correction
                if ref_hint and ref_hint.startswith("refs/"):
                    corrected_hash = correct_commit_hash_from_ref(vcs_url, commit_hash, ref_hint)
                    if corrected_hash and corrected_hash != commit_hash:
                        print(f" ✓ Auto-corrected: {commit_hash[:12]} → {corrected_hash[:12]} (force-pushed tag)")
                        module['vcs_hash'] = corrected_hash

                        # Update repo_info dict to use the new hash as key
                        repo_key = module['repo_key']
                        if commit_hash in vcs_repos[repo_key]['commits']:
                            # Move the entry from old hash to new hash
                            vcs_repos[repo_key]['commits'][corrected_hash] = vcs_repos[repo_key]['commits'].pop(commit_hash)

                        return ('corrected', module['module_path'], module['version'], commit_hash, corrected_hash)
                    else:
                        # Could not correct - treat as failure
                        return ('failed', module['module_path'], module['version'], commit_hash, vcs_url, ref_hint)
                else:
                    # No ref to dereference - genuine failure
                    return ('failed', module['module_path'], module['version'], commit_hash, vcs_url, ref_hint)
            else:
                # Verification succeeded - apply fallback commit if one was used
                actual_hash = get_actual_commit(vcs_url, commit_hash)
                if actual_hash != commit_hash:
                    print(f" ✓ Applied fallback: {commit_hash[:12]} → {actual_hash[:12]} (orphaned commit)")
                    module['vcs_hash'] = actual_hash

                    # Update repo_info dict to use the new hash as key
                    repo_key = module['repo_key']
                    if commit_hash in vcs_repos[repo_key]['commits']:
                        # Move the entry from old hash to new hash
                        vcs_repos[repo_key]['commits'][actual_hash] = vcs_repos[repo_key]['commits'].pop(commit_hash)

                    return ('corrected', module['module_path'], module['version'], commit_hash, actual_hash)
            return None

        if verify_jobs > 0:
            # Parallel verification
            with concurrent.futures.ThreadPoolExecutor(max_workers=verify_jobs) as executor:
                results = list(executor.map(verify_module, enumerate(modules, start=1)))
        else:
            # Sequential verification (--verify-jobs=0)
            results = []
            for index, module in enumerate(modules, start=1):
                result = verify_module((index, module))
                if result is not None:
                    results.append(result)

                # Save verification cache every 50 modules
                if index % 50 == 0:
                    save_verify_commit_cache(force=True)
                    print(f" 💾 Saved verification cache at {index}/{total_modules}")

        # Separate corrected vs failed results
        corrected_results = [r for r in results if r and r[0] == 'corrected']
        failed_results = [r for r in results if r and r[0] == 'failed']

        # Apply corrections back to modules list (needed for parallel execution)
        if corrected_results:
            VERIFY_CORRECTIONS_APPLIED = True
            print(f"\n✓ Auto-corrected {len(corrected_results)} force-pushed tags:")
            for _, module_path, version, old_hash, new_hash in corrected_results:
                print(f" • {module_path}@{version}: {old_hash[:12]} → {new_hash[:12]}")

                # Find and update the module in the main list
                for module in modules:
                    if module['module_path'] == module_path and module['version'] == version:
                        module['vcs_hash'] = new_hash

                        # Also update the vcs_repos dict
                        repo_key = module['repo_key']
                        if old_hash in vcs_repos[repo_key]['commits']:
                            vcs_repos[repo_key]['commits'][new_hash] = vcs_repos[repo_key]['commits'].pop(old_hash)
                        break
    else:
        # Verification skipped - no failed results
        failed_results = []

    print(f"\nFound {len(vcs_repos)} unique git repositories")
    print(f"Supporting {len(modules)} modules")

    if failed_results:
        print("\n❌ Unable to verify the following module commits against their repositories:")
        for _, module_path, version, commit_hash, vcs_url, ref_hint in failed_results:
            print(f" - {module_path}@{version} ({commit_hash})")
            hint = f" {ref_hint}" if ref_hint else ""
            print(f" try: git fetch --depth=1 {vcs_url}{hint} {commit_hash}")
            print(f" cache: mark reachable via --inject-commit '{vcs_url} {commit_hash}'")
            print(f" repo : override via --set-repo {module_path}@{version} {vcs_url}")
        print("Aborting to prevent emitting invalid SRCREVs.")
        return False

    if validate_only:
        print("\n✅ Validation complete - all commits are reachable upstream")
        return True

    if output_dir is None:
        print("❌ Internal error: output directory missing for recipe generation")
        return False

    # Generate SRC_URI entries for each repo/commit combination
    for repo_key, repo_info in vcs_repos.items():
        git_url = repo_info['url']

        if git_url.startswith('https://'):
            git_url_bb = 'git://' + git_url[8:]
            protocol = 'https'
        elif git_url.startswith('http://'):
            git_url_bb = 'git://' + git_url[7:]
            protocol = 'http'
        else:
            git_url_bb = git_url
            protocol = 'https'

        for idx, (commit_hash, commit_info) in enumerate(sorted(repo_info['commits'].items())):
            fetch_name = f"git_{repo_key[:8]}_{idx}"
            destsuffix = f"vcs_cache/{commit_info['commit_sha']}"

            # Use branch name from ref_hint when available (more reliable than nobranch=1)
            # ref_hint is like "refs/tags/v1.9.3" or "refs/heads/main"
            ref_hint = commit_info.get('ref_hint', '')
            if ref_hint:
                shallow_param = ';shallow=1'
                # For tags, use nobranch=1 since the commit may not be on a branch head
                # For branches, use the branch name directly
                if ref_hint.startswith('refs/tags/'):
                    # Tags: BitBake can fetch tagged commits with nobranch=1
                    branch_param = ';nobranch=1'
                elif ref_hint.startswith('refs/heads/'):
                    # Branches: use the actual branch name
                    branch_name = ref_hint[11:]  # Strip "refs/heads/"
                    branch_param = f';branch={branch_name}'
                else:
                    branch_param = ';nobranch=1'
            else:
                # For pseudo-versions (no ref_hint), check if we detected a branch
                detected_branch = VERIFY_DETECTED_BRANCHES.get((git_url, commit_hash))
                if detected_branch:
                    # Use the detected branch name instead of nobranch=1
                    shallow_param = ''
                    branch_param = f';branch={detected_branch}'
                    print(f" Using detected branch: {detected_branch} for {commit_hash[:12]}")
                else:
                    # No ref and no detected branch - use nobranch=1
                    # This should only happen for genuine orphaned commits that couldn't be fixed
                    shallow_param = ''
                    branch_param = ';nobranch=1'

            src_uri_entries.append(
                f'{git_url_bb};protocol={protocol}{branch_param}{shallow_param};'
                f'rev={commit_hash};'
                f'name={fetch_name};'
                f'destsuffix={destsuffix}'
            )
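            # Resulting entry sketch (hypothetical repo, tag-pinned case):
            #   git://github.com/acme/widgets;protocol=https;nobranch=1;shallow=1;rev=<40-char sha>;name=git_1a2b3c4d_0;destsuffix=vcs_cache/<sha256>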

            commit_info['fetch_name'] = fetch_name
            commit_info['destsuffix'] = destsuffix

            if len(repo_info['commits']) == 1:
                print(f" {fetch_name}: {repo_info['url'][:60]}...")
            else:
                print(f" {fetch_name}: {repo_info['url'][:60]}... (commit {commit_hash[:12]})")

    # Prepare modules data for do_create_module_cache
    for module in modules:
        repo_key = module['repo_key']
        commit_hash = module['vcs_hash']
        commit_info = vcs_repos[repo_key]['commits'][commit_hash]

        update_metadata_cache(
            module['module_path'],
            module['version'],
            module['vcs_url'],
            module['vcs_hash'],
            module.get('timestamp', ''),
            module.get('subdir', ''),
            module.get('vcs_ref', ''),
            dirty=True,
        )

        # DEBUG: Track server/v3 module
        if 'server/v3' in module['module_path']:
            print("\n🔍 DEBUG server/v3: Adding to modules_data")
            print(f" module_path: {module['module_path']}")
            print(f" subdir: '{module.get('subdir', '')}' (from module dict)")
            print(f" timestamp: {module['timestamp']}")
            print(f" vcs_hash: {module['vcs_hash']}")

        modules_data.append({
            'module': module['module_path'],
            'version': module['version'],
            'vcs_hash': commit_info['commit_sha'],
            'timestamp': module['timestamp'],
            'subdir': module.get('subdir', ''),
            'vcs_ref': module.get('vcs_ref', ''),
        })

    # Write go-mod-git.inc
    git_inc_path = output_dir / "go-mod-git.inc"
    print(f"\nWriting {git_inc_path}")

    with open(git_inc_path, 'w') as f:
        f.write("# Generated by oe-go-mod-fetcher.py v" + VERSION + "\n")
        f.write("# Git repositories for Go module dependencies\n\n")
        for entry in src_uri_entries:
            f.write(f'SRC_URI += "{entry}"\n')
        f.write('\n')

        # Collect all tag references for shallow cloning
        # BB_GIT_SHALLOW_EXTRA_REFS ensures these refs are included in shallow clones
        tag_refs = set()
        for module in modules:
            vcs_ref = module.get('vcs_ref', '')
            if vcs_ref and 'refs/tags/' in vcs_ref:
                tag_refs.add(vcs_ref)

        if tag_refs:
            f.write("# Tag references for shallow cloning\n")
            f.write("# Ensures shallow clones include all necessary tags\n")
            f.write("BB_GIT_SHALLOW_EXTRA_REFS = \"\\\n")
            for tag_ref in sorted(tag_refs):
                f.write(f" {tag_ref} \\\n")
            f.write('"\n')

    # Note: SRCREV_* variables are not needed since rev= is embedded directly in SRC_URI

    # Write go-mod-cache.inc
    cache_inc_path = output_dir / "go-mod-cache.inc"
    print(f"Writing {cache_inc_path}")

    with open(cache_inc_path, 'w') as f:
        f.write("# Generated by oe-go-mod-fetcher.py v" + VERSION + "\n")
        f.write("# Module cache data for Go dependencies\n")
        f.write("#\n")
        f.write("# This file contains recipe-specific module metadata.\n")
        f.write("# The task implementations are in go-mod-vcs.bbclass.\n\n")

        # Inherit the bbclass that provides the task implementations
        f.write("inherit go-mod-vcs\n\n")

        # Write modules data as JSON - one module per line for readability
        f.write("# Module metadata for cache building (one module per line)\n")
        f.write("GO_MODULE_CACHE_DATA = '[\\\n")
        for i, mod in enumerate(modules_data):
            line = json.dumps(mod, separators=(',', ':'))
            if i < len(modules_data) - 1:
                f.write(f"{line},\\\n")
            else:
                f.write(f"{line}\\\n")
        f.write("]'\n")
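    # Emitted-line sketch (hypothetical module): each modules_data entry becomes
    # one compact JSON object inside GO_MODULE_CACHE_DATA, e.g.
    #   {"module":"github.com/acme/widgets","version":"v1.2.3","vcs_hash":"<commit cache key>","timestamp":"2023-05-10T23:57:04Z","subdir":"","vcs_ref":"refs/tags/v1.2.3"}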

    print("\n✅ Generated recipe files:")
    print(f" {git_inc_path}")
    print(f" {cache_inc_path}")
    print("\nTo use these files, add to your recipe:")
    print(" require go-mod-git.inc")
    print(" require go-mod-cache.inc")

    return True

# =============================================================================
# Discovered Module Loading (Bootstrap Strategy)
# =============================================================================

def load_discovered_modules(discovered_modules_path: Path) -> Optional[List[Dict]]:
    """
    Load pre-discovered module metadata from BitBake discovery build.

    This implements the bootstrap strategy where a BitBake discovery build has
    already run 'go mod download' (via do_discover_modules task) and
    extract-native-modules.py has extracted complete metadata from the GOMODCACHE.

    Args:
        discovered_modules_path: Path to JSON file with module metadata

    Returns:
        List of module dicts with complete VCS info, or None if load fails
    """
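    # Input sketch (hypothetical entry): each list element looks like
    #   {"module_path": "github.com/acme/widgets", "version": "v1.2.3",
    #    "vcs_url": "https://github.com/acme/widgets", "vcs_hash": "<40-char sha>",
    #    "vcs_ref": "refs/tags/v1.2.3", "timestamp": "2023-05-10T23:57:04Z",
    #    "subdir": ""}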
    if not discovered_modules_path.exists():
        print(f"❌ Discovered modules file not found: {discovered_modules_path}")
        return None

    try:
        with open(discovered_modules_path) as f:
            modules = json.load(f)

        if not isinstance(modules, list):
            print(f"❌ Invalid discovered modules file format (expected list, got {type(modules).__name__})")
            return None

        print(f"✓ Loaded {len(modules)} modules from discovery metadata")
        print(f" File: {discovered_modules_path}")

        # Validate module format
        required_fields = ['module_path', 'version', 'vcs_url', 'vcs_hash']
        for i, module in enumerate(modules):
            if not isinstance(module, dict):
                print(f"❌ Module {i} is not a dict: {module}")
                return None
            for field in required_fields:
                if field not in module:
                    print(f"❌ Module {i} missing required field '{field}': {module.get('module_path', '<unknown>')}")
                    return None

        # Show statistics
        unique_repos = len(set(m['vcs_url'] for m in modules))
        with_subdirs = sum(1 for m in modules if m.get('subdir'))

        print("\nDiscovery metadata summary:")
        print(f" Modules: {len(modules)}")
        print(f" Unique repositories: {unique_repos}")
        print(f" Multi-module repos: {with_subdirs} modules have subdirs")

        # Expand 12-char short hashes to full 40-char hashes.
        # Pseudo-versions like v0.0.0-20161002113705-648efa622239 only contain
        # 12 chars of the commit hash. BitBake's git fetcher needs full 40-char.
        short_hash_modules = [m for m in modules if len(m.get('vcs_hash', '')) == 12]
        if short_hash_modules:
            print(f"\n⚙️ Expanding {len(short_hash_modules)} short hashes to full 40-char...")
            expanded = 0
            failed = 0
            for i, module in enumerate(short_hash_modules):
                if (i + 1) % 20 == 0 or i == 0:
                    print(f" Progress: {i + 1}/{len(short_hash_modules)}...", end='\r', flush=True)

                version = module.get('version', '')
                vcs_url = module['vcs_url']
                short_hash = module['vcs_hash']

                # Parse pseudo-version to get timestamp
                pseudo_info = parse_pseudo_version_tag(version.split('+')[0])
                if pseudo_info:
                    timestamp_str, _ = pseudo_info
                    full_hash = resolve_pseudo_version_commit(
                        vcs_url, timestamp_str, short_hash,
                        clone_cache_dir=CLONE_CACHE_DIR
                    )
                    if full_hash and len(full_hash) == 40:
                        module['vcs_hash'] = full_hash
                        expanded += 1
                    else:
                        failed += 1
                        if VERBOSE_MODE:
                            print(f"\n ⚠️ Could not expand: {module['module_path']}@{version}")
                else:
                    failed += 1

            print(f" Expanded {expanded} short hashes, {failed} failed ")

        # Filter out modules with empty vcs_hash - these are typically pre-Go 1.18
        # modules lacking Origin metadata (e.g. pre-release pseudo-versions) that
        # cannot be fetched from git. They are usually transitive dependencies that
        # aren't actually needed by the build.
        empty_hash_modules = [m for m in modules if not m.get('vcs_hash')]
        if empty_hash_modules:
            print(f"\n⚠️ Filtering out {len(empty_hash_modules)} modules with empty vcs_hash:")
            for m in empty_hash_modules:
                print(f" - {m['module_path']}@{m['version']}")
            modules = [m for m in modules if m.get('vcs_hash')]
            print(f" Remaining modules: {len(modules)}")

        return modules

    except json.JSONDecodeError as e:
        print(f"❌ Failed to parse discovered modules JSON: {e}")
        return None
    except Exception as e:
        print(f"❌ Error loading discovered modules: {e}")
        return None

# =============================================================================
# Main Entry Point
# =============================================================================

def main():
    global LOG_PATH, CURRENT_GOMODCACHE
    parser = argparse.ArgumentParser(
        description=f"Generate BitBake recipes for Go modules using hybrid approach (v{VERSION})",
        epilog="""
This tool uses a 3-phase hybrid approach:
  1. Discovery: Run 'go mod download' to get correct module paths
  2. Recipe Generation: Create git:// SRC_URI entries for BitBake
  3. Cache Building: Build module cache from git during do_create_module_cache

Persistent Caches:
  The generator maintains caches in the data/ subdirectory:
  - data/module-cache.json: Module metadata (VCS URL, timestamp, subdir, etc.)
  - data/ls-remote-cache.json: Git ls-remote results
  - data/vanity-url-cache.json: Vanity import path resolution
  - data/verify-cache.json: Commit verification status

  These caches speed up regeneration but may need cleaning when:
  - Derivation logic changes (e.g., subdir calculation fixes)
  - Cached data becomes stale or incorrect

  Use --clean-cache to remove metadata cache before regeneration.
  Use --clean-ls-remote-cache to remove both caches (slower, but fully fresh).

Examples:
  # Normal regeneration (fast, uses caches)
  %(prog)s --recipedir /path/to/recipe/output

  # Clean metadata cache (e.g., after fixing subdir derivation)
  %(prog)s --recipedir /path/to/recipe/output --clean-cache

  # Fully clean regeneration (slow, calls git ls-remote for everything)
  %(prog)s --recipedir /path/to/recipe/output --clean-ls-remote-cache
""",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        "--recipedir",
        help="Output directory for generated .inc files (required unless running with --validate/--dry-run/--clean-only)"
    )

    parser.add_argument(
        "--gomodcache",
        help="Directory to use for Go module cache (for discovery phase)"
    )

    parser.add_argument(
        "--cache-dir",
        help="Directory to store JSON metadata caches (default: scripts/data)"
    )

    parser.add_argument(
        "--clone-cache-dir",
        help="Directory to cache cloned git repositories (default: scripts/.cache/repos)"
    )

    parser.add_argument(
        "--source-dir",
        help="Source directory containing go.mod (default: current directory)"
    )

    parser.add_argument(
        "--git-repo",
        help="Git repository URL (for documentation purposes)"
    )

    parser.add_argument(
        "--git-ref",
        help="Git reference (for documentation purposes)"
    )

    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Verbose output"
    )

    parser.add_argument(
        "--clean-cache",
        action="store_true",
        help="Clear metadata cache before regeneration (useful when derivation logic changes)"
    )

    parser.add_argument(
        "--clean-ls-remote-cache",
        action="store_true",
        help="Clear git ls-remote cache in addition to metadata cache (implies --clean-cache)"
    )

    parser.add_argument(
        "--skip-legacy-module-cache",
        action="store_true",
        help="Skip importing legacy module metadata from module_cache_task.inc"
    )

    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Execute cache mutations without discovery/generation"
    )

    parser.add_argument(
        "--clean-gomodcache",
        action="store_true",
        help="Clean stale .info files in GOMODCACHE that lack VCS metadata (fixes 'module lookup disabled' errors)"
    )

    parser.add_argument(
        "--validate",
        action="store_true",
        help="Validate module commits without emitting recipe files"
    )

    parser.add_argument(
        "--validate-only",
        action="store_true",
        help=argparse.SUPPRESS
    )

    parser.add_argument(
        "--skip-verify",
        action="store_true",
        help="Skip commit verification (trust cached verify results, much faster)"
    )

    parser.add_argument(
        "--verify-jobs",
        type=int,
        default=10,
        metavar="N",
        help="Number of parallel verification jobs (default: 10, 0=sequential)"
    )

    parser.add_argument(
        "--verify-cached",
        action="store_true",
        help="Verify commits in GOMODCACHE .info files still exist in repositories (detects force-pushed tags)"
    )

    parser.add_argument(
        "--verify-cache-max-age",
        type=int,
        default=30,
        metavar="DAYS",
        help="Re-verify cached commits older than this many days (default: 30, 0=always verify)"
    )

    parser.add_argument(
        "--debug-limit",
        type=int,
        help="Process at most N modules during validation/generation (debug only)"
    )

    parser.add_argument(
        "--inject-commit",
        metavar=("REPO", "COMMIT"),
        nargs=2,
        action="append",
        help="Mark a repo+commit pair as already verified (skips network check)"
    )

    parser.add_argument(
        "--clear-commit",
        metavar=("REPO", "COMMIT"),
        nargs=2,
        action="append",
        help="Remove a repo+commit pair from the verified cache"
    )

    parser.add_argument(
        "--set-repo",
        metavar=("MODULE", "REPO"),
        nargs=2,
        action="append",
        help="Pin a module (or module@version) to the specified repository URL"
    )

    parser.add_argument(
        "--clear-repo",
        metavar="MODULE",
        nargs=1,
        action="append",
        help="Remove a previously pinned repository override (module or module@version)"
    )

    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {VERSION}"
    )

    parser.add_argument(
        "--discovered-modules",
        dest="discovered_modules",
        help="JSON file with pre-discovered module metadata (skips discovery phase)"
    )
    # Backward compatibility alias for --discovered-modules
    parser.add_argument("--native-modules", dest="discovered_modules", help=argparse.SUPPRESS)

    # Add compatibility args that we ignore (for backward compatibility)
    parser.add_argument("--use-hybrid", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("go_mod_file", nargs='?', help=argparse.SUPPRESS)

    args = parser.parse_args()
    if args.validate_only:
        args.validate = True

    # Set global verbose mode
    global VERBOSE_MODE
    VERBOSE_MODE = args.verbose

    original_stdout = sys.stdout
    original_stderr = sys.stderr
    log_handle = None
    log_path = None
    try:
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
        log_path = Path(tempfile.gettempdir()) / f"oe-go-mod-fetcher-{timestamp}.log"
        LOG_PATH = log_path
        log_handle = log_path.open("w", encoding="utf-8", buffering=1)
        sys.stdout = Tee(original_stdout, log_handle)
        sys.stderr = Tee(original_stderr, log_handle)

        print(f"Go Module Git Fetcher v{VERSION}")
        print("Hybrid Architecture: Discovery from Go + Build from Git")
        print("=" * 70)
        print(f"Logs: {log_path} (pass --dry-run to load caches only)")

        exit_code = _execute(args)
    except KeyboardInterrupt:
        print("\n\nOperation cancelled by user")
        exit_code = 1
    except Exception as e:
        print(f"\n❌ Unexpected error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        exit_code = 1
    finally:
        save_ls_remote_cache()
        save_metadata_cache()
        save_vanity_url_cache()
        save_verify_commit_cache()
        save_repo_overrides()
        for temp_cache in TEMP_GOMODCACHES:
            try:
                if temp_cache.exists():
                    shutil.rmtree(temp_cache)
            except Exception:
                pass
        TEMP_GOMODCACHES.clear()
        if CURRENT_GOMODCACHE and not Path(CURRENT_GOMODCACHE).exists():
            CURRENT_GOMODCACHE = None
        if log_handle:
            log_handle.flush()
            log_handle.close()
        sys.stdout = original_stdout
        sys.stderr = original_stderr
        if LOG_PATH:
            print(f"Logs: {LOG_PATH}")

    sys.exit(exit_code)


if __name__ == "__main__":
    main()