mirror of
git://git.yoctoproject.org/meta-virtualization.git
synced 2026-01-27 10:41:26 +01:00
Add the oe-go-mod-fetcher.py tool and supporting files for resolving Go module dependencies via git repositories instead of module proxies. oe-go-mod-fetcher.py: - Parses go.mod and go.sum to identify required modules - Resolves module paths to git repositories (handles vanity URLs) - Maps module versions to git commits - Generates SRC_URI entries for bitbake fetcher - Creates go-mod-git.inc and go-mod-cache.inc files - Supports monorepo detection and nested module handling - Caches resolution results for performance extract-discovered-modules.py: - Helper script to extract module information from discovery cache - Used by go-mod-discovery.bbclass during build Also adds .gitignore to exclude runtime caches from version control. Signed-off-by: Bruce Ashfield <bruce.ashfield@gmail.com>
492 lines
19 KiB
Python
Executable File
492 lines
19 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# go-dep processor
|
|
#
|
|
# Copyright (C) 2025 Bruce Ashfield
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
# published by the Free Software Foundation.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
"""
|
|
Extract complete module metadata from BitBake Go discovery build cache.
|
|
|
|
This script walks a GOMODCACHE directory (from BitBake discovery build) and
|
|
extracts all module metadata from .info files, including VCS information.
|
|
|
|
Usage:
|
|
extract-discovered-modules.py --gomodcache /path/to/cache --output modules.json
|
|
|
|
The script creates:
|
|
- modules.json: Complete metadata with VCS URLs, commits, subdirs, timestamps
|
|
- modules.txt: Simple module@version list
|
|
|
|
This provides 100% accurate module discovery for BitBake recipe generation.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import urllib.parse
|
|
from pathlib import Path
|
|
|
|
|
|
def git_ls_remote(url: str, ref: str) -> str:
|
|
"""
|
|
Query a git repository for a ref and return the commit hash.
|
|
|
|
For tags, also tries dereferenced form (^{}) to handle annotated tags.
|
|
"""
|
|
try:
|
|
# Try dereferenced form first (handles annotated tags)
|
|
refs_to_try = [f"{ref}^{{}}", ref] if ref.startswith("refs/tags/") else [ref]
|
|
|
|
for query_ref in refs_to_try:
|
|
result = subprocess.run(
|
|
['git', 'ls-remote', url, query_ref],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=30
|
|
)
|
|
if result.returncode == 0 and result.stdout.strip():
|
|
# Parse: "hash<tab>ref"
|
|
line = result.stdout.strip().split('\n')[0]
|
|
parts = line.split('\t')
|
|
if len(parts) >= 1 and len(parts[0]) == 40:
|
|
return parts[0]
|
|
except Exception:
|
|
pass
|
|
return ''
|
|
|
|
|
|
def resolve_short_hash(url: str, short_hash: str) -> str:
|
|
"""
|
|
Resolve a 12-char short hash to full 40-char hash.
|
|
|
|
Go pseudo-versions only contain 12 characters of the commit hash.
|
|
BitBake's git fetcher needs the full 40-char hash.
|
|
|
|
Strategy: Try GitHub API first (fast), then git ls-remote, then shallow clone.
|
|
"""
|
|
if len(short_hash) != 12:
|
|
return short_hash # Already full or invalid
|
|
|
|
# First try: GitHub API (fast - single HTTP request)
|
|
# Note: Rate limited to 60/hour without auth token
|
|
if 'github.com' in url:
|
|
try:
|
|
import urllib.request
|
|
repo_path = url.replace('https://github.com/', '').replace('.git', '')
|
|
api_url = f"https://api.github.com/repos/{repo_path}/commits/{short_hash}"
|
|
req = urllib.request.Request(api_url, headers={'User-Agent': 'oe-go-mod-fetcher'})
|
|
with urllib.request.urlopen(req, timeout=10) as response:
|
|
data = json.loads(response.read().decode())
|
|
if 'sha' in data and len(data['sha']) == 40:
|
|
return data['sha']
|
|
except Exception:
|
|
pass # Rate limited or other error - try next method
|
|
|
|
# Second try: git ls-remote (downloads all refs, checks if any match)
|
|
# This works if the commit is a branch head or tag
|
|
try:
|
|
result = subprocess.run(
|
|
['git', 'ls-remote', url],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=30
|
|
)
|
|
if result.returncode == 0:
|
|
for line in result.stdout.strip().split('\n'):
|
|
if line:
|
|
full_hash = line.split('\t')[0]
|
|
if full_hash.startswith(short_hash):
|
|
return full_hash
|
|
except Exception:
|
|
pass
|
|
|
|
# Third try: Shallow clone and rev-parse (slower but works for any commit)
|
|
try:
|
|
with tempfile.TemporaryDirectory(prefix='hash-resolve-') as tmpdir:
|
|
# Clone with minimal depth
|
|
clone_result = subprocess.run(
|
|
['git', 'clone', '--bare', '--filter=blob:none', url, tmpdir + '/repo'],
|
|
capture_output=True,
|
|
timeout=120,
|
|
env={**os.environ, 'GIT_TERMINAL_PROMPT': '0'}
|
|
)
|
|
if clone_result.returncode == 0:
|
|
# Use rev-parse to expand short hash
|
|
parse_result = subprocess.run(
|
|
['git', 'rev-parse', short_hash],
|
|
cwd=tmpdir + '/repo',
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=10
|
|
)
|
|
if parse_result.returncode == 0:
|
|
full_hash = parse_result.stdout.strip()
|
|
if len(full_hash) == 40:
|
|
return full_hash
|
|
except Exception:
|
|
pass
|
|
|
|
# Could not resolve - return original short hash
|
|
return short_hash
|
|
|
|
|
|
def derive_vcs_info(module_path, version):
|
|
"""
|
|
Derive VCS URL and commit info from module path and version.
|
|
|
|
This is used for modules where the Go proxy doesn't provide Origin metadata
|
|
(older modules cached before Go 1.18).
|
|
|
|
Returns:
|
|
dict with vcs_url, vcs_hash (if pseudo-version), vcs_ref, subdir
|
|
or None if cannot derive
|
|
"""
|
|
vcs_url = None
|
|
vcs_hash = ''
|
|
vcs_ref = ''
|
|
subpath = '' # FIX #32: Track subpath for multi-module repos (tag prefix)
|
|
|
|
# Derive URL from module path
|
|
if module_path.startswith('github.com/'):
|
|
# github.com/owner/repo or github.com/owner/repo/subpkg
|
|
parts = module_path.split('/')
|
|
if len(parts) >= 3:
|
|
vcs_url = f"https://github.com/{parts[1]}/{parts[2]}"
|
|
# FIX #32: Track subpath for multi-module repos (e.g., github.com/owner/repo/cmd/tool)
|
|
if len(parts) > 3:
|
|
subpath = '/'.join(parts[3:])
|
|
|
|
elif module_path.startswith('gitlab.com/'):
|
|
parts = module_path.split('/')
|
|
if len(parts) >= 3:
|
|
vcs_url = f"https://gitlab.com/{parts[1]}/{parts[2]}"
|
|
|
|
elif module_path.startswith('bitbucket.org/'):
|
|
parts = module_path.split('/')
|
|
if len(parts) >= 3:
|
|
vcs_url = f"https://bitbucket.org/{parts[1]}/{parts[2]}"
|
|
|
|
elif module_path.startswith('gopkg.in/'):
|
|
# gopkg.in/yaml.v2 -> github.com/go-yaml/yaml
|
|
# gopkg.in/check.v1 -> github.com/go-check/check
|
|
# gopkg.in/pkg.v3 -> github.com/go-pkg/pkg (convention)
|
|
# gopkg.in/fsnotify.v1 -> github.com/fsnotify/fsnotify (no go- prefix)
|
|
match = re.match(r'gopkg\.in/([^/]+)\.v\d+', module_path)
|
|
if match:
|
|
pkg_name = match.group(1)
|
|
# Common mappings - some use go-* prefix, others don't
|
|
mappings = {
|
|
'yaml': 'https://github.com/go-yaml/yaml',
|
|
'check': 'https://github.com/go-check/check',
|
|
'inf': 'https://github.com/go-inf/inf',
|
|
'tomb': 'https://github.com/go-tomb/tomb',
|
|
'fsnotify': 'https://github.com/fsnotify/fsnotify', # No go- prefix
|
|
}
|
|
vcs_url = mappings.get(pkg_name, f"https://github.com/go-{pkg_name}/{pkg_name}")
|
|
|
|
elif module_path.startswith('google.golang.org/'):
|
|
# google.golang.org vanity imports -> github.com/golang/*
|
|
# google.golang.org/appengine -> github.com/golang/appengine
|
|
# google.golang.org/protobuf -> github.com/protocolbuffers/protobuf-go (special case)
|
|
# google.golang.org/grpc -> github.com/grpc/grpc-go (special case)
|
|
# google.golang.org/genproto -> github.com/googleapis/go-genproto (special case)
|
|
#
|
|
# FIX #32: Handle submodules in multi-module repos
|
|
# google.golang.org/grpc/cmd/protoc-gen-go-grpc has tags like:
|
|
# cmd/protoc-gen-go-grpc/v1.1.0 (NOT v1.1.0)
|
|
# We need to track the subpath for tag prefix construction
|
|
parts = module_path.split('/')
|
|
if len(parts) >= 2:
|
|
pkg_name = parts[1] # First component after google.golang.org/
|
|
mappings = {
|
|
'protobuf': 'https://github.com/protocolbuffers/protobuf-go',
|
|
'grpc': 'https://github.com/grpc/grpc-go',
|
|
'genproto': 'https://github.com/googleapis/go-genproto',
|
|
'api': 'https://github.com/googleapis/google-api-go-client',
|
|
}
|
|
vcs_url = mappings.get(pkg_name, f"https://github.com/golang/{pkg_name}")
|
|
# Track subpath for submodule tag construction (e.g., cmd/protoc-gen-go-grpc)
|
|
if len(parts) > 2:
|
|
subpath = '/'.join(parts[2:]) # Everything after google.golang.org/grpc/
|
|
|
|
if not vcs_url:
|
|
return None
|
|
|
|
# Parse version for commit hash (pseudo-versions)
|
|
# Go pseudo-version formats:
|
|
# v0.0.0-20200815063812-42c35b437635 (no base version)
|
|
# v1.2.3-0.20200815063812-42c35b437635 (pre-release with "0." prefix)
|
|
# v1.2.4-0.20200815063812-42c35b437635 (post v1.2.3, pre v1.2.4)
|
|
# The key pattern: optional "0." then YYYYMMDDHHMMSS (14 digits) then 12-char commit hash
|
|
# Also handle +incompatible suffix
|
|
clean_version = version.replace('+incompatible', '')
|
|
|
|
# Try both pseudo-version formats:
|
|
# Format 1: -0.YYYYMMDDHHMMSS-HASH (with "0." prefix)
|
|
# Format 2: -YYYYMMDDHHMMSS-HASH (without prefix, typically v0.0.0-...)
|
|
pseudo_match = re.search(r'-(?:0\.)?(\d{14})-([0-9a-f]{12})$', clean_version)
|
|
if pseudo_match:
|
|
vcs_hash = pseudo_match.group(2) # 12-char short hash
|
|
# Note: Short hashes are expanded to full 40-char by oe-go-mod-fetcher.py
|
|
# in load_native_modules() using resolve_pseudo_version_commit()
|
|
else:
|
|
# Tagged version - resolve tag to commit hash
|
|
# FIX #32: For multi-module repos, the tag includes the subpath prefix
|
|
# e.g., google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1.0
|
|
# has tag: cmd/protoc-gen-go-grpc/v1.1.0 (not v1.1.0)
|
|
if subpath:
|
|
tag_name = f"{subpath}/{clean_version}"
|
|
else:
|
|
tag_name = clean_version
|
|
vcs_ref = f"refs/tags/{tag_name}"
|
|
# Query the repository to get the actual commit hash for this tag
|
|
vcs_hash = git_ls_remote(vcs_url, vcs_ref)
|
|
if not vcs_hash and subpath:
|
|
# FIX #32: Fallback - try without subpath prefix
|
|
# Some repos don't use prefixed tags for submodules
|
|
fallback_ref = f"refs/tags/{clean_version}"
|
|
vcs_hash = git_ls_remote(vcs_url, fallback_ref)
|
|
if vcs_hash:
|
|
vcs_ref = fallback_ref # Use the working ref
|
|
|
|
return {
|
|
'vcs_url': vcs_url,
|
|
'vcs_hash': vcs_hash,
|
|
'vcs_ref': vcs_ref,
|
|
'subdir': subpath, # FIX #32: Return subdir for submodules
|
|
}
|
|
|
|
|
|
def extract_modules(gomodcache_path):
|
|
"""
|
|
Walk GOMODCACHE and extract all module metadata from .info files.
|
|
|
|
Returns list of dicts with complete metadata:
|
|
- module_path: Unescaped module path
|
|
- version: Module version
|
|
- vcs_url: Git repository URL
|
|
- vcs_hash: Full commit hash (40 chars)
|
|
- vcs_ref: Tag/branch reference
|
|
- subdir: Subdirectory in mono-repos
|
|
- timestamp: Commit timestamp
|
|
"""
|
|
cache_dir = Path(gomodcache_path) / "cache" / "download"
|
|
|
|
if not cache_dir.exists():
|
|
raise FileNotFoundError(f"Cache directory not found: {cache_dir}")
|
|
|
|
modules = []
|
|
skipped = 0
|
|
derived = 0
|
|
total_info_files = 0
|
|
|
|
print(f"Scanning GOMODCACHE: {cache_dir}")
|
|
|
|
for info_file in cache_dir.rglob("*.info"):
|
|
total_info_files += 1
|
|
|
|
# Extract module path from directory structure
|
|
rel_path = info_file.parent.relative_to(cache_dir)
|
|
parts = list(rel_path.parts)
|
|
|
|
if parts[-1] != '@v':
|
|
continue
|
|
|
|
# Module path (unescape Go's !-encoding)
|
|
# Example: github.com/!microsoft/go-winio -> github.com/Microsoft/go-winio
|
|
module_path = '/'.join(parts[:-1])
|
|
# Unescape !x -> X (Go's case-insensitive encoding)
|
|
module_path = re.sub(r'!([a-z])', lambda m: m.group(1).upper(), module_path)
|
|
|
|
# Version
|
|
version = info_file.stem
|
|
|
|
# Read .info file for VCS metadata
|
|
try:
|
|
with open(info_file) as f:
|
|
info = json.load(f)
|
|
|
|
origin = info.get('Origin', {})
|
|
|
|
# Check if we have complete VCS info from Origin
|
|
if origin.get('URL') and origin.get('Hash'):
|
|
module = {
|
|
'module_path': module_path,
|
|
'version': version,
|
|
'vcs_url': origin.get('URL', ''),
|
|
'vcs_hash': origin.get('Hash', ''),
|
|
'vcs_ref': origin.get('Ref', ''),
|
|
'subdir': origin.get('Subdir', ''),
|
|
'timestamp': info.get('Time', ''),
|
|
}
|
|
modules.append(module)
|
|
else:
|
|
# FIX #29: Module lacks Origin metadata (common for +incompatible modules)
|
|
# Use derive_vcs_info() to infer VCS URL and ref from module path/version
|
|
derived += 1
|
|
# Progress output for derived modules (these require network calls)
|
|
if derived % 10 == 1:
|
|
print(f" Deriving VCS info... ({derived} modules)", end='\r', flush=True)
|
|
derived_info = derive_vcs_info(module_path, version)
|
|
if derived_info:
|
|
module = {
|
|
'module_path': module_path,
|
|
'version': version,
|
|
'vcs_url': derived_info.get('vcs_url', ''),
|
|
'vcs_hash': derived_info.get('vcs_hash', ''),
|
|
'vcs_ref': derived_info.get('vcs_ref', ''),
|
|
'subdir': derived_info.get('subdir', ''), # FIX #32: Use derived subdir
|
|
'timestamp': info.get('Time', ''),
|
|
}
|
|
modules.append(module)
|
|
else:
|
|
# Cannot derive VCS info - skip this module
|
|
skipped += 1
|
|
derived -= 1 # Don't count as derived if we couldn't derive
|
|
# Only log for debugging
|
|
# print(f" ⚠️ Cannot derive VCS info for {module_path}@{version}")
|
|
|
|
except json.JSONDecodeError as e:
|
|
print(f" ⚠️ Failed to parse {info_file}: {e}")
|
|
skipped += 1
|
|
continue
|
|
except Exception as e:
|
|
print(f" ⚠️ Error processing {info_file}: {e}")
|
|
skipped += 1
|
|
continue
|
|
|
|
print(f"\nProcessed {total_info_files} .info files")
|
|
print(f"Extracted {len(modules)} modules total:")
|
|
print(f" - {len(modules) - derived} with Origin metadata from proxy")
|
|
print(f" - {derived} with derived VCS info (Fix #29)")
|
|
print(f"Skipped {skipped} modules (cannot derive VCS info)")
|
|
|
|
return modules
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description='Extract module metadata from Go module cache',
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
# Extract from native Go build cache
|
|
%(prog)s --gomodcache /tmp/k3s-discovery-cache --output /tmp/k3s-modules.json
|
|
|
|
# Extract from BitBake discovery build
|
|
%(prog)s --gomodcache /path/to/build/tmp/work/.../discovery-cache --output /tmp/k3s-modules.json
|
|
|
|
# Extract from system GOMODCACHE
|
|
%(prog)s --gomodcache ~/go/pkg/mod --output /tmp/modules.json
|
|
|
|
Output:
|
|
- <output>.json: Complete module metadata (VCS URLs, commits, subdirs)
|
|
- <output>.txt: Simple module@version list (sorted)
|
|
"""
|
|
)
|
|
parser.add_argument(
|
|
'--gomodcache',
|
|
required=True,
|
|
help='Path to GOMODCACHE directory'
|
|
)
|
|
parser.add_argument(
|
|
'--output',
|
|
required=True,
|
|
help='Output JSON file path (e.g., /tmp/k3s-modules.json)'
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Validate GOMODCACHE path
|
|
gomodcache = Path(args.gomodcache)
|
|
if not gomodcache.exists():
|
|
print(f"Error: GOMODCACHE directory does not exist: {gomodcache}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Extract modules
|
|
try:
|
|
modules = extract_modules(gomodcache)
|
|
except Exception as e:
|
|
print(f"Error during extraction: {e}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
if not modules:
|
|
print("Warning: No modules with VCS metadata found!", file=sys.stderr)
|
|
print("This may indicate:", file=sys.stderr)
|
|
print(" - GOMODCACHE is from BitBake (synthetic .info files)", file=sys.stderr)
|
|
print(" - GOMODCACHE is empty or incomplete", file=sys.stderr)
|
|
print(" - Need to run 'go mod download' first", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Save as JSON
|
|
output_path = Path(args.output)
|
|
try:
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
output_path.write_text(json.dumps(modules, indent=2, sort_keys=True))
|
|
print(f"\n✓ Saved {len(modules)} modules to {output_path}")
|
|
except Exception as e:
|
|
print(f"Error writing JSON output: {e}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Also save simple list
|
|
list_path = output_path.with_suffix('.txt')
|
|
try:
|
|
simple_list = [f"{m['module_path']}@{m['version']}" for m in modules]
|
|
list_path.write_text('\n'.join(sorted(simple_list)) + '\n')
|
|
print(f"✓ Saved module list to {list_path}")
|
|
except Exception as e:
|
|
print(f"Error writing module list: {e}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Print summary statistics
|
|
print("\n" + "="*60)
|
|
print("EXTRACTION SUMMARY")
|
|
print("="*60)
|
|
|
|
# Count unique repositories
|
|
unique_repos = len(set(m['vcs_url'] for m in modules))
|
|
print(f"Total modules: {len(modules)}")
|
|
print(f"Unique repositories: {unique_repos}")
|
|
|
|
# Count modules with subdirs (multi-module repos)
|
|
with_subdirs = sum(1 for m in modules if m['subdir'])
|
|
print(f"Multi-module repos: {with_subdirs} modules have subdirs")
|
|
|
|
# Show top repositories by module count
|
|
repo_counts = {}
|
|
for m in modules:
|
|
repo_counts[m['vcs_url']] = repo_counts.get(m['vcs_url'], 0) + 1
|
|
|
|
top_repos = sorted(repo_counts.items(), key=lambda x: x[1], reverse=True)[:5]
|
|
print("\nTop 5 repositories by module count:")
|
|
for repo_url, count in top_repos:
|
|
print(f" {count:3d} modules: {repo_url}")
|
|
|
|
print("\n" + "="*60)
|
|
print("Use this JSON file with:")
|
|
print(f" oe-go-mod-fetcher.py --native-modules {output_path}")
|
|
print("="*60)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|