bitbake: fetch2/npm: refactor the npm fetcher

This commit refactors the npm fetcher to improve some points and fix
others:

 - The big change is that the fetcher is only fetching the package
   source and no more the dependencies. Thus the npm fetcher act as the
   other fetchers e.g git, wget. The dependencies will be handled later.

 - The fetcher only resolves the url of the package using 'npm view' and
   then forwards it to a proxy fetcher.

 - This commit also fixes a lot of issues with the package names (exotic
   characters, scoped packages) which were badly handled.

 - The validation files - lockdown.json and npm-shrinkwrap.json - are no
   longer used by the fetcher. Instead, the downloaded tarball is
   verified with the 'integrity' and 'shasum' provided in the 'npm view'
   of the package [1][2].

1: https://docs.npmjs.com/files/package-lock.json#integrity
2: https://www.w3.org/TR/SRI

(Bitbake rev: 0f451cdc43130d503ada53ed1b4fc5a24943f6ef)

Signed-off-by: Jean-Marie LEMETAYER <jean-marie.lemetayer@savoirfairelinux.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Jean-Marie LEMETAYER 2020-01-24 18:08:11 +01:00 committed by Richard Purdie
parent b9ce68a5d7
commit d842e9e738

View File

@ -1,300 +1,296 @@
# Copyright (C) 2020 Savoir-Faire Linux
#
# SPDX-License-Identifier: GPL-2.0-only
#
"""
BitBake 'Fetch' NPM implementation
BitBake 'Fetch' npm implementation
The NPM fetcher is used to retrieve files from the npmjs repository
npm fetcher support the SRC_URI with format of:
SRC_URI = "npm://some.registry.url;OptionA=xxx;OptionB=xxx;..."
Usage in the recipe:
Supported SRC_URI options are:
SRC_URI = "npm://registry.npmjs.org/;name=${PN};version=${PV}"
Suported SRC_URI options are:
- package
The npm package name. This is a mandatory parameter.
- name
- version
- version
The npm package version. This is a mandatory parameter.
npm://registry.npmjs.org/${PN}/-/${PN}-${PV}.tgz would become npm://registry.npmjs.org;name=${PN};version=${PV}
The fetcher all triggers off the existence of ud.localpath. If that exists and has the ".done" stamp, its assumed the fetch is good/done
- downloadfilename
Specifies the filename used when storing the downloaded file.
- destsuffix
Specifies the directory to use to unpack the package (default: npm).
"""
import os
import urllib.request, urllib.parse, urllib.error
import base64
import json
import subprocess
import signal
import os
import re
import shlex
import tempfile
import bb
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import ChecksumError
from bb.fetch2 import runfetchcmd
from bb.fetch2 import logger
from bb.fetch2 import UnpackError
from bb.fetch2 import ParameterError
from bb.fetch2 import Fetch
from bb.fetch2 import FetchError
from bb.fetch2 import FetchMethod
from bb.fetch2 import MissingParameterError
from bb.fetch2 import ParameterError
from bb.fetch2 import URI
from bb.fetch2 import check_network_access
from bb.fetch2 import runfetchcmd
from bb.utils import is_semver
def subprocess_setup():
# Python installs a SIGPIPE handler by default. This is usually not what
# non-Python subprocesses expect.
# SIGPIPE errors are known issues with gzip/bash
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
def npm_package(package):
"""Convert the npm package name to remove unsupported character"""
# Scoped package names (with the @) use the same naming convention
# as the 'npm pack' command.
if package.startswith("@"):
return re.sub("/", "-", package[1:])
return package
def npm_filename(package, version):
"""Get the filename of a npm package"""
return npm_package(package) + "-" + version + ".tgz"
def npm_localfile(package, version):
"""Get the local filename of a npm package"""
return os.path.join("npm2", npm_filename(package, version))
def npm_integrity(integrity):
"""
Get the checksum name and expected value from the subresource integrity
https://www.w3.org/TR/SRI/
"""
algo, value = integrity.split("-", maxsplit=1)
return "%ssum" % algo, base64.b64decode(value).hex()
def npm_unpack(tarball, destdir, d):
"""Unpack a npm tarball"""
bb.utils.mkdirhier(destdir)
cmd = "tar --extract --gzip --file=%s" % shlex.quote(tarball)
cmd += " --no-same-owner"
cmd += " --strip-components=1"
runfetchcmd(cmd, d, workdir=destdir)
class NpmEnvironment(object):
"""
Using a npm config file seems more reliable than using cli arguments.
This class allows to create a controlled environment for npm commands.
"""
def __init__(self, d, configs=None):
self.d = d
self.configs = configs
def run(self, cmd, args=None, configs=None, workdir=None):
"""Run npm command in a controlled environment"""
with tempfile.TemporaryDirectory() as tmpdir:
d = bb.data.createCopy(self.d)
d.setVar("HOME", tmpdir)
cfgfile = os.path.join(tmpdir, "npmrc")
if not workdir:
workdir = tmpdir
def _run(cmd):
cmd = "NPM_CONFIG_USERCONFIG=%s " % cfgfile + cmd
cmd = "NPM_CONFIG_GLOBALCONFIG=%s " % cfgfile + cmd
return runfetchcmd(cmd, d, workdir=workdir)
if self.configs:
for key, value in self.configs:
_run("npm config set %s %s" % (key, shlex.quote(value)))
if configs:
for key, value in configs:
_run("npm config set %s %s" % (key, shlex.quote(value)))
if args:
for key, value in args:
cmd += " --%s=%s" % (key, shlex.quote(value))
return _run(cmd)
class Npm(FetchMethod):
"""Class to fetch urls via 'npm'"""
def init(self, d):
pass
"""Class to fetch a package from a npm registry"""
def supports(self, ud, d):
"""
Check to see if a given url can be fetched with npm
"""
return ud.type in ['npm']
def debug(self, msg):
logger.debug(1, "NpmFetch: %s", msg)
def clean(self, ud, d):
logger.debug(2, "Calling cleanup %s" % ud.pkgname)
bb.utils.remove(ud.localpath, False)
bb.utils.remove(ud.pkgdatadir, True)
bb.utils.remove(ud.fullmirror, False)
"""Check if a given url can be fetched with npm"""
return ud.type in ["npm"]
def urldata_init(self, ud, d):
"""
init NPM specific variable within url data
"""
if 'downloadfilename' in ud.parm:
ud.basename = ud.parm['downloadfilename']
else:
ud.basename = os.path.basename(ud.path)
"""Init npm specific variables within url data"""
ud.package = None
ud.version = None
ud.registry = None
# Get the 'package' parameter
if "package" in ud.parm:
ud.package = ud.parm.get("package")
if not ud.package:
raise MissingParameterError("Parameter 'package' required", ud.url)
# Get the 'version' parameter
if "version" in ud.parm:
ud.version = ud.parm.get("version")
# can't call it ud.name otherwise fetcher base class will start doing sha1stuff
# TODO: find a way to get an sha1/sha256 manifest of pkg & all deps
ud.pkgname = ud.parm.get("name", None)
if not ud.pkgname:
raise ParameterError("NPM fetcher requires a name parameter", ud.url)
ud.version = ud.parm.get("version", None)
if not ud.version:
raise ParameterError("NPM fetcher requires a version parameter", ud.url)
ud.bbnpmmanifest = "%s-%s.deps.json" % (ud.pkgname, ud.version)
ud.bbnpmmanifest = ud.bbnpmmanifest.replace('/', '-')
ud.registry = "http://%s" % (ud.url.replace('npm://', '', 1).split(';'))[0]
prefixdir = "npm/%s" % ud.pkgname
ud.pkgdatadir = d.expand("${DL_DIR}/%s" % prefixdir)
if not os.path.exists(ud.pkgdatadir):
bb.utils.mkdirhier(ud.pkgdatadir)
ud.localpath = d.expand("${DL_DIR}/npm/%s" % ud.bbnpmmanifest)
raise MissingParameterError("Parameter 'version' required", ud.url)
self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -O -t 2 -T 30 -nv --passive-ftp --no-check-certificate "
ud.prefixdir = prefixdir
if not is_semver(ud.version) and not ud.version == "latest":
raise ParameterError("Invalid 'version' parameter", ud.url)
ud.write_tarballs = ((d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0") != "0")
mirrortarball = 'npm_%s-%s.tar.xz' % (ud.pkgname, ud.version)
mirrortarball = mirrortarball.replace('/', '-')
ud.fullmirror = os.path.join(d.getVar("DL_DIR"), mirrortarball)
ud.mirrortarballs = [mirrortarball]
# Extract the 'registry' part of the url
ud.registry = re.sub(r"^npm://", "http://", ud.url.split(";")[0])
# Using the 'downloadfilename' parameter as local filename
# or the npm package name.
if "downloadfilename" in ud.parm:
ud.localfile = d.expand(ud.parm["downloadfilename"])
else:
ud.localfile = npm_localfile(ud.package, ud.version)
# Get the base 'npm' command
ud.basecmd = d.getVar("FETCHCMD_npm") or "npm"
# This fetcher resolves a URI from a npm package name and version and
# then forwards it to a proxy fetcher. A resolve file containing the
# resolved URI is created to avoid unwanted network access (if the file
# already exists). The management of the donestamp file, the lockfile
# and the checksums are forwarded to the proxy fetcher.
ud.proxy = None
ud.needdonestamp = False
ud.resolvefile = self.localpath(ud, d) + ".resolved"
def _resolve_proxy_url(self, ud, d):
def _npm_view():
configs = []
configs.append(("json", "true"))
configs.append(("registry", ud.registry))
pkgver = shlex.quote(ud.package + "@" + ud.version)
cmd = ud.basecmd + " view %s" % pkgver
env = NpmEnvironment(d)
check_network_access(d, cmd, ud.registry)
view_string = env.run(cmd, configs=configs)
if not view_string:
raise FetchError("Unavailable package %s" % pkgver, ud.url)
try:
view = json.loads(view_string)
error = view.get("error")
if error is not None:
raise FetchError(error.get("summary"), ud.url)
if ud.version == "latest":
bb.warn("The npm package %s is using the latest " \
"version available. This could lead to " \
"non-reproducible builds." % pkgver)
elif ud.version != view.get("version"):
raise ParameterError("Invalid 'version' parameter", ud.url)
return view
except Exception as e:
raise FetchError("Invalid view from npm: %s" % str(e), ud.url)
def _get_url(view):
tarball_url = view.get("dist", {}).get("tarball")
if tarball_url is None:
raise FetchError("Invalid 'dist.tarball' in view", ud.url)
uri = URI(tarball_url)
uri.params["downloadfilename"] = ud.localfile
integrity = view.get("dist", {}).get("integrity")
shasum = view.get("dist", {}).get("shasum")
if integrity is not None:
checksum_name, checksum_expected = npm_integrity(integrity)
uri.params[checksum_name] = checksum_expected
elif shasum is not None:
uri.params["sha1sum"] = shasum
else:
raise FetchError("Invalid 'dist.integrity' in view", ud.url)
return str(uri)
url = _get_url(_npm_view())
bb.utils.mkdirhier(os.path.dirname(ud.resolvefile))
with open(ud.resolvefile, "w") as f:
f.write(url)
def _setup_proxy(self, ud, d):
if ud.proxy is None:
if not os.path.exists(ud.resolvefile):
self._resolve_proxy_url(ud, d)
with open(ud.resolvefile, "r") as f:
url = f.read()
# Avoid conflicts between the environment data and:
# - the proxy url checksum
data = bb.data.createCopy(d)
data.delVarFlags("SRC_URI")
ud.proxy = Fetch([url], data)
def _get_proxy_method(self, ud, d):
self._setup_proxy(ud, d)
proxy_url = ud.proxy.urls[0]
proxy_ud = ud.proxy.ud[proxy_url]
proxy_d = ud.proxy.d
proxy_ud.setup_localpath(proxy_d)
return proxy_ud.method, proxy_ud, proxy_d
def verify_donestamp(self, ud, d):
"""Verify the donestamp file"""
proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d)
return proxy_m.verify_donestamp(proxy_ud, proxy_d)
def update_donestamp(self, ud, d):
"""Update the donestamp file"""
proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d)
proxy_m.update_donestamp(proxy_ud, proxy_d)
def need_update(self, ud, d):
if os.path.exists(ud.localpath):
return False
return True
"""Force a fetch, even if localpath exists ?"""
if not os.path.exists(ud.resolvefile):
return True
if ud.version == "latest":
return True
proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d)
return proxy_m.need_update(proxy_ud, proxy_d)
def _runpack(self, ud, d, pkgfullname: str, quiet=False) -> str:
"""
Runs npm pack on a full package name.
Returns the filename of the downloaded package
"""
bb.fetch2.check_network_access(d, pkgfullname, ud.registry)
dldir = d.getVar("DL_DIR")
dldir = os.path.join(dldir, ud.prefixdir)
command = "npm pack {} --registry {}".format(pkgfullname, ud.registry)
logger.debug(2, "Fetching {} using command '{}' in {}".format(pkgfullname, command, dldir))
filename = runfetchcmd(command, d, quiet, workdir=dldir)
return filename.rstrip()
def _unpackdep(self, ud, pkg, data, destdir, dldir, d):
file = data[pkg]['tgz']
logger.debug(2, "file to extract is %s" % file)
if file.endswith('.tgz') or file.endswith('.tar.gz') or file.endswith('.tar.Z'):
cmd = 'tar xz --strip 1 --no-same-owner --warning=no-unknown-keyword -f %s/%s' % (dldir, file)
else:
bb.fatal("NPM package %s downloaded not a tarball!" % file)
# Change to subdir before executing command
if not os.path.exists(destdir):
os.makedirs(destdir)
path = d.getVar('PATH')
if path:
cmd = "PATH=\"%s\" %s" % (path, cmd)
bb.note("Unpacking %s to %s/" % (file, destdir))
ret = subprocess.call(cmd, preexec_fn=subprocess_setup, shell=True, cwd=destdir)
if ret != 0:
raise UnpackError("Unpack command %s failed with return value %s" % (cmd, ret), ud.url)
if 'deps' not in data[pkg]:
return
for dep in data[pkg]['deps']:
self._unpackdep(ud, dep, data[pkg]['deps'], "%s/node_modules/%s" % (destdir, dep), dldir, d)
def unpack(self, ud, destdir, d):
dldir = d.getVar("DL_DIR")
with open("%s/npm/%s" % (dldir, ud.bbnpmmanifest)) as datafile:
workobj = json.load(datafile)
dldir = "%s/%s" % (os.path.dirname(ud.localpath), ud.pkgname)
if 'subdir' in ud.parm:
unpackdir = '%s/%s' % (destdir, ud.parm.get('subdir'))
else:
unpackdir = '%s/npmpkg' % destdir
self._unpackdep(ud, ud.pkgname, workobj, unpackdir, dldir, d)
def _parse_view(self, output):
'''
Parse the output of npm view --json; the last JSON result
is assumed to be the one that we're interested in.
'''
pdata = json.loads(output);
try:
return pdata[-1]
except:
return pdata
def _getdependencies(self, pkg, data, version, d, ud, optional=False, fetchedlist=None):
if fetchedlist is None:
fetchedlist = []
pkgfullname = pkg
if version != '*' and not '/' in version:
pkgfullname += "@'%s'" % version
if pkgfullname in fetchedlist:
return
logger.debug(2, "Calling getdeps on %s" % pkg)
fetchcmd = "npm view %s --json --registry %s" % (pkgfullname, ud.registry)
output = runfetchcmd(fetchcmd, d, True)
pdata = self._parse_view(output)
if not pdata:
raise FetchError("The command '%s' returned no output" % fetchcmd)
if optional:
pkg_os = pdata.get('os', None)
if pkg_os:
if not isinstance(pkg_os, list):
pkg_os = [pkg_os]
blacklist = False
for item in pkg_os:
if item.startswith('!'):
blacklist = True
break
if (not blacklist and 'linux' not in pkg_os) or '!linux' in pkg_os:
logger.debug(2, "Skipping %s since it's incompatible with Linux" % pkg)
return
filename = self._runpack(ud, d, pkgfullname)
data[pkg] = {}
data[pkg]['tgz'] = filename
fetchedlist.append(pkgfullname)
dependencies = pdata.get('dependencies', {})
optionalDependencies = pdata.get('optionalDependencies', {})
dependencies.update(optionalDependencies)
depsfound = {}
optdepsfound = {}
data[pkg]['deps'] = {}
for dep in dependencies:
if dep in optionalDependencies:
optdepsfound[dep] = dependencies[dep]
else:
depsfound[dep] = dependencies[dep]
for dep, version in optdepsfound.items():
self._getdependencies(dep, data[pkg]['deps'], version, d, ud, optional=True, fetchedlist=fetchedlist)
for dep, version in depsfound.items():
self._getdependencies(dep, data[pkg]['deps'], version, d, ud, fetchedlist=fetchedlist)
def _getshrinkeddependencies(self, pkg, data, version, d, ud, lockdown, manifest, toplevel=True):
logger.debug(2, "NPM shrinkwrap file is %s" % data)
if toplevel:
name = data.get('name', None)
if name and name != pkg:
for obj in data.get('dependencies', []):
if obj == pkg:
self._getshrinkeddependencies(obj, data['dependencies'][obj], data['dependencies'][obj]['version'], d, ud, lockdown, manifest, False)
return
pkgnameWithVersion = "{}@{}".format(pkg, version)
logger.debug(2, "Get dependencies for {}".format(pkgnameWithVersion))
filename = self._runpack(ud, d, pkgnameWithVersion)
manifest[pkg] = {}
manifest[pkg]['tgz'] = filename
manifest[pkg]['deps'] = {}
if pkg in lockdown:
sha1_expected = lockdown[pkg][version]
sha1_data = bb.utils.sha1_file("npm/%s/%s" % (ud.pkgname, manifest[pkg]['tgz']))
if sha1_expected != sha1_data:
msg = "\nFile: '%s' has %s checksum %s when %s was expected" % (manifest[pkg]['tgz'], 'sha1', sha1_data, sha1_expected)
raise ChecksumError('Checksum mismatch!%s' % msg)
else:
logger.debug(2, "No lockdown data for %s@%s" % (pkg, version))
if 'dependencies' in data:
for obj in data['dependencies']:
logger.debug(2, "Found dep is %s" % str(obj))
self._getshrinkeddependencies(obj, data['dependencies'][obj], data['dependencies'][obj]['version'], d, ud, lockdown, manifest[pkg]['deps'], False)
def try_mirrors(self, fetch, ud, d, mirrors):
"""Try to use a mirror"""
proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d)
return proxy_m.try_mirrors(fetch, proxy_ud, proxy_d, mirrors)
def download(self, ud, d):
"""Fetch url"""
jsondepobj = {}
shrinkobj = {}
lockdown = {}
self._setup_proxy(ud, d)
ud.proxy.download()
if not os.listdir(ud.pkgdatadir) and os.path.exists(ud.fullmirror):
dest = d.getVar("DL_DIR")
bb.utils.mkdirhier(dest)
runfetchcmd("tar -xJf %s" % (ud.fullmirror), d, workdir=dest)
return
def unpack(self, ud, rootdir, d):
"""Unpack the downloaded archive"""
destsuffix = ud.parm.get("destsuffix", "npm")
destdir = os.path.join(rootdir, destsuffix)
npm_unpack(ud.localpath, destdir, d)
if ud.parm.get("noverify", None) != '1':
shwrf = d.getVar('NPM_SHRINKWRAP')
logger.debug(2, "NPM shrinkwrap file is %s" % shwrf)
if shwrf:
try:
with open(shwrf) as datafile:
shrinkobj = json.load(datafile)
except Exception as e:
raise FetchError('Error loading NPM_SHRINKWRAP file "%s" for %s: %s' % (shwrf, ud.pkgname, str(e)))
elif not ud.ignore_checksums:
logger.warning('Missing shrinkwrap file in NPM_SHRINKWRAP for %s, this will lead to unreliable builds!' % ud.pkgname)
lckdf = d.getVar('NPM_LOCKDOWN')
logger.debug(2, "NPM lockdown file is %s" % lckdf)
if lckdf:
try:
with open(lckdf) as datafile:
lockdown = json.load(datafile)
except Exception as e:
raise FetchError('Error loading NPM_LOCKDOWN file "%s" for %s: %s' % (lckdf, ud.pkgname, str(e)))
elif not ud.ignore_checksums:
logger.warning('Missing lockdown file in NPM_LOCKDOWN for %s, this will lead to unreproducible builds!' % ud.pkgname)
def clean(self, ud, d):
"""Clean any existing full or partial download"""
if os.path.exists(ud.resolvefile):
self._setup_proxy(ud, d)
ud.proxy.clean()
bb.utils.remove(ud.resolvefile)
if ('name' not in shrinkobj):
self._getdependencies(ud.pkgname, jsondepobj, ud.version, d, ud)
else:
self._getshrinkeddependencies(ud.pkgname, shrinkobj, ud.version, d, ud, lockdown, jsondepobj)
with open(ud.localpath, 'w') as outfile:
json.dump(jsondepobj, outfile)
def build_mirror_data(self, ud, d):
# Generate a mirror tarball if needed
if ud.write_tarballs and not os.path.exists(ud.fullmirror):
# it's possible that this symlink points to read-only filesystem with PREMIRROR
if os.path.islink(ud.fullmirror):
os.unlink(ud.fullmirror)
dldir = d.getVar("DL_DIR")
logger.info("Creating tarball of npm data")
runfetchcmd("tar -cJf %s npm/%s npm/%s" % (ud.fullmirror, ud.bbnpmmanifest, ud.pkgname), d,
workdir=dldir)
runfetchcmd("touch %s.done" % (ud.fullmirror), d, workdir=dldir)
def done(self, ud, d):
"""Is the download done ?"""
if not os.path.exists(ud.resolvefile):
return False
proxy_m, proxy_ud, proxy_d = self._get_proxy_method(ud, d)
return proxy_m.done(proxy_ud, proxy_d)