recipetool: use oe.license_finder

Delete the now redundant code, and import oe.license_finder instead.

(From OE-Core rev: 8bba98be5c87dd6749e5cc95e9553dffc23ada73)

Signed-off-by: Ross Burton <ross.burton@arm.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Ross Burton 2025-06-13 14:16:12 +01:00 committed by Richard Purdie
parent 36adc8135d
commit e9932fca23
3 changed files with 3 additions and 262 deletions

View File

@ -18,6 +18,7 @@ from urllib.parse import urlparse, urldefrag, urlsplit
import hashlib
import bb.fetch2
logger = logging.getLogger('recipetool')
from oe.license_finder import find_licenses
tinfoil = None
plugins = None
@ -1040,230 +1041,6 @@ def handle_license_vars(srctree, lines_before, handled, extravalues, d):
handled.append(('license', licvalues))
return licvalues
def get_license_md5sums(d, static_only=False, linenumbers=False):
    """
    Build a dictionary keyed by the md5sum of known license files.

    d            -- datastore; COMMON_LICENSE_DIR and BBPATH are read from it
    static_only  -- when true, skip hashing the files in COMMON_LICENSE_DIR
    linenumbers  -- when true, skip COMMON_LICENSE_DIR as well and map each
                    md5sum to a (license, beginline, endline, md5) tuple
                    instead of just the license name

    Returns the resulting md5sum -> value dictionary.
    """
    import bb.utils
    import csv

    checksum_map = {}

    # The common license directory is only hashed when the caller asked for
    # neither the static list alone nor the line-number variant.
    if not (static_only or linenumbers):
        licdir = d.getVar('COMMON_LICENSE_DIR')
        for licname in os.listdir(licdir):
            checksum = bb.utils.md5_file(os.path.join(licdir, licname))
            checksum_map[checksum] = licname

    # Additional checksums come from licenses.csv files: one may live under
    # lib/recipetool in any BBPATH entry, plus the one two levels above this
    # script's directory.
    here = os.path.dirname(os.path.realpath(__file__))
    search_roots = d.getVar('BBPATH').split(':')
    search_roots.append(os.path.join(here, '..', '..'))
    columns = ['md5sum', 'license', 'beginline', 'endline', 'md5']

    for root in search_roots:
        candidate = os.path.join(root, 'lib', 'recipetool', 'licenses.csv')
        if not os.path.isfile(candidate):
            continue
        with open(candidate, newline='') as handle:
            for row in csv.DictReader(handle, delimiter=',', fieldnames=columns):
                if linenumbers:
                    entry = (row['license'], row['beginline'], row['endline'], row['md5'])
                else:
                    entry = row['license']
                # Later files/rows override earlier ones with the same md5sum
                checksum_map[row['md5sum']] = entry

    return checksum_map
def crunch_known_licenses(d):
    '''
    Return a dictionary mapping the "crunched" MD5 checksum (as produced by
    crunch_license()) of every file in COMMON_LICENSE_DIR to its file name,
    seeded with a set of additional, hand-collected checksums.

    Seeded entries win: a common license file whose crunched checksum is
    already present does not replace the existing value, it is only reported
    through bb.debug().
    '''
    crunched_md5sums = {
        # common licenses
        'ad4e9d34a2e966dfe9837f18de03266d': 'GFDL-1.1-only',
        'd014fb11a34eb67dc717fdcfc97e60ed': 'GFDL-1.2-only',
        'e020ca655b06c112def28e597ab844f1': 'GFDL-1.3-only',
        # The following two were gleaned from the "forever" npm package
        '0a97f8e4cbaf889d6fa51f84b89a79f6': 'ISC',
        # https://github.com/waffle-gl/waffle/blob/master/LICENSE.txt
        '50fab24ce589d69af8964fdbfe414c60': 'BSD-2-Clause',
        # https://github.com/spigwitmer/fakeds1963s/blob/master/LICENSE
        '88a4355858a1433fea99fae34a44da88': 'GPL-2.0-only',
        # http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
        '063b5c3ebb5f3aa4c85a2ed18a31fbe7': 'GPL-2.0-only',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv2.1
        '7f5202f4d44ed15dcd4915f5210417d8': 'LGPL-2.1-only',
        # unixODBC-2.3.4 COPYING
        '3debde09238a8c8e1f6a847e1ec9055b': 'LGPL-2.1-only',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3
        'f90c613c51aa35da4d79dd55fc724ceb': 'LGPL-3.0-only',
        # https://raw.githubusercontent.com/eclipse/mosquitto/v1.4.14/epl-v10
        'efe2cb9a35826992b9df68224e3c2628': 'EPL-1.0',
        # https://raw.githubusercontent.com/jquery/esprima/3.1.3/LICENSE.BSD
        '80fa7b56a28e8c902e6af194003220a5': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/npm/npm-install-checks/master/LICENSE
        'e659f77bfd9002659e112d0d3d59b2c1': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/silverwind/default-gateway/4.2.0/LICENSE
        '4c641f2d995c47f5cb08bdb4b5b6ea05': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/tad-lispy/node-damerau-levenshtein/v1.0.5/LICENSE
        '2b8c039b2b9a25f0feb4410c4542d346': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/terser/terser/v3.17.0/LICENSE
        '8bd23871802951c9ad63855151204c2c': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/alexei/sprintf.js/1.0.3/LICENSE
        '008c22318c8ea65928bf730ddd0273e3': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/Caligatio/jsSHA/v3.2.0/LICENSE
        '0e46634a01bfef056892949acaea85b1': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/d3/d3-path/v1.0.9/LICENSE
        'b5f72aef53d3b2b432702c30b0215666': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/feross/ieee754/v1.1.13/LICENSE
        'a39327c997c20da0937955192d86232d': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/joyent/node-extsprintf/v1.3.0/LICENSE
        '721f23a96ff4161ca3a5f071bbe18108': 'MIT',
        # https://raw.githubusercontent.com/pvorb/clone/v0.2.0/LICENSE
        'b376d29a53c9573006b9970709231431': 'MIT',
        # https://raw.githubusercontent.com/andris9/encoding/v0.1.12/LICENSE
        '85d8a977ee9d7c5ab4ac03c9b95431c4': 'MIT-0',
        # https://raw.githubusercontent.com/faye/websocket-driver-node/0.7.3/LICENSE.md
        'b66384e7137e41a9b1904ef4d39703b6': 'Apache-2.0',
        # https://raw.githubusercontent.com/less/less.js/v4.1.1/LICENSE
        'b27575459e02221ccef97ec0bfd457ae': 'Apache-2.0',
        # https://raw.githubusercontent.com/microsoft/TypeScript/v3.5.3/LICENSE.txt
        'a54a1a6a39e7f9dbb4a23a42f5c7fd1c': 'Apache-2.0',
        # https://raw.githubusercontent.com/request/request/v2.87.0/LICENSE
        '1034431802e57486b393d00c5d262b8a': 'Apache-2.0',
        # https://raw.githubusercontent.com/dchest/tweetnacl-js/v0.14.5/LICENSE
        '75605e6bdd564791ab698fca65c94a4f': 'Unlicense',
        # https://raw.githubusercontent.com/stackgl/gl-mat3/v2.0.0/LICENSE.md
        '75512892d6f59dddb6d1c7e191957e9c': 'Zlib',
    }

    commonlicdir = d.getVar('COMMON_LICENSE_DIR')
    # Sorted so that, when two common license files crunch to the same
    # checksum, the one that gets recorded does not depend on listdir order.
    for fn in sorted(os.listdir(commonlicdir)):
        md5value, _ = crunch_license(os.path.join(commonlicdir, fn))
        # Stored values are always license names, so None means "not seen yet"
        previous = crunched_md5sums.get(md5value)
        if previous is None:
            crunched_md5sums[md5value] = fn
        elif previous != fn:
            bb.debug(2, "crunched_md5sums['%s'] is already set to '%s' rather than '%s'" % (md5value, previous, fn))
        else:
            bb.debug(2, "crunched_md5sums['%s'] is already set to '%s'" % (md5value, previous))

    return crunched_md5sums
def crunch_license(licfile):
    '''
    Reduce a license file to its material text and checksum the result.

    Lines that only carry a copyright notice, "All rights reserved", an
    e-mail address, a comment/banner header, a bare license tag, a URL, a
    license title or a "released under the ... license" statement are
    dropped. The remaining lines are normalised (comment symbols removed,
    spelling, whitespace, quotes and brackets unified) so that cosmetic
    reformatting of a license does not change its checksum.

    Returns (md5sum, lines) where md5sum is the hex MD5 of the normalised
    lines joined by single spaces and lines is the list of those lines.
    If the text cannot be encoded as UTF-8 the result is (None, '').
    '''
    import oe.utils

    # Note: these are carefully constructed!
    license_title_re = re.compile(r'^#*\(? *(This is )?([Tt]he )?.{0,15} ?[Ll]icen[sc]e( \(.{1,10}\))?\)?[:\.]? ?#*$')
    license_statement_re = re.compile(r'^((This (project|software)|.{1,10}) is( free software)? (released|licen[sc]ed)|(Released|Licen[cs]ed)) under the .{1,10} [Ll]icen[sc]e:?$')
    copyright_re = re.compile(r'^ *[#\*]* *(Modified work |MIT LICENSED )?Copyright ?(\([cC]\))? .*$')
    disclaimer_re = re.compile(r'^ *\*? ?All [Rr]ights [Rr]eserved\.$')
    email_re = re.compile(r'^.*<[\w\.-]*@[\w\.\-]*>$')
    header_re = re.compile(r'^(\/\**!?)? ?[\-=\*]* ?(\*\/)?$')
    tag_re = re.compile(r'^ *@?\(?([Ll]icense|MIT)\)?$')
    url_re = re.compile(r'^ *[#\*]* *https?:\/\/[\w\.\/\-]+$')

    # A line matching any one of these is an "opening statement" and is dropped
    non_material = (
        copyright_re,
        disclaimer_re,
        email_re,
        header_re,
        tag_re,
        url_re,
        license_title_re,
        license_statement_re,
    )

    # Smart quotes, double quotes and backticks all become single quotes;
    # curly brackets become square brackets.
    unify_chars = str.maketrans({
        u"\u2018": "'",
        u"\u2019": "'",
        u"\u201c": "'",
        u"\u201d": "'",
        '"': "'",
        '`': "'",
        '{': '[',
        '}': ']',
    })

    material = []
    with open(licfile, 'r', errors='surrogateescape') as f:
        for raw in f:
            if any(pattern.match(raw) for pattern in non_material):
                continue
            # Strip comment symbols
            text = raw.replace('*', '').replace('#', '')
            # Unify spelling (after the symbols are gone, as before)
            text = text.replace('sub-license', 'sublicense')
            # Squash spaces
            text = oe.utils.squashspaces(text.strip())
            # Unify quotes and brackets
            text = text.translate(unify_chars)
            if text:
                material.append(text)

    try:
        payload = ' '.join(material).encode('utf-8')
    except UnicodeEncodeError:
        # surrogateescape'd bytes cannot be re-encoded: report "no checksum"
        return None, ''
    return hashlib.md5(payload).hexdigest(), material
def find_license_files(srctree):
    """
    Walk srctree and return the full paths of files whose names look like
    license files.

    A file qualifies when its name matches at least one of the license glob
    patterns below and does not end in one of the skipped extensions. Each
    path appears once in the result, in os.walk() order.
    """
    licspecs = ['*LICEN[CS]E*', 'COPYING*', '*[Ll]icense*', 'LEGAL*', '[Ll]egal*', '*GPL*', 'README.lic*', 'COPYRIGHT*', '[Cc]opyright*', 'e[dp]l-v10']
    skip_extensions = (".html", ".js", ".json", ".svg", ".ts", ".go")

    found = []
    for dirpath, _subdirs, names in os.walk(srctree):
        for name in names:
            if name.endswith(skip_extensions):
                continue
            # One matching pattern is enough; stopping at the first hit keeps
            # a file that matches several patterns from being listed twice.
            if any(fnmatch.fnmatch(name, pattern) for pattern in licspecs):
                found.append(os.path.join(dirpath, name))
    return found
def match_licenses(licfiles, srctree, d):
    """
    Identify the license of each file in licfiles.

    Every file (after d.expand() on its path) is first looked up by its plain
    md5sum in get_license_md5sums(); if that fails, its crunched checksum is
    looked up in crunch_known_licenses(). A file that still has material
    text but no match is reported as 'Unknown' and a hint is logged asking
    for a licenses.csv entry.

    Returns a list of (license, path relative to srctree, md5sum) tuples,
    ordered by the sorted input paths. Files with no match and no material
    text are left out.
    """
    import bb

    exact_sums = get_license_md5sums(d)
    crunched_sums = crunch_known_licenses(d)

    matched = []
    for licfile in sorted(licfiles):
        actual_path = d.expand(licfile)
        md5value = bb.utils.md5_file(actual_path)

        licname = exact_sums.get(md5value)
        if not licname:
            # No byte-exact match: retry on the normalised text
            crunched_md5, lictext = crunch_license(actual_path)
            licname = crunched_sums.get(crunched_md5)
            if not licname and lictext:
                licname = 'Unknown'
                # The hint path is relative to the parent of srctree
                hint_path = os.path.relpath(licfile, srctree + "/..")
                logger.info("Please add the following line for '%s' to a 'lib/recipetool/licenses.csv' " \
                    "and replace `Unknown` with the license:\n" \
                    "%s,Unknown" % (hint_path, md5value))

        if not licname:
            continue
        matched.append((licname, os.path.relpath(licfile, srctree), md5value))

    return matched
def find_licenses(srctree, d):
    """
    Locate the license files under srctree with find_license_files() and
    return what match_licenses() reports for them.
    """
    # FIXME should we grab at least one source file with a license header and add that too?
    return match_licenses(find_license_files(srctree), srctree, d)
def split_pkg_licenses(licvalues, packages, outlines, fallback_licenses=None, pn='${PN}'):
"""
Given a list of (license, path, md5sum) as returned by match_licenses(),

View File

@ -15,8 +15,9 @@ import bb
from bb.fetch2.npm import NpmEnvironment
from bb.fetch2.npm import npm_package
from bb.fetch2.npmsw import foreach_dependencies
from oe.license_finder import match_licenses, find_license_files
from recipetool.create import RecipeHandler
from recipetool.create import match_licenses, find_license_files, generate_common_licenses_chksums
from recipetool.create import generate_common_licenses_chksums
from recipetool.create import split_pkg_licenses
logger = logging.getLogger('recipetool')

View File

@ -1,37 +0,0 @@
0636e73ff0215e8d672dc4c32c317bb3,GPL-2.0-only
12f884d2ae1ff87c09e5b7ccc2c4ca7e,GPL-2.0-only
18810669f13b87348459e611d31ab760,GPL-2.0-only
252890d9eee26aab7b432e8b8a616475,LGPL-2.0-only
2d5025d4aa3495befef8f17206a5b0a1,LGPL-2.1-only
3214f080875748938ba060314b4f727d,LGPL-2.0-only
385c55653886acac3821999a3ccd17b3,Artistic-1.0 | GPL-2.0-only
393a5ca445f6965873eca0259a17f833,GPL-2.0-only
3b83ef96387f14655fc854ddc3c6bd57,Apache-2.0
3bf50002aefd002f49e7bb854063f7e7,LGPL-2.0-only
4325afd396febcb659c36b49533135d4,GPL-2.0-only
4fbd65380cdd255951079008b364516c,LGPL-2.1-only
54c7042be62e169199200bc6477f04d1,BSD-3-Clause
55ca817ccb7d5b5b66355690e9abc605,LGPL-2.0-only
59530bdf33659b29e73d4adb9f9f6552,GPL-2.0-only
5f30f0716dfdd0d91eb439ebec522ec2,LGPL-2.0-only
6a6a8e020838b23406c81b19c1d46df6,LGPL-3.0-only
751419260aa954499f7abaabaa882bbe,GPL-2.0-only
7fbc338309ac38fefcd64b04bb903e34,LGPL-2.1-only
8ca43cbc842c2336e835926c2166c28b,GPL-2.0-only
94d55d512a9ba36caa9b7df079bae19f,GPL-2.0-only
9ac2e7cff1ddaf48b6eab6028f23ef88,GPL-2.0-only
9f604d8a4f8e74f4f5140845a21b6674,LGPL-2.0-only
a6f89e2100d9b6cdffcea4f398e37343,LGPL-2.1-only
b234ee4d69f5fce4486a80fdaf4a4263,GPL-2.0-only
bbb461211a33b134d42ed5ee802b37ff,LGPL-2.1-only
bfe1f75d606912a4111c90743d6c7325,MPL-1.1-only
c93c0550bd3173f4504b2cbd8991e50b,GPL-2.0-only
d32239bcb673463ab874e80d47fae504,GPL-3.0-only
d7810fab7487fb0aad327b76f1be7cd7,GPL-2.0-only
d8045f3b8f929c1cb29a1e3fd737b499,LGPL-2.1-only
db979804f025cf55aabec7129cb671ed,LGPL-2.0-only
eb723b61539feef013de476e68b5c50a,GPL-2.0-only
ebb5c50ab7cab4baeffba14977030c07,GPL-2.0-only
f27defe1e96c2e1ecd4e0c9be8967949,GPL-3.0-only
fad9b3332be894bab9bc501572864b29,LGPL-2.1-only
fbc093901857fcd118f065f900982c24,LGPL-2.1-only
1 0636e73ff0215e8d672dc4c32c317bb3 GPL-2.0-only
2 12f884d2ae1ff87c09e5b7ccc2c4ca7e GPL-2.0-only
3 18810669f13b87348459e611d31ab760 GPL-2.0-only
4 252890d9eee26aab7b432e8b8a616475 LGPL-2.0-only
5 2d5025d4aa3495befef8f17206a5b0a1 LGPL-2.1-only
6 3214f080875748938ba060314b4f727d LGPL-2.0-only
7 385c55653886acac3821999a3ccd17b3 Artistic-1.0 | GPL-2.0-only
8 393a5ca445f6965873eca0259a17f833 GPL-2.0-only
9 3b83ef96387f14655fc854ddc3c6bd57 Apache-2.0
10 3bf50002aefd002f49e7bb854063f7e7 LGPL-2.0-only
11 4325afd396febcb659c36b49533135d4 GPL-2.0-only
12 4fbd65380cdd255951079008b364516c LGPL-2.1-only
13 54c7042be62e169199200bc6477f04d1 BSD-3-Clause
14 55ca817ccb7d5b5b66355690e9abc605 LGPL-2.0-only
15 59530bdf33659b29e73d4adb9f9f6552 GPL-2.0-only
16 5f30f0716dfdd0d91eb439ebec522ec2 LGPL-2.0-only
17 6a6a8e020838b23406c81b19c1d46df6 LGPL-3.0-only
18 751419260aa954499f7abaabaa882bbe GPL-2.0-only
19 7fbc338309ac38fefcd64b04bb903e34 LGPL-2.1-only
20 8ca43cbc842c2336e835926c2166c28b GPL-2.0-only
21 94d55d512a9ba36caa9b7df079bae19f GPL-2.0-only
22 9ac2e7cff1ddaf48b6eab6028f23ef88 GPL-2.0-only
23 9f604d8a4f8e74f4f5140845a21b6674 LGPL-2.0-only
24 a6f89e2100d9b6cdffcea4f398e37343 LGPL-2.1-only
25 b234ee4d69f5fce4486a80fdaf4a4263 GPL-2.0-only
26 bbb461211a33b134d42ed5ee802b37ff LGPL-2.1-only
27 bfe1f75d606912a4111c90743d6c7325 MPL-1.1-only
28 c93c0550bd3173f4504b2cbd8991e50b GPL-2.0-only
29 d32239bcb673463ab874e80d47fae504 GPL-3.0-only
30 d7810fab7487fb0aad327b76f1be7cd7 GPL-2.0-only
31 d8045f3b8f929c1cb29a1e3fd737b499 LGPL-2.1-only
32 db979804f025cf55aabec7129cb671ed LGPL-2.0-only
33 eb723b61539feef013de476e68b5c50a GPL-2.0-only
34 ebb5c50ab7cab4baeffba14977030c07 GPL-2.0-only
35 f27defe1e96c2e1ecd4e0c9be8967949 GPL-3.0-only
36 fad9b3332be894bab9bc501572864b29 LGPL-2.1-only
37 fbc093901857fcd118f065f900982c24 LGPL-2.1-only