poky/meta/recipes-devtools/python/python3/create_manifest3.py
Alejandro Enedino Hernandez Samaniego d3148222f0 python3: Modify create_manifest to make it versionless
This patch improves the create_manifest script by making it
use PYTHON_MAJMIN instead of hard-coded paths containing the
version number when looking at the necessary modules for
every package. The script should now be independent of the
python(3) version on which we're working.

(From OE-Core rev: b94af33b5ffdd62617cf69fca4d99e927447740a)

Signed-off-by: Alejandro Enedino Hernandez Samaniego <alejandr@xilinx.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2018-09-06 10:36:31 +01:00

363 lines
16 KiB
Python

# This script is used as a bitbake task to create a new python manifest
# $ bitbake python -c create_manifest
#
# Our goal is to keep python-core as small as possible and add other python
# packages only when the user needs them, hence why we split upstream python
# into several packages.
#
# In a very simplistic way what this does is:
# Launch python and see specifically what is required for it to run at a minimum
#
# Go through the python-manifest file and launch a separate task for every single
# one of the files on each package, this task will check what was required for that
# specific module to run, these modules will be called dependencies.
# The output of such task will be a list of the modules or dependencies that were
# found for that file.
#
# Such output will be parsed by this script, we will look for each dependency on the
# manifest and if we find that another package already includes it, then we will add
# that package as an RDEPENDS to the package we are currently checking; in case we don't
# find the current dependency on any other package we will add it to the current package
# as part of FILES.
#
#
# This way we will create a new manifest from the data structure that was built during
# this process; in this new manifest each package will contain specifically only
# what it needs to run.
#
# There are some caveats which we try to deal with, such as repeated files on different
# packages, packages that include folders, wildcards, and special packages.
# It's also important to note that this method only works for python files, and shared
# libraries. Static libraries, header files and binaries need to be dealt with manually.
#
# This script differs from its python2 version mostly on how shared libraries are handled
# The manifest file for python3 has an extra field which contains the cached files for
# each package.
# The method to handle cached files does not work when a module includes a folder which
# itself contains the pycache folder; fortunately this is almost never the case.
#
# Author: Alejandro Enedino Hernandez Samaniego "aehs29" <aehs29@gmail.com>
import sys
import subprocess
import json
import os
# The python version (the value of ${PYTHON_MAJMIN}) is passed as the first argument
pyversion = str(sys.argv[1])

# Hack to get the native python search path (for folders): keep everything up
# to and including the pivot directory of a matching sys.path entry.
# The last matching entry wins; when no entry contains the pivot,
# nativelibfolder is left undefined.
pivot = 'recipe-sysroot-native'
for search_path in sys.path:
    if pivot in search_path:
        nativelibfolder = search_path[:search_path.index(pivot) + len(pivot)]

# Empty dict to hold the whole manifest that is being generated
new_manifest = {}

# Bookkeeping used to report repeated files, wildcards and folders
allfiles, repeated, wildcards = [], [], []
hasfolders, allfolders = [], []
def isFolder(value):
    """Return True when the manifest entry `value` is a directory on disk.

    ${PYTHON_MAJMIN} is expanded to `pyversion`; ${libdir} is then tried
    against usr/lib, usr/lib64 and usr/lib32 below `nativelibfolder`, in
    that order, stopping at the first existing directory.
    """
    expanded = value.replace('${PYTHON_MAJMIN}', pyversion)
    libdir_candidates = ('/usr/lib', '/usr/lib64', '/usr/lib32')
    return any(
        os.path.isdir(expanded.replace('${libdir}', nativelibfolder + libdir))
        for libdir in libdir_candidates
    )
def isCached(item):
    """Return True when the string `item` contains '__pycache__'."""
    return '__pycache__' in item
# Load the existing JSON manifest from the current working directory
with open('python3-manifest.json') as manifest_file:
    old_manifest = json.loads(manifest_file.read())
# First pass to get core-package functionality, because we base everything on the fact that core is actually working
# Not exactly the same as the per-module pass so it should not be a function
print ('Getting dependencies for package: core')

# Special call to check for the core package itself
core_cmd = [sys.executable, 'get_module_deps3.py', 'python-core-package']
output = subprocess.check_output(core_cmd).decode('utf8')
for dep in output.split():
    dep = dep.replace(pyversion, '${PYTHON_MAJMIN}')
    # We only append, so it doesn't hurt what we currently have.
    # The old manifest is reused as data structure since it is the one
    # which will be used to check dependencies for other packages.
    target = old_manifest['core']['cached' if isCached(dep) else 'files']
    if dep not in target:
        target.append(dep)
# Go through every file of the core package and collect its dependencies.
# NOTE: core_entry['files'] is appended to while it is being iterated, so
# entries added below are visited later by this same loop.
core_entry = old_manifest['core']
for entry in core_entry['files']:
    entry = entry.replace(pyversion, '${PYTHON_MAJMIN}')

    # Ignore folders, since we don't import those, difficult to handle multilib
    if isFolder(entry):
        # Pass it directly
        target = core_entry['cached'] if isCached(entry) else core_entry['files']
        if entry not in target:
            target.append(entry)
        continue

    # Ignore binaries and headers, since we don't import those, assume they
    # were added correctly (manually) and pass them directly
    if '${bindir}' in entry or '${includedir}' in entry:
        if entry not in core_entry['files']:
            core_entry['files'].append(entry)
        continue

    # Ignore empty values
    if not entry:
        continue

    # Get module name, shouldn't be affected by libdir/bindir
    module = os.path.splitext(os.path.basename(os.path.normpath(entry)))[0]

    # Launch separate task for each module for deterministic behavior
    # Each module will only import what is necessary for it to work in specific
    print ('Getting dependencies for module: %s' % module)
    module_cmd = [sys.executable, 'get_module_deps3.py', '%s' % module]
    output = subprocess.check_output(module_cmd).decode('utf8')
    print ('The following dependencies were found for module %s:\n' % module)
    print (output)
    for dep in output.split():
        dep = dep.replace(pyversion, '${PYTHON_MAJMIN}')
        # We only append, so it doesn't hurt what we currently have.
        # The old manifest is reused as data structure since it is the one
        # which will be used to check dependencies for other packages.
        target = core_entry['cached'] if isCached(dep) else core_entry['files']
        if dep not in target:
            target.append(dep)
# Record which packages list folders in their files, and which folders those are
for pkg_name, pkg_data in old_manifest.items():
    for entry in pkg_data['files']:
        # Only folders are of interest here
        if not isFolder(entry):
            continue
        print ('%s is a folder' % entry)
        if pkg_name not in hasfolders:
            hasfolders.append(pkg_name)
        if entry not in allfolders:
            allfolders.append(entry)
# Second pass: build the new manifest entry for every package found in the
# old manifest, turning each discovered dependency into either an RDEPENDS
# on the package that already ships it or a FILES entry of the current package.

# Special packages: we assume that when they were manually added to the
# manifest we knew what we were doing, so they are passed through untouched.
special_packages = ['misc', 'modules', 'dev']

for key in old_manifest:
    # Use an empty dict as data structure to hold data for each package and fill it up
    new_manifest[key] = {}
    new_manifest[key]['files'] = []
    new_manifest[key]['rdepends'] = []

    # All packages should depend on core
    if key != 'core':
        new_manifest[key]['rdepends'].append('core')
        new_manifest[key]['cached'] = []
    else:
        # core keeps the cached list that was filled in during the first pass
        new_manifest[key]['cached'] = old_manifest[key]['cached']
    new_manifest[key]['summary'] = old_manifest[key]['summary']

    print('\n')
    print('--------------------------')
    print ('Handling package %s' % key)
    print('--------------------------')
    if key in special_packages or 'staticdev' in key:
        print('Passing %s package directly' % key)
        new_manifest[key] = old_manifest[key]
        continue

    # We already handled core on the first pass, so its files are copied as
    # they are and no dependency check is launched for them.
    if key == 'core':
        new_manifest[key]['files'].extend(old_manifest[key]['files'])
        continue

    for value in old_manifest[key]['files']:
        # Pass folders directly.
        # NOTE(review): there is no `continue` here, so folder entries also go
        # through the dependency check below; this looks intended for packages
        # that are the folder itself (e.g. unittest) - confirm.
        if isFolder(value):
            new_manifest[key]['files'].append(value)

        # Ignore binaries and headers, since we don't import those; pass them
        # directly to the new manifest data structure
        if '${bindir}' in value or '${includedir}' in value:
            if value not in new_manifest[key]['files']:
                new_manifest[key]['files'].append(value)
            continue

        # Ignore empty values
        if value == '':
            continue

        # Get module name, shouldn't be affected by libdir/bindir
        # We need to check if the imported module comes from another (e.g. sqlite3.dump)
        path, value = os.path.split(value)
        path = os.path.basename(path)
        value = os.path.splitext(os.path.basename(value))[0]

        # If this condition is met, it means we need to import it from another module
        # or it's the folder itself (e.g. unittest)
        if path == key:
            if value:
                value = path + '.' + value
            else:
                value = path

        # Launch separate task for each module for deterministic behavior
        # Each module will only import what is necessary for it to work in specific
        print ('\nGetting dependencies for module: %s' % value)
        output = subprocess.check_output([sys.executable, 'get_module_deps3.py', '%s' % value]).decode('utf8')
        # We can print dependencies for debugging purposes
        print ('The following dependencies were found for module %s:\n' % value)
        print (output)

        # Messages collected for the per-module summary printed below
        reportFILES = []
        reportRDEPS = []

        # Output will have all dependencies
        for item in output.split():
            item = item.replace(pyversion, '${PYTHON_MAJMIN}')

            # One of the dependencies that was found could be inside of one of
            # the folders included by another package (e.g. folder encodings
            # contained in codecs); in that case the package containing the
            # folder becomes an rdependency.
            # We check whether the folder string is contained in the item path
            # rather than stripping the filename from the path, because the
            # latter would not work for a folder nested inside a listed folder.
            inFolders = False
            for folder in allfolders:
                if folder in item:
                    inFolders = True  # Did we find a folder?
                    # Loop only through packages which contain folders and
                    # stop at the first one that lists this folder, either in
                    # its files or in its cached entries.
                    for keyfolder in hasfolders:
                        folder_pkg = old_manifest[keyfolder]
                        if folder in folder_pkg['files'] or folder in folder_pkg.get('cached', []):
                            print ('%s folder found in %s' % (folder, keyfolder))
                            if keyfolder not in new_manifest[key]['rdepends'] and keyfolder != key:
                                new_manifest[key]['rdepends'].append(keyfolder)
                            break

            # A folder was found so we're done with this item, we can go on
            if inFolders:
                continue

            # We might already have it on the dictionary since it could depend on a (previously checked) module
            if item in new_manifest[key]['files'] or item in new_manifest[key]['cached']:
                continue

            # Check if this dependency is already contained on another package, so we add it
            # as an RDEPENDS, or if it's not, it means it should be contained on the current
            # package, so we should add it to FILES
            for newkey in old_manifest:
                if item in old_manifest[newkey]['files'] or item in old_manifest[newkey]['cached']:
                    # Since we're nesting, we need to check it's not the same key
                    if newkey != key:
                        if newkey not in new_manifest[key]['rdepends']:
                            # Add it to the new manifest data struct
                            reportRDEPS.append('Adding %s to %s RDEPENDS, because it contains %s\n' % (newkey, key, item))
                            new_manifest[key]['rdepends'].append(newkey)
                        break
            else:
                # Reached only when no *other* package ships the item.
                # A module shouldn't contain itself (${libdir}/python3/sqlite3 shouldn't be on sqlite3 files)
                if os.path.basename(item) != key:
                    reportFILES.append(('Adding %s to %s FILES\n' % (item, key)))
                    # Since it wasn't found on another package, it's not an RDEP, so add it to FILES for this package
                    if isCached(item):
                        new_manifest[key]['cached'].append(item)
                    else:
                        new_manifest[key]['files'].append(item)
                    if item.endswith('*'):
                        wildcards.append(item)
                    # Check for repeated files
                    if item not in allfiles:
                        allfiles.append(item)
                    else:
                        repeated.append(item)

        print('\n')
        print('#################################')
        print('Summary for module %s' % value)
        print('FILES found for module %s:' % value)
        print(''.join(reportFILES))
        print('RDEPENDS found for module %s:' % value)
        print(''.join(reportRDEPS))
        print('#################################')
# Final report of everything that may need manual attention
print ('The following files are repeated (contained in more than one package), please check which package should get it:')
print (repeated)
print('The following files contain wildcards, please check they are necessary')
print(wildcards)
print('The following files contain folders, please check they are necessary')
print(hasfolders)

# Sort every list of every package just so the result looks nicer
for pkg_data in new_manifest.values():
    for field in ('files', 'cached', 'rdepends'):
        pkg_data[field].sort()

# Create the manifest from the data structure that was built
with open('python3-manifest.json.new', 'w') as outfile:
    json.dump(new_manifest, outfile, sort_keys=True, indent=4)
    outfile.write('\n')