diff --git a/layerindex/tools/fedora-fetch.py b/layerindex/tools/fedora-fetch.py new file mode 100755 index 0000000..492082a --- /dev/null +++ b/layerindex/tools/fedora-fetch.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 + +# Fedora Pagure (e.g. https://src.fedoraproject.org/) fetch utility + +# Copyright (C) 2017, 2018 Intel Corporation +# +# Licensed under the MIT license, see COPYING.MIT for details + +import sys +import os +import argparse +import json +import requests +import re +import subprocess + + +def get_repos_api(url, toplevel_item=None, name_attr='name', use_link_header=True, link_item='pagination', link_next_attr='next'): + link_re = re.compile('<(http[^>]+)>; rel="([a-zA-Z0-9]+)"') + repos = None + while True: + session = requests.Session() + print('getting %s' % url) + with requests.Session() as s: + r = s.get(url) + if not r.ok: + print('Request failed: %d: %s' % (r.status_code, r.text)) + sys.exit(2) + st = r.text + jdata = json.loads(st) + if toplevel_item: + repos = jdata[toplevel_item] + else: + repos = jdata + for repo in repos: + name = repo[name_attr] + yield repo + + link_url = None + if use_link_header: + link = r.headers.get('Link', None) + if link: + linkitems = dict([reversed(x) for x in link_re.findall(link)]) + link_url = linkitems.get('next', None) + else: + link = jdata.get(link_item, None) + if link: + link_url = link.get(link_next_attr, None) + if not link_url: + break + url = link_url + + +def fetchall(args): + + site = args.site + if site.endswith('/'): + site = site[:-1] + + keys = {'site': site, + 'start_page': args.resume_page, + 'per_page': 100 + } + url = '{site}/api/0/projects?page={start_page}&per_page={per_page}' + url = url.format(**keys) + + existing = os.listdir(args.outdir) + for name in existing: + if name.endswith('.deleted'): + print('Directories marked deleted (suffix .deleted) still exist in output path - remove these to continue') + sys.exit(1) + + if args.resume_from: + fetching = False + else: + fetching = True + + failed = [] + gotsomething = False + for repo in get_repos_api(url, toplevel_item='projects', use_link_header=False, link_item='pagination', link_next_attr='next'): + gotsomething = True + if repo['parent']: + print('ignoring %s' % repo['fullname']) + continue + if not repo['fullname'].startswith('rpms/'): + print('ignoring %s' % repo['fullname']) + continue + name = repo['name'] + clone_url = site + '/' + repo['url_path'] + '.git' + if name in existing: + existing.remove(name) + if args.resume_from and name == args.resume_from: + fetching = True + elif not fetching: + print('Skipping %s' % name) + continue + + outpath = os.path.join(args.outdir, name) + for retry in [0, 1]: + if os.path.exists(outpath): + print('Update %s' % outpath) + ret = subprocess.call(['git', 'pull'], cwd=outpath) + else: + print('Fetch %s' % clone_url) + ret = subprocess.call(['git', 'clone', clone_url, name], cwd=args.outdir) + if ret == 0: + break + if ret != 0: + failed.append(name) + + if not gotsomething: + print('Something went wrong - no repositories were found') + sys.exit(1) + + if not args.resume_page: + deleted = False + for dirname in existing: + dirpath = os.path.join(outpath, dirname) + print('Marking %s as deleted' % dirname) + os.rename(dirpath, dirpath + '.deleted') + deleted = True + if deleted: + print('You will need to delete the above marked directories manually') + + if failed: + print('The following repositories failed to fetch properly:') + for name in failed: + try: + dirlist = os.listdir(os.path.join(args.outdir, name)) + except FileNotFoundError: + dirlist = None + if dirlist == [] or dirlist == ['.git']: + print('%s (empty)' % name) + else: + print('%s' % name) + + + + +def main(): + parser = argparse.ArgumentParser(description="Fedora package source fetch utility", + epilog="Use %(prog)s --help to get help on a specific command") + #parser.add_argument('-d', '--debug', help='Enable debug output', action='store_true') + subparsers = parser.add_subparsers(title='subcommands', metavar='') + subparsers.required = True + + parser_fetchall = subparsers.add_parser('fetchall', + help='Fetch/update all repos from Fedora\'s pagure instance', + description='Fetches/updates all repos in a pagure instance') + parser_fetchall.add_argument('site', nargs='?', default='https://src.fedoraproject.org', help='URL to Pagure site (default %(default)s)') + parser_fetchall.add_argument('outdir', nargs='?', default='.', help='Output directory (default %(default)s)') + parser_fetchall.add_argument('-r', '--resume-from', help='Resume from the specified repository') + parser_fetchall.add_argument('-p', '--resume-page', default='1', help='Resume from the specified page (disables deleting)') + parser_fetchall.set_defaults(func=fetchall) + + args = parser.parse_args() + + ret = args.func(args) + + return ret + + +if __name__ == "__main__": + try: + ret = main() + except Exception: + ret = 1 + import traceback + traceback.print_exc() + sys.exit(ret)