mirror.py 6.93 KB
Newer Older
1
2
3
4
5
6
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :


import os
import click
7
import tempfile
8
9
10
11
12
13
14
15
16

import conda_build.api

from . import bdt
from ..mirror import (
        get_json,
        get_local_contents,
        load_glob_list,
        blacklist_filter,
17
        whitelist_filter,
18
19
        download_packages,
        remove_packages,
20
        copy_and_clean_json,
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
        )
from ..log import verbosity_option, get_logger, echo_info, echo_warning

logger = get_logger(__name__)


@click.command(
    epilog="""
Examples:

  1. Mirrors a conda channel:

\b
     $ bdt mirror -vv https://www.idiap.ch/software/bob/label/beta

    """
)
@click.argument(
    "channel-url",
    required=True,
)
@click.argument(
    "dest-dir",
    type=click.Path(exists=False, dir_okay=True, file_okay=False,
        writable=True, readable=True, resolve_path=True),
    required=True,
)
@click.option(
    "-b",
    "--blacklist",
    type=click.Path(exists=True, dir_okay=False, file_okay=True,
        readable=True, resolve_path=True),
    help="A file containing a list of globs to exclude from local "
            "mirroring, one per line",
)
@click.option(
    "-w",
    "--whitelist",
    type=click.Path(exists=True, dir_okay=False, file_okay=True,
        readable=True, resolve_path=True),
    help="A file containing a list of globs to include at local "
            "mirroring, one per line.  This is considered *after* "
            "the blacklisting.  It is here just for testing purposes",
)
@click.option(
    "-m",
    "--check-md5/--no-check-md5",
    default=False,
    help="If set, then check MD5 sums of all packages during conda-index",
)
@click.option(
    "-d",
    "--dry-run/--no-dry-run",
    default=False,
    help="Only goes through the actions, but does not execute them "
    "(combine with the verbosity flags - e.g. ``-vvv``) to enable "
    "printing to help you understand what will be done",
)
@click.option(
    "-t",
    "--tmpdir",
    type=click.Path(exists=True, dir_okay=True, file_okay=False,
        readable=True, writable=True, resolve_path=True),
    help="A directory where to store temporary files",
)
@click.option(
    "-p",
    "--patch/--no-patch",
    default=False,
    help="If set, then consider we are mirroring the defaults channel "
    "where a patch_instructions.json exists and must be downloaded and "
    "prunned so the mirror works adequately",
)
@verbosity_option()
@bdt.raise_on_error
def mirror(
        channel_url,
        dest_dir,
        blacklist,
        whitelist,
        check_md5,
        dry_run,
        tmpdir,
        patch,
        ):
    """Mirrors a conda channel to a particular local destination

    This command is capable of completely mirroring a valid conda channel,
    excluding packages that you may not be interested on via globs.  It works
    to minimize channel usage by first downloading the channel repository data
    (in compressed format), analysing what is available locally and what is
    available on the channel, and only downloading the missing files.
    """

    # NOTE: the docstring above is rendered by click as the command help, so
    # implementation notes live in comments instead.
    #
    # Parameters (all filled in by click from the command line):
    #   channel_url: URL of the remote conda channel to mirror
    #   dest_dir:    local directory holding (or about to hold) the mirror
    #   blacklist:   optional path to a file of globs to exclude
    #   whitelist:   optional path to a file of globs to restrict downloads to
    #   check_md5:   forwarded to ``conda_build.api.update_index``
    #   dry_run:     if set, log the plan but skip indexing; also forwarded to
    #                the download/remove helpers
    #   tmpdir:      optional parent directory for the scratch directory
    #   patch:       if set, also fetch a cleaned ``patch_instructions.json``

    # platforms (channel sub-directories) that are mirrored
    DEFAULT_SUBDIRS = ['noarch', 'linux-64', 'osx-64']

    # creates a self destructing temporary directory that will act as temporary
    # directory for the rest of this program.  The context manager guarantees
    # the directory is removed as soon as the command finishes, including when
    # an exception propagates, instead of relying on object finalisation.
    with tempfile.TemporaryDirectory(prefix='bdt-mirror-tmp',
            dir=tmpdir) as scratch_dir:
        os.environ['TMPDIR'] = scratch_dir
        logger.info('Setting $TMPDIR to %s', scratch_dir)

        # if we are in a dry-run mode, let's let it be known
        if dry_run:
            logger.warning("!!!! DRY RUN MODE !!!!")
            logger.warning("Nothing will be really mirrored")

        noarch = os.path.join(dest_dir, 'noarch')
        if not os.path.exists(noarch):  # first time
            # calls conda index to create basic infrastructure
            logger.info("Creating conda channel at %s...", dest_dir)
            if not dry_run:
                conda_build.api.update_index([dest_dir],
                        subdir=DEFAULT_SUBDIRS, progress=False, verbose=False)

        # the glob lists do not depend on the architecture: load them once
        # instead of re-reading the files on every loop iteration
        if blacklist is not None and os.path.exists(blacklist):
            globs_to_remove = set(load_glob_list(blacklist))
        else:
            globs_to_remove = set()

        if whitelist is not None and os.path.exists(whitelist):
            globs_to_consider = set(load_glob_list(whitelist))
        else:
            globs_to_consider = None  # no whitelist: keep everything

        for arch in DEFAULT_SUBDIRS:

            remote_repodata = get_json(channel_url, arch, 'repodata.json.bz2')
            logger.info('%d packages available in remote index',
                    len(remote_repodata.get('packages', {})))
            local_packages = get_local_contents(dest_dir, arch)
            logger.info('%d packages available in local mirror',
                    len(local_packages))

            # both the legacy (``packages``) and the ``packages.conda``
            # sections of the repodata contribute file names
            remote_packages = set(
                    list(remote_repodata.get('packages', {}).keys()) +
                    list(remote_repodata.get('packages.conda', {}).keys()))

            # in the remote packages, subset those that need to be downloaded
            # according to our own interest
            to_download = blacklist_filter(remote_packages - local_packages,
                    globs_to_remove)

            # the whitelist is applied *after* the blacklist
            if globs_to_consider is not None:
                to_download = whitelist_filter(to_download, globs_to_consider)

            # in the local packages, subset those that we no longer need, be it
            # because they have been removed from the remote repository, or
            # because we decided to blacklist them.
            disappeared_remotely = local_packages - remote_packages
            to_keep = blacklist_filter(local_packages, globs_to_remove)
            to_delete_locally = (local_packages - to_keep) | \
                    disappeared_remotely

            # execute the transaction
            if to_download:
                download_packages(to_download, remote_repodata, channel_url,
                        dest_dir, arch, dry_run)
            else:
                echo_info("Mirror at %s/%s is up-to-date w.r.t. %s/%s. "
                        "No packages to download." % (dest_dir, arch,
                            channel_url, arch))

            if to_delete_locally:
                echo_warning("%d packages will be removed at %s/%s" %
                        (len(to_delete_locally), dest_dir, arch))
                remove_packages(to_delete_locally, dest_dir, arch, dry_run)
            else:
                echo_info("Mirror at %s/%s is up-to-date w.r.t. blacklist. "
                        "No packages to be removed." % (dest_dir, arch))

            if patch:
                # download/cleanup patch instructions, otherwise conda installs
                # may go crazy.  Do this before the indexing, that will use
                # that file to do its magic.
                patch_file = 'patch_instructions.json'
                name = copy_and_clean_json(channel_url, dest_dir, arch,
                        patch_file)
                echo_info("Cleaned copy of %s/%s/%s installed at %s" %
                        (channel_url, arch, patch_file, name))

        # re-indexes the channel to produce a conda-compatible setup
        echo_info("Re-indexing %s..." % dest_dir)
        if not dry_run:
            # imported lazily, only when an actual re-index is performed
            from conda_build.index import MAX_THREADS_DEFAULT
            conda_build.api.update_index([dest_dir], check_md5=check_md5,
                    progress=True, verbose=False, subdir=DEFAULT_SUBDIRS,
                    threads=MAX_THREADS_DEFAULT)