databases.py 26.6 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.cmdline module of the BEAT platform.          #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

import os
29
import click
André Anjos's avatar
André Anjos committed
30
import glob
31
import random
Samuel GAIST's avatar
Samuel GAIST committed
32
import zmq
André Anjos's avatar
André Anjos committed
33
34
35
import logging
import simplejson

Samuel GAIST's avatar
Samuel GAIST committed
36
37
from beat.core.hash import toPath
from beat.core.hash import hashDataset
André Anjos's avatar
André Anjos committed
38
39
from beat.core.utils import NumpyJSONEncoder
from beat.core.database import Database
40
from beat.core.data import RemoteDataSource
41
42
43
from beat.core import dock
from beat.core import inputs
from beat.core import utils
André Anjos's avatar
André Anjos committed
44
45

from . import common
46
from .decorators import raise_on_error
47
48
from .click_helper import AliasedGroup

André Anjos's avatar
André Anjos committed
49

Samuel GAIST's avatar
Samuel GAIST committed
50
logger = logging.getLogger(__name__)
André Anjos's avatar
André Anjos committed
51

52

53
54
55
56
# Commands passed as first element of the command line of the databases
# container (see ``start_db_container``):
#   * CMD_DB_INDEX: started with the database, protocol and set names
#   * CMD_VIEW_OUTPUTS: started with an address to listen on; the caller then
#     connects to it through a ZeroMQ socket
CMD_DB_INDEX = 'index'
CMD_VIEW_OUTPUTS = 'databases_provider'


Samuel GAIST's avatar
Samuel GAIST committed
57
# ----------------------------------------------------------
58
59
60
61
62
63
64


def load_database_sets(configuration, database_name):
    """Loads a database and the sets selected by a database specification.

    Parameters:

      configuration (object): The command-line configuration. Only its
        ``path`` attribute is used here, as the prefix given to
        :py:class:`Database`.

      database_name (str): A specification formatted as
        ``<database>/<version>[/<protocol>[/<set>]]``. The optional protocol
        and set restrict which sets are returned.


    Returns:

      tuple: ``(db_name, database, loaded_sets)``, where ``loaded_sets`` is a
      list of ``(protocol_name, set_name, set_declaration)`` tuples. On any
      failure, an error is logged and ``(None, None, None)`` is returned, so
      callers can always unpack three values.

    """

    # Process the name of the database
    parts = database_name.split('/')

    if not 2 <= len(parts) <= 4:
        logger.error("Database specification should have the format "
                     "`<database>/<version>/[<protocol>/[<set>]]', the value "
                     "you passed (%s) is not valid", database_name)
        # Must be a 3-tuple like every other exit point: callers unpack
        # three values
        return (None, None, None)

    db_name = os.path.join(*parts[:2])
    protocol_filter = parts[2] if len(parts) >= 3 else None
    set_filter = parts[3] if len(parts) == 4 else None

    # Load the database
    dataformat_cache = {}
    database = Database(configuration.path, db_name, dataformat_cache)
    if not database.valid:
        logger.error("Failed to load the database `%s':", db_name)
        for e in database.errors:
            logger.error('  * %s', e)
        return (None, None, None)

    # Filter the protocols
    protocols = database.protocol_names

    if protocol_filter is not None:
        if protocol_filter not in protocols:
            logger.error("The database `%s' does not have the protocol `%s' - "
                         "choose one of `%s'", db_name, protocol_filter,
                         ', '.join(protocols))
            return (None, None, None)

        protocols = [protocol_filter]

    # Filter the sets
    loaded_sets = []

    for protocol_name in protocols:
        sets = database.set_names(protocol_name)

        if set_filter is not None:
            if set_filter not in sets:
                logger.error("The database/protocol `%s/%s' does not have the "
                             "set `%s' - choose one of `%s'",
                             db_name, protocol_name, set_filter,
                             ', '.join(sets))
                return (None, None, None)

            sets = [z for z in sets if z == set_filter]

        loaded_sets.extend([(protocol_name, set_name,
                             database.set(protocol_name, set_name))
                            for set_name in sets])

    return (db_name, database, loaded_sets)


Samuel GAIST's avatar
Samuel GAIST committed
131
# ----------------------------------------------------------
132
133


134
135
def start_db_container(configuration, cmd, host,
                       db_name, protocol_name, set_name, database, db_set,
                       excluded_outputs=None, uid=None, db_root=None):
    """Creates and starts a container running a database command for one set.

    A temporary directory is created holding a ``configuration.json`` file
    and an exported copy of the database (``prefix``), which is mounted
    inside the container together with the cache and the database files.

    Parameters:

      configuration (object): The command-line configuration (its ``cache``
        and ``path`` attributes are used)

      cmd (str): The command to run in the container, either
        ``CMD_DB_INDEX`` or ``CMD_VIEW_OUTPUTS``

      host (object): The docker host used to create and start the container

      db_name (str): The database name (``<database>/<version>``)

      protocol_name (str): The name of the protocol

      set_name (str): The name of the set

      database (object): The loaded database

      db_set (dict): The declaration of the set; its ``outputs`` entry maps
        output names to dataformat names

      excluded_outputs (collection): Names of the outputs to ignore, or
        ``None`` to keep them all

      uid (int): If set, stored as ``datasets_uid`` in the configuration and
        assigned to the container

      db_root (str): If set, used as root folder of the database files
        (mounted at the same location in the container) instead of the
        ``root_folder`` found in the database declaration


    Returns:

      When ``cmd`` is ``CMD_VIEW_OUTPUTS``, a tuple ``(container, socket,
      zmq_context, input_list)``; otherwise only the container.


    Raises:

      RuntimeError: if no environment is found for the database

    """

    CONTAINER_PREFIX = '/beat/prefix'
    CONTAINER_CACHE = '/beat/cache'

    def is_excluded(output_name):
        return excluded_outputs is not None and \
            output_name in excluded_outputs

    input_list = inputs.InputList()

    input_group = inputs.InputGroup(set_name, restricted_access=False)
    input_list.add(input_group)

    db_configuration = {
        'inputs': {},
        'channel': set_name,
    }

    if uid is not None:
        db_configuration['datasets_uid'] = uid

    if db_root is not None:
        db_configuration['datasets_root_path'] = db_root

    # The hash only depends on the set, not on the output: compute it once
    dataset_hash = hashDataset(db_name, protocol_name, set_name)
    dataset_path = toPath(dataset_hash, '.db')

    for output_name, dataformat_name in db_set['outputs'].items():
        if is_excluded(output_name):
            continue

        db_configuration['inputs'][output_name] = dict(
            database=db_name,
            protocol=protocol_name,
            set=set_name,
            output=output_name,
            channel=set_name,
            hash=dataset_hash,
            path=dataset_path
        )

    db_tempdir = utils.temporary_directory()

    with open(os.path.join(db_tempdir, 'configuration.json'), 'wt') as f:
        simplejson.dump(db_configuration, f, indent=4)

    tmp_prefix = os.path.join(db_tempdir, 'prefix')
    if not os.path.exists(tmp_prefix):
        os.makedirs(tmp_prefix)

    database.export(tmp_prefix)

    if db_root is None:
        # Point the exported declaration to the location at which the
        # database files are mounted in the container, and remember where
        # they are on the host
        json_path = os.path.join(tmp_prefix, 'databases', db_name + '.json')

        with open(json_path, 'r') as f:
            db_data = simplejson.load(f)

        database_path = db_data['root_folder']
        db_data['root_folder'] = os.path.join('/databases', db_name)

        with open(json_path, 'w') as f:
            simplejson.dump(db_data, f, indent=4)

    try:
        db_envkey = host.db2docker([db_name])
    except Exception:
        # ``Exception`` rather than a bare ``except``, so that
        # KeyboardInterrupt and SystemExit are not turned into RuntimeError
        raise RuntimeError("No environment found for the database `%s' "
                           "- available environments are %s" % (
                               db_name,
                               ", ".join(host.db_environments.keys())))

    # Creation of the container
    # Note: we only support one databases image loaded at the same time
    # NOTE(review): the port is picked at random and not checked for
    # availability
    database_port = random.randint(51000, 60000)
    if cmd == CMD_VIEW_OUTPUTS:
        db_cmd = [
            cmd,
            '0.0.0.0:{}'.format(database_port),
            CONTAINER_PREFIX,
            CONTAINER_CACHE
        ]
    else:
        db_cmd = [
            cmd,
            CONTAINER_PREFIX,
            CONTAINER_CACHE,
            db_name,
            protocol_name,
            set_name
        ]

    databases_container = host.create_container(db_envkey, db_cmd)
    databases_container.uid = uid

    if cmd == CMD_VIEW_OUTPUTS:
        databases_container.add_port(
            database_port, database_port, host_address=host.ip)

    # Specify the volumes to mount inside the container
    databases_container.add_volume(tmp_prefix, CONTAINER_PREFIX)
    databases_container.add_volume(configuration.cache, CONTAINER_CACHE,
                                   read_only=False)

    if 'datasets_root_path' not in db_configuration:
        # Only reached when ``db_root`` is None, in which case
        # ``database_path`` was read from the database declaration above
        databases_container.add_volume(
            database_path, os.path.join('/databases', db_name))
    else:
        databases_container.add_volume(db_configuration['datasets_root_path'],
                                       db_configuration['datasets_root_path'])

    # Start the container
    host.start(databases_container)

    if cmd != CMD_VIEW_OUTPUTS:
        return databases_container

    # Communicate with container
    zmq_context = zmq.Context()
    db_socket = zmq_context.socket(zmq.PAIR)
    db_address = 'tcp://{}:{}'.format(host.ip, database_port)
    db_socket.connect(db_address)

    for output_name, dataformat_name in db_set['outputs'].items():
        if is_excluded(output_name):
            continue

        data_source = RemoteDataSource()
        data_source.setup(db_socket, output_name,
                          dataformat_name, configuration.path)

        input_ = inputs.Input(output_name,
                              database.dataformats[dataformat_name],
                              data_source)
        input_group.add(input_)

    return (databases_container, db_socket, zmq_context, input_list)
267
268


Samuel GAIST's avatar
Samuel GAIST committed
269
# ----------------------------------------------------------
270
271


272
def pull_impl(webapi, prefix, names, force, indentation, format_cache):
    """Downloads databases, and the dataformats they use, from the server.

    Parameters:

      webapi (object): An instance of our WebAPI class, prepared to access the
        BEAT server of interest

      prefix (str): The root of the path in which the user objects are stored

      names (:py:class:`list`): Strings representing either the unique
        relative paths of the objects to retrieve, or usernames from which to
        retrieve objects. An empty list pulls all available objects of the
        type. If no user is set, all public objects of the type are pulled.

      force (bool): If ``True``, local changes are overwritten with the
        remotely retrieved copies.

      indentation (int): The indentation level, useful if this function is
        called recursively while downloading different object types. This is
        normally set to ``0`` (zero).

      format_cache (dict): A dictionary containing all dataformats already
        downloaded.


    Returns:

      int: The exit status of the command, to be reported back to the calling
        process: zero if everything worked, non-zero otherwise (POSIX
        compliance). It is the sum of the status of the databases download
        and of the dataformats download.

    """

    from .dataformats import pull_impl as dataformats_pull

    fields = ['declaration', 'code', 'description']
    status, names = common.pull(webapi, prefix, 'database', names, fields,
                                force, indentation)

    # collect the dataformats used by each of the pulled databases
    dataformats = [
        format_name
        for name in names
        for format_name in Database(prefix, name).dataformats.keys()
    ]

    # download the dataformats on which the databases depend
    df_status = dataformats_pull(webapi, prefix, dataformats, force,
                                 indentation + 2, format_cache)

    return status + df_status
André Anjos's avatar
André Anjos committed
325
326


Samuel GAIST's avatar
Samuel GAIST committed
327
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
328
329


330
def index_outputs(configuration, names, uid=None, db_root=None, docker=False):
    """Generates the index files of the sets of the given databases.

    Parameters:

      configuration (object): The command-line configuration (its ``path``
        and ``cache`` attributes are used)

      names (:py:class:`list`): Database specifications, see
        ``load_database_sets`` for their format

      uid (int): Forwarded to ``start_db_container`` (docker only)

      db_root (str): Forwarded to ``start_db_container`` (docker only)

      docker (bool): If ``True``, each set is indexed by a container instead
        of the current process


    Returns:

      int: The number of databases or sets for which the indexation failed

    """

    names = common.make_up_local_list(configuration.path, 'database', names)
    failures = 0

    if docker:
        host = dock.Host(raise_on_errors=False)

    for database_name in names:
        logger.info("Indexing database %s...", database_name)

        db_name, database, sets = load_database_sets(configuration,
                                                     database_name)
        if database is None:
            failures += 1
            continue

        for protocol_name, set_name, db_set in sets:
            if docker:
                # Delegate the indexation to a container and wait for it
                container = start_db_container(
                    configuration, CMD_DB_INDEX, host, db_name,
                    protocol_name, set_name, database, db_set,
                    uid=uid, db_root=db_root)

                status = host.wait(container)
                logs = host.logs(container)
                host.rm(container)

                if status != 0:
                    logger.error("Error occurred: %s", logs)
                    failures += 1

                continue

            # Local indexation
            try:
                view = database.view(protocol_name, set_name)
            except SyntaxError as error:
                logger.error("Failed to load the database `%s':",
                             database_name)
                logger.error('  * Syntax error: %s', error)
                view = None

            if view is None:
                failures += 1
                continue

            dataset_hash = hashDataset(db_name, protocol_name, set_name)

            try:
                view.index(os.path.join(configuration.cache,
                                        toPath(dataset_hash, '.db')))
            except RuntimeError as error:
                logger.error("Failed to load the database `%s':",
                             database_name)
                logger.error('  * Runtime error %s', error)
                failures += 1

    return failures
André Anjos's avatar
André Anjos committed
388
389


Samuel GAIST's avatar
Samuel GAIST committed
390
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
391
392


393
def list_index_files(configuration, names):
    """Logs the index files existing in the cache for the given databases.

    Parameters:

      configuration (object): The command-line configuration (its ``path``
        and ``cache`` attributes are used)

      names (:py:class:`list`): Database specifications, see
        ``load_database_sets`` for their format


    Returns:

      int: The number of databases that could not be loaded

    """

    names = common.make_up_local_list(configuration.path, 'database', names)

    retcode = 0

    for database_name in names:
        logger.info("Listing database %s indexes...", database_name)

        (db_name, database, sets) = load_database_sets(
            configuration, database_name)
        if database is None:
            retcode += 1
            continue

        for protocol_name, set_name, db_set in sets:
            dataset_hash = hashDataset(db_name, protocol_name, set_name)
            index_filename = toPath(dataset_hash)

            # Index files are written in the cache (see ``index_outputs``),
            # so they must be searched there, like ``delete_index_files``
            # does, and not relatively to the current directory
            basename = os.path.join(configuration.cache,
                                    os.path.splitext(index_filename)[0])

            for g in glob.glob(basename + '.*'):
                logger.info(g)

    return retcode
416
417


Samuel GAIST's avatar
Samuel GAIST committed
418
# ----------------------------------------------------------
419
420


421
def delete_index_files(configuration, names):
    """Removes from the cache the index files of the given databases.

    Directories of the cache left empty by the removal are deleted too.

    Parameters:

      configuration (object): The command-line configuration (its ``path``
        and ``cache`` attributes are used)

      names (:py:class:`list`): Database specifications, see
        ``load_database_sets`` for their format


    Returns:

      int: The number of databases that could not be loaded

    """

    names = common.make_up_local_list(configuration.path, 'database', names)
    failures = 0

    for database_name in names:
        logger.info("Deleting database %s indexes...", database_name)

        db_name, database, sets = load_database_sets(configuration,
                                                     database_name)
        if database is None:
            failures += 1
            continue

        for protocol_name, set_name, db_set in sets:
            # NOTE(review): nothing below depends on the output name, the
            # same files are looked up once per output of the set
            for _ in db_set['outputs'].keys():
                dataset_hash = hashDataset(db_name, protocol_name, set_name)
                relative_stem = os.path.splitext(toPath(dataset_hash))[0]
                stem = os.path.join(configuration.cache, relative_stem)

                for filename in glob.glob(stem + '.*'):
                    logger.info("removing `%s'...", filename)
                    os.unlink(filename)

                common.recursive_rmdir_if_empty(os.path.dirname(stem),
                                                configuration.cache)

    return failures
451
452


Samuel GAIST's avatar
Samuel GAIST committed
453
# ----------------------------------------------------------
454

André Anjos's avatar
André Anjos committed
455

456
457
def view_outputs(configuration, dataset_name, excluded_outputs=None, uid=None,
                 db_root=None, docker=False):
    """Prints the data produced by the outputs of one database set.

    Parameters:

      configuration (object): The command-line configuration (its ``path``
        and ``cache`` attributes are used)

      dataset_name (str): A database specification (see
        ``load_database_sets``) that must select exactly one set

      excluded_outputs (str): Comma-separated names of the outputs that
        should not be displayed, or ``None`` to display them all

      uid (int): Forwarded to ``start_db_container`` (docker only)

      db_root (str): Forwarded to ``start_db_container`` (docker only)

      docker (bool): If ``True``, the data is provided by a container instead
        of a view created in the current process


    Returns:

      int: Zero if all the data could be displayed, one otherwise

    """

    def data_to_json(data, indent):
        """Formats ``data`` as JSON, continuation lines being indented by
        ``indent`` spaces so they line up after the label of the output"""

        value = common.stringify(data.as_dict())

        value = simplejson.dumps(value, indent=4, cls=NumpyJSONEncoder) \
            .replace('"BEAT_LIST_DELIMITER[', '[') \
            .replace(']BEAT_LIST_DELIMITER"', ']') \
            .replace('"...",', '...') \
            .replace('"BEAT_LIST_SIZE(', '(') \
            .replace(')BEAT_LIST_SIZE"', ')')

        return ('\n' + ' ' * indent).join(value.split('\n'))

    # Load the infos about the database set
    (db_name, database, sets) = load_database_sets(configuration, dataset_name)
    if database is None:
        # the reason was already logged by load_database_sets()
        return 1

    if len(sets) != 1:
        logger.error("The specification `%s' must select exactly one set, "
                     "but it matches %d", dataset_name, len(sets))
        return 1

    (protocol_name, set_name, db_set) = sets[0]

    if excluded_outputs is not None:
        # A real list is required: it is searched once per output, and the
        # iterator returned by map() on Python 3 would be exhausted by the
        # first search. (A comprehension is used since ``list`` is also the
        # name of a command defined in this module.)
        excluded_outputs = [x.strip() for x in excluded_outputs.split(',')]

    # Setup the view so the outputs can be used
    if not docker:
        view = database.view(protocol_name, set_name)

        if view is None:
            return 1

        dataset_hash = hashDataset(db_name, protocol_name, set_name)
        view.setup(os.path.join(configuration.cache,
                                toPath(dataset_hash, '.db')), pack=False)
        input_group = inputs.InputGroup(set_name, restricted_access=False)

        for output_name, dataformat_name in db_set['outputs'].items():
            if excluded_outputs is not None and \
               output_name in excluded_outputs:
                continue

            input_ = inputs.Input(output_name,
                                  database.dataformats[dataformat_name],
                                  view.data_sources[output_name])
            input_group.add(input_)

    else:
        host = dock.Host(raise_on_errors=False)

        (databases_container, db_socket, zmq_context, input_list) = \
            start_db_container(configuration, CMD_VIEW_OUTPUTS,
                               host, db_name, protocol_name,
                               set_name, database, db_set,
                               excluded_outputs=excluded_outputs,
                               uid=uid, db_root=db_root)

        input_group = input_list.group(set_name)

    retvalue = 0

    # Display the data
    try:
        previous_start = -1

        while input_group.hasMoreData():
            input_group.next()

            start = input_group.data_index
            end = input_group.data_index_end

            if start != previous_start:
                print(80 * '-')

                print('FROM %d TO %d' % (start, end))

                # inputs whose current data covers the whole range
                whole_inputs = [input_ for input_ in input_group
                                if input_.data_index == start and
                                input_.data_index_end == end]

                for input_ in whole_inputs:
                    label = ' - ' + str(input_.name) + ': '
                    print(label + data_to_json(input_.data, len(label)))

                previous_start = start

            # inputs that just got new data covering only part of the range
            selected_inputs = \
                [input_ for input_ in input_group
                 if input_.data_index == input_group.first_data_index and
                 (input_.data_index != start or
                  input_.data_index_end != end)]

            grouped_inputs = {}
            for input_ in selected_inputs:
                key = (input_.data_index, input_.data_index_end)
                grouped_inputs.setdefault(key, []).append(input_)

            for key in sorted(grouped_inputs):
                # blank line between groups (a bare ``print`` statement
                # outputs nothing on Python 3)
                print('')
                print('  FROM %d TO %d' % key)

                for input_ in grouped_inputs[key]:
                    label = '   - ' + str(input_.name) + ': '
                    print(label + data_to_json(input_.data, len(label)))

    except Exception as e:
        logger.error("Failed to retrieve the next data: %s", e)
        retvalue = 1

    if docker:
        host.kill(databases_container)
        status = host.wait(databases_container)
        logs = host.logs(databases_container)
        host.rm(databases_container)
        if status != 0:
            logger.error("Docker error: %s", logs)

    return retvalue
André Anjos's avatar
André Anjos committed
580

581

Samuel GAIST's avatar
Samuel GAIST committed
582
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
583
584


585
@click.group(cls=AliasedGroup)
@click.pass_context
def databases(ctx):
    """Database commands"""
    # Nothing to do here: this function only declares the group to which the
    # commands below are attached
590

Samuel GAIST's avatar
Samuel GAIST committed
591

592
@databases.command()
@click.option('--remote', help='Only acts on the remote copy of the database.',
              is_flag=True)
@click.pass_context
@raise_on_error
def list(ctx, remote):
    '''Lists all the databases available on the platform.

    To list all existing databases on your local prefix:

        $ beat databases list
    '''
    configuration = ctx.meta['config']

    # The local listing does not need any connection to the server
    if not remote:
        return common.display_local_list(configuration.path, 'database')

    with common.make_webapi(configuration) as webapi:
        return common.display_remote_list(webapi, 'database')


612
613
614
@databases.command()
@click.argument('names', nargs=-1)
@click.pass_context
@raise_on_error
def path(ctx, names):
    '''Displays local path of databases files

    Example:
      $ beat databases path xxx
    '''
    # The objects are looked up in the prefix of the current configuration
    prefix = ctx.meta['config'].path
    return common.display_local_path(prefix, 'database', names)


@databases.command()
@click.argument('name', nargs=1)
@click.pass_context
@raise_on_error
def edit(ctx, name):
    '''Edit local database file

    Example:
      $ beat databases edit xxx
    '''
    # Both the prefix and the editor come from the current configuration
    configuration = ctx.meta['config']
    return common.edit_local_file(configuration.path, configuration.editor,
                                  'database', name)



641
@databases.command()
@click.argument('db_names', nargs=-1)
@click.pass_context
@raise_on_error
def check(ctx, db_names):
    '''Checks a local database for validity.

    $ beat databases check [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"
    '''
    # The databases are looked up in the prefix of the current configuration
    prefix = ctx.meta['config'].path
    return common.check(prefix, 'database', db_names)


656
@databases.command()
@click.argument('db_names', nargs=-1)
@click.option('--force', help='Performs operation regardless of conflicts',
              is_flag=True)
@click.pass_context
@raise_on_error
def pull(ctx, db_names, force):
    '''Downloads the specified databases from the server.

       $ beat databases pull [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"
    '''
    configuration = ctx.meta['config']

    # Top-level call: no indentation, and no dataformat downloaded yet
    with common.make_webapi(configuration) as webapi:
        return pull_impl(webapi, prefix=configuration.path, names=db_names,
                         force=force, indentation=0, format_cache={})


675
@databases.command()
@click.argument('db_names', nargs=-1)
@click.option('--force', help='Performs operation regardless of conflicts',
              is_flag=True)
@click.option('--dry-run', help='Dry run',
              is_flag=True)
@click.pass_context
@raise_on_error
def push(ctx, db_names, force, dry_run):
    '''Uploads databases to the server (must provide a valid admin token).

    $ beat databases push [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"

    '''
    configuration = ctx.meta['config']

    # Fields of a database given to common.push()
    fields = ['name', 'declaration', 'code', 'description']

    with common.make_webapi(configuration) as webapi:
        return common.push(webapi, configuration.path, 'database', db_names,
                           fields, {}, force, dry_run, 0)


700
@databases.command()
@click.argument('db_names', nargs=-1)
@click.pass_context
@raise_on_error
def diff(ctx, db_names):
    '''Shows changes between the local database and the remote version.

    $ beat databases diff [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"
    '''
    # ctx:      click context carrying the configuration in ctx.meta['config'].
    # db_names: names collected by click (nargs=-1, so possibly empty).
    # Raises click.ClickException when no name was supplied.
    configuration = ctx.meta['config']

    # nargs=-1 accepts zero arguments, so the "at least one" rule has to be
    # enforced here.
    if not db_names:
        raise click.ClickException("Requires at least one database name")

    with common.make_webapi(configuration) as webapi:
        # NOTE(review): only the first name is compared; any additional
        # names given on the command line are ignored.
        return common.diff(webapi, configuration.path, 'database',
                           db_names[0],
                           ['declaration', 'code', 'description'])


721
@databases.command()
@click.pass_context
@raise_on_error
def status(ctx):
    '''Shows (editing) status for all available databases'''
    # ctx: click context carrying the configuration in ctx.meta['config'].
    configuration = ctx.meta['config']

    with common.make_webapi(configuration) as webapi:
        # common.status() returns an indexable result; only its first
        # element is handed back to the caller (presumably the status code
        # -- confirm against common.status).
        return common.status(webapi, configuration.path, 'database')[0]


731
@databases.command()
@click.argument('db_names', nargs=-1)
@click.pass_context
@raise_on_error
def version(ctx, db_names):
    '''Creates a new version of an existing database.

    $ beat databases version [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"

    '''
    # ctx:      click context carrying the configuration in ctx.meta['config'].
    # db_names: names collected by click (nargs=-1, so possibly empty).
    # Raises click.ClickException when no name was supplied.
    configuration = ctx.meta['config']

    # nargs=-1 accepts zero arguments, so the "at least one" rule has to be
    # enforced here.
    if not db_names:
        raise click.ClickException("Requires at least one database name")

    # NOTE(review): only the first name is versioned; any additional names
    # given on the command line are ignored.
    return common.new_version(configuration.path, 'database', db_names[0])


750
@databases.command()
@click.argument('db_names', nargs=-1)
@click.option('--list', help='List index files matching output if they exist',
              is_flag=True)
@click.option('--delete', help='Delete index files matching output if they '
              'exist (also, recursively deletes empty directories)',
              is_flag=True)
# NOTE(review): this flag is declared with is_flag=True *and* default=True.
# In click releases that derive the flag value as ``not default``, passing
# --checksum explicitly would switch the value to False, and the command
# would then fall through to the failure code below -- confirm this is
# intended.
@click.option('--checksum', help='Checksums index files', is_flag=True,
              default=True)
@click.option('--uid', type=click.INT, default=None)
@click.option('--db-root', help="Database root")
@click.option('--docker', is_flag=True)
@click.pass_context
@raise_on_error
def index(ctx, db_names, list, delete, checksum, uid, db_root, docker):
    '''Indexes all outputs (of all sets) of a database.

    To index the contents of a database

        $ beat databases index simple/1

    To index the contents of a protocol on a database

        $ beat databases index simple/1/double

    To index the contents of a set in a protocol on a database

        $ beat databases index simple/1/double/double
    '''
    # ``list`` shadows the builtin inside this function; the parameter name
    # is dictated by the --list option declared above.
    #
    # Exactly one action runs, chosen in this priority order:
    #   --list, then --delete, then --checksum.
    configuration = ctx.meta['config']

    if list:
        return list_index_files(configuration, db_names)

    if delete:
        return delete_index_files(configuration, db_names)

    if checksum:
        return index_outputs(configuration, db_names, uid=uid,
                             db_root=db_root, docker=docker)

    # No action was selected: report failure.
    return 1

790
791
@databases.command()
@click.argument('set_name', nargs=1)
@click.option('--exclude', help='When viewing, excludes this output',
              default=None)
@click.option('--uid', type=click.INT, default=None)
@click.option('--db-root', help="Database root")
@click.option('--docker', is_flag=True)
@click.pass_context
@raise_on_error
def view(ctx, set_name, exclude, uid, db_root, docker):
    '''View the data of the specified dataset.

    To view the contents of a specific set

    $ beat databases view simple/1/protocol/set
    '''
    # ctx:      click context carrying the configuration in ctx.meta['config'].
    # set_name: the single set identifier given on the command line.
    # exclude:  value of --exclude, or None when the option was not given.
    # uid, db_root, docker: forwarded unchanged to view_outputs().
    configuration = ctx.meta['config']

    # Keyword arguments common to both call shapes below.
    forwarded = dict(uid=uid, db_root=db_root, docker=docker)

    # When --exclude is absent the third positional argument is omitted
    # entirely, so view_outputs() applies its own default for it.
    if exclude is None:
        return view_outputs(configuration, set_name, **forwarded)

    return view_outputs(configuration, set_name, exclude, **forwarded)