databases.py 25 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.cmdline module of the BEAT platform.          #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

import os
29
import click
André Anjos's avatar
André Anjos committed
30
import glob
31
import random
Samuel GAIST's avatar
Samuel GAIST committed
32
import zmq
33

André Anjos's avatar
André Anjos committed
34
import logging
Samuel GAIST's avatar
Samuel GAIST committed
35

André Anjos's avatar
André Anjos committed
36
37
38

import simplejson

Samuel GAIST's avatar
Samuel GAIST committed
39
40
from beat.core.hash import toPath
from beat.core.hash import hashDataset
André Anjos's avatar
André Anjos committed
41
42
from beat.core.utils import NumpyJSONEncoder
from beat.core.database import Database
43
from beat.core.data import load_data_index, RemoteDataSource
44
45
46
from beat.core import dock
from beat.core import inputs
from beat.core import utils
André Anjos's avatar
André Anjos committed
47
48
49

from . import common

Samuel GAIST's avatar
Samuel GAIST committed
50
logger = logging.getLogger(__name__)  # module-level logger for this command module
André Anjos's avatar
André Anjos committed
51

52
53
54
55
# Names of the commands executed inside the database docker container
# (see start_db_container): 'index' builds set indexes, 'databases_provider'
# serves set outputs over a ZMQ socket.
CMD_DB_INDEX = 'index'
CMD_VIEW_OUTPUTS = 'databases_provider'


Samuel GAIST's avatar
Samuel GAIST committed
56
# ----------------------------------------------------------
57
58
59
60
61
62
63


def load_database_sets(configuration, database_name):
    """Load a database and the sets it contains, optionally filtered.

    Parameters:

      configuration (object): Configuration object providing the ``path``
        prefix under which databases are stored

      database_name (str): Specification of what to load, formatted as
        ``<database>/<version>[/<protocol>[/<set>]]``. The optional parts
        restrict the returned sets to one protocol and/or one set.


    Returns:

      tuple: ``(db_name, database, sets)`` where ``sets`` is a list of
        ``(protocol_name, set_name, set_declaration)`` tuples. On any
        error ``(None, None, None)`` is returned, after logging the cause.
    """

    # Process the name of the database
    parts = database_name.split('/')

    if len(parts) == 2:
        db_name = os.path.join(*parts[:2])
        protocol_filter = None
        set_filter = None

    elif len(parts) == 3:
        db_name = os.path.join(*parts[:2])
        protocol_filter = parts[2]
        set_filter = None

    elif len(parts) == 4:
        db_name = os.path.join(*parts[:2])
        protocol_filter = parts[2]
        set_filter = parts[3]

    else:
        logger.error("Database specification should have the format "
                     "`<database>/<version>/[<protocol>/[<set>]]', the value "
                     "you passed (%s) is not valid", database_name)
        # Fixed: this path used to return a 2-tuple `(None, None)` while
        # every caller unpacks three values, raising ValueError instead of
        # reporting the error cleanly
        return (None, None, None)

    # Load the database (dataformats are cached while validating it)
    dataformat_cache = {}
    database = Database(configuration.path,
                        db_name, dataformat_cache)
    if not database.valid:
        logger.error("Failed to load the database `%s':", db_name)
        for e in database.errors:
            logger.error('  * %s', e)
        return (None, None, None)

    # Filter the protocols
    protocols = database.protocol_names

    if protocol_filter is not None:
        if protocol_filter not in protocols:
            logger.error("The database `%s' does not have the protocol `%s' - "
                         "choose one of `%s'", db_name, protocol_filter,
                         ', '.join(protocols))
            return (None, None, None)

        protocols = [protocol_filter]

    # Filter the sets
    loaded_sets = []

    for protocol_name in protocols:
        sets = database.set_names(protocol_name)

        if set_filter is not None:
            if set_filter not in sets:
                logger.error("The database/protocol `%s/%s' does not have the "
                             "set `%s' - choose one of `%s'",
                             db_name, protocol_name, set_filter,
                             ', '.join(sets))
                return (None, None, None)

            sets = [z for z in sets if z == set_filter]

        loaded_sets.extend([(protocol_name, set_name,
                             database.set(protocol_name, set_name))
                            for set_name in sets])

    return (db_name, database, loaded_sets)


Samuel GAIST's avatar
Samuel GAIST committed
130
# ----------------------------------------------------------
131
132


133
134
def start_db_container(configuration, cmd, host,
                       db_name, protocol_name, set_name, database, db_set,
                       excluded_outputs=None, uid=None, db_root=None):
    """Start a docker container running the given database command.

    Builds a temporary prefix holding the database declaration and a
    ``configuration.json`` describing the set outputs, then launches the
    database environment container with that prefix and the cache mounted.

    Parameters:

      configuration (object): Configuration object providing ``path``
        (prefix) and ``cache`` locations

      cmd (str): Command to run inside the container, either
        ``CMD_DB_INDEX`` or ``CMD_VIEW_OUTPUTS``

      host (object): Docker host abstraction (``beat.core.dock.Host``)

      db_name (str): Database name as ``<database>/<version>``

      protocol_name (str): Name of the protocol to address

      set_name (str): Name of the set to address

      database (object): Loaded ``beat.core.database.Database`` instance

      db_set (dict): Set declaration; its ``outputs`` entry maps output
        names to dataformat names

      excluded_outputs (list, Optional): Output names to skip

      uid (int, Optional): UID the container should use for dataset access

      db_root (str, Optional): Root folder of the raw database files; when
        not given, the root folder declared in the database is mounted


    Returns:

      For ``CMD_VIEW_OUTPUTS``: a ``(container, socket, zmq_context,
      input_list)`` tuple where ``input_list`` exposes the remote outputs.
      Otherwise: the started container object alone.


    Raises:

      RuntimeError: If no docker environment is available for the database.
    """

    input_list = inputs.InputList()

    input_group = inputs.InputGroup(set_name, restricted_access=False)
    input_list.add(input_group)

    db_configuration = {
        'inputs': {},
        'channel': set_name,
    }

    if uid is not None:
        db_configuration['datasets_uid'] = uid

    if db_root is not None:
        db_configuration['datasets_root_path'] = db_root

    for output_name, dataformat_name in db_set['outputs'].items():
        if excluded_outputs is not None and output_name in excluded_outputs:
            continue

        dataset_hash = hashDataset(db_name, protocol_name, set_name)
        db_configuration['inputs'][output_name] = dict(
            database=db_name,
            protocol=protocol_name,
            set=set_name,
            output=output_name,
            channel=set_name,
            hash=dataset_hash,
            path=toPath(dataset_hash, '.db')
        )

    db_tempdir = utils.temporary_directory()

    # Fixed: open in text mode ('w') - simplejson.dump() writes `str`
    # objects, which raises TypeError on a binary-mode file under Python 3
    with open(os.path.join(db_tempdir, 'configuration.json'), 'w') as f:
        simplejson.dump(db_configuration, f, indent=4)

    tmp_prefix = os.path.join(db_tempdir, 'prefix')
    if not os.path.exists(tmp_prefix):
        os.makedirs(tmp_prefix)

    database.export(tmp_prefix)

    if db_root is None:
        # Rewrite the exported declaration so the root folder points to the
        # path where the data will be mounted inside the container; remember
        # the original path to mount it below
        json_path = os.path.join(tmp_prefix, 'databases', db_name + '.json')

        with open(json_path, 'r') as f:
            db_data = simplejson.load(f)

        database_path = db_data['root_folder']
        db_data['root_folder'] = os.path.join('/databases', db_name)

        with open(json_path, 'w') as f:
            simplejson.dump(db_data, f, indent=4)

    try:
        db_envkey = host.db2docker([db_name])
    except Exception:  # narrowed from a bare `except:` clause
        raise RuntimeError("No environment found for the database `%s' "
                           "- available environments are %s" % (
                               db_name,
                               ", ".join(host.db_environments.keys())))

    # Creation of the container
    # Note: we only support one databases image loaded at the same time
    CONTAINER_PREFIX = '/beat/prefix'
    CONTAINER_CACHE = '/beat/cache'

    database_port = random.randint(51000, 60000)
    if cmd == CMD_VIEW_OUTPUTS:
        # The provider serves data over a TCP port chosen above
        db_cmd = [
            cmd,
            '0.0.0.0:{}'.format(database_port),
            CONTAINER_PREFIX,
            CONTAINER_CACHE
        ]
    else:
        db_cmd = [
            cmd,
            CONTAINER_PREFIX,
            CONTAINER_CACHE,
            db_name,
            protocol_name,
            set_name
        ]

    databases_container = host.create_container(db_envkey, db_cmd)
    if cmd == CMD_VIEW_OUTPUTS:
        databases_container.add_port(
            database_port, database_port, host_address=host.ip)

    # Consistency: reuse the same constants as the container command line
    # (these targets were hard-coded string literals before)
    databases_container.add_volume(db_tempdir, CONTAINER_PREFIX)
    databases_container.add_volume(configuration.cache, CONTAINER_CACHE)

    # Specify the volumes to mount inside the container
    if 'datasets_root_path' not in db_configuration:
        databases_container.add_volume(
            database_path, os.path.join('/databases', db_name))
    else:
        databases_container.add_volume(db_configuration['datasets_root_path'],
                                       db_configuration['datasets_root_path'])

    # Start the container
    host.start(databases_container)

    if cmd == CMD_VIEW_OUTPUTS:
        # Communicate with container
        zmq_context = zmq.Context()
        db_socket = zmq_context.socket(zmq.PAIR)
        db_address = 'tcp://{}:{}'.format(host.ip, database_port)
        db_socket.connect(db_address)

        for output_name, dataformat_name in db_set['outputs'].items():
            if excluded_outputs is not None and \
               output_name in excluded_outputs:
                continue

            data_source = RemoteDataSource()
            data_source.setup(db_socket, output_name,
                              dataformat_name, configuration.path)

            input_ = inputs.Input(output_name,
                                  database.dataformats[dataformat_name],
                                  data_source)
            input_group.add(input_)

        return (databases_container, db_socket, zmq_context, input_list)

    return databases_container
264
265


Samuel GAIST's avatar
Samuel GAIST committed
266
# ----------------------------------------------------------
267
268


269
def pull_impl(webapi, prefix, names, force, indentation, format_cache):
    """Copy databases (and the dataformats they need) from the server.

    Parameters:

      webapi (object): A WebAPI instance, ready to talk to the BEAT server
        of interest

      prefix (str): Root of the path where user objects are stored locally

      names (:py:class:`list`): Unique relative paths of the objects to
        retrieve, or usernames to retrieve objects from. An empty list
        pulls every available object of the type (all public objects when
        no user is set).

      force (bool): When ``True``, local changes are overwritten by the
        remotely retrieved copies.

      indentation (int): Indentation level for log output, useful when this
        function is called recursively while downloading different object
        types. Normally ``0``.

      format_cache (dict): All dataformats downloaded so far.


    Returns:

      int: Exit status to report back to the calling process - zero when
        everything worked, non-zero otherwise (POSIX compliance).

    """

    from .dataformats import pull as dataformats_pull

    status, names = common.pull(webapi, prefix, 'database', names,
                                ['declaration', 'code', 'description'],
                                force, indentation)

    # collect the dataformats the freshly pulled databases depend on
    required_formats = []
    for database_name in names:
        local_db = Database(prefix, database_name)
        required_formats.extend(local_db.dataformats.keys())

    # download every format we depend on
    df_status = dataformats_pull(webapi, prefix, required_formats, force,
                                 indentation + 2, format_cache)

    return status + df_status
André Anjos's avatar
André Anjos committed
322
323


Samuel GAIST's avatar
Samuel GAIST committed
324
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
325
326


327
def index_outputs(configuration, names, uid=None, db_root=None, docker=False):
    """Index all outputs (of all sets) of the given databases.

    Parameters:

      configuration (object): Configuration object providing ``path``
        (prefix) and ``cache`` locations

      names (:py:class:`list`): Database specifications, each formatted as
        ``<database>/<version>[/<protocol>[/<set>]]``; an empty list means
        every locally available database

      uid (int, Optional): UID to use for dataset access inside docker

      db_root (str, Optional): Root folder of the raw database files

      docker (bool): When ``True``, indexing runs inside a docker
        container instead of the local process


    Returns:

      int: Number of databases/sets that failed to index (0 on success).
    """

    names = common.make_up_local_list(configuration.path, 'database', names)
    retcode = 0

    if docker:
        host = dock.Host(raise_on_errors=False)

    for database_name in names:
        logger.info("Indexing database %s...", database_name)

        (db_name, database, sets) = load_database_sets(
            configuration, database_name)
        if database is None:
            retcode += 1
            continue

        for protocol_name, set_name, db_set in sets:
            if not docker:
                try:
                    view = database.view(protocol_name, set_name)
                except SyntaxError as error:
                    logger.error("Failed to load the database `%s':",
                                 database_name)
                    logger.error('  * Syntax error: %s', error)
                    view = None

                if view is None:
                    retcode += 1
                    continue

                dataset_hash = hashDataset(db_name, protocol_name, set_name)
                try:
                    view.index(os.path.join(configuration.cache,
                                            toPath(dataset_hash, '.db')))
                except RuntimeError as error:
                    # indentation normalized: this handler body used a
                    # 2-space indent, inconsistent with the rest of the file
                    logger.error("Failed to load the database `%s':",
                                 database_name)
                    logger.error('  * Runtime error %s', error)
                    retcode += 1
                    continue

            else:
                databases_container = \
                    start_db_container(configuration, CMD_DB_INDEX,
                                       host, db_name, protocol_name, set_name,
                                       database, db_set,
                                       uid=uid, db_root=db_root
                                       )
                status = host.wait(databases_container)
                if status != 0:
                    retcode += 1

    return retcode
André Anjos's avatar
André Anjos committed
381
382


Samuel GAIST's avatar
Samuel GAIST committed
383
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
384
385


386
def list_index_files(configuration, names):
    """Print (via the logger) the cached index files of the given databases.

    Parameters:

      configuration (object): Configuration object providing ``path``
        (prefix) and ``cache`` locations

      names (:py:class:`list`): Database specifications; an empty list
        means every locally available database


    Returns:

      int: Number of databases that failed to load (0 on success).
    """

    names = common.make_up_local_list(configuration.path, 'database', names)

    retcode = 0

    for database_name in names:
        logger.info("Listing database %s indexes...", database_name)

        (db_name, database, sets) = load_database_sets(
            configuration, database_name)
        if database is None:
            retcode += 1
            continue

        for protocol_name, set_name, db_set in sets:
            dataset_hash = hashDataset(db_name, protocol_name, set_name)
            index_filename = toPath(dataset_hash)
            # Fixed: anchor the glob inside the cache directory - the
            # pattern was previously relative to the current working
            # directory and never matched the cached index files
            # (compare with delete_index_files)
            basename = os.path.join(configuration.cache,
                                    os.path.splitext(index_filename)[0])
            for match in glob.glob(basename + '.*'):
                logger.info(match)

    return retcode
409
410


Samuel GAIST's avatar
Samuel GAIST committed
411
# ----------------------------------------------------------
412
413


414
def delete_index_files(configuration, names):
    """Delete the cached index files of the given databases.

    Empty directories left behind inside the cache are removed recursively.

    Parameters:

      configuration (object): Configuration object providing ``path``
        (prefix) and ``cache`` locations

      names (:py:class:`list`): Database specifications; an empty list
        means every locally available database


    Returns:

      int: Number of databases that failed to load (0 on success).
    """

    names = common.make_up_local_list(configuration.path, 'database', names)

    retcode = 0

    for database_name in names:
        logger.info("Deleting database %s indexes...", database_name)

        (db_name, database, sets) = load_database_sets(
            configuration, database_name)
        if database is None:
            retcode += 1
            continue

        for protocol_name, set_name, db_set in sets:
            # The index hash depends only on (database, protocol, set), so
            # deletion happens once per set - the previous code repeated the
            # same glob/unlink for every output of the set, only ever
            # deleting anything on the first iteration
            dataset_hash = hashDataset(db_name, protocol_name, set_name)
            index_filename = toPath(dataset_hash)
            basename = os.path.join(configuration.cache,
                                    os.path.splitext(index_filename)[0])

            for g in glob.glob(basename + '.*'):
                logger.info("removing `%s'...", g)
                os.unlink(g)

            common.recursive_rmdir_if_empty(os.path.dirname(basename),
                                            configuration.cache)

    return retcode
444
445


Samuel GAIST's avatar
Samuel GAIST committed
446
# ----------------------------------------------------------
447

André Anjos's avatar
André Anjos committed
448

449
450
def view_outputs(configuration, dataset_name, excluded_outputs=None, uid=None,
                 db_root=None, docker=False):
    """Print the data of one database set to standard output.

    Parameters:

      configuration (object): Configuration object providing ``path``
        (prefix) and ``cache`` locations

      dataset_name (str): Full set specification, formatted as
        ``<database>/<version>/<protocol>/<set>``

      excluded_outputs (str, Optional): Comma-separated list of output
        names to skip

      uid (int, Optional): UID to use for dataset access inside docker

      db_root (str, Optional): Root folder of the raw database files

      docker (bool): When ``True``, the data is served by a docker
        container instead of being read in-process


    Returns:

      int: 0 on success, 1 on any failure (POSIX compliance).
    """

    def data_to_json(data, indent):
        # Serialize a data object to pretty-printed JSON, unfolding the
        # list markers inserted by common.stringify, and re-indent every
        # line so the output aligns under its label
        value = common.stringify(data.as_dict())

        value = simplejson.dumps(value, indent=4, cls=NumpyJSONEncoder) \
            .replace('"BEAT_LIST_DELIMITER[', '[') \
            .replace(']BEAT_LIST_DELIMITER"', ']') \
            .replace('"...",', '...') \
            .replace('"BEAT_LIST_SIZE(', '(') \
            .replace(')BEAT_LIST_SIZE"', ')')

        return ('\n' + ' ' * indent).join(value.split('\n'))

    # Load the infos about the database set
    (db_name, database, sets) = load_database_sets(configuration, dataset_name)
    if (database is None) or (len(sets) != 1):
        return 1

    (protocol_name, set_name, db_set) = sets[0]

    if excluded_outputs is not None:
        # Fixed: materialize as a list - `map` returns a one-shot iterator
        # under Python 3 while this value is membership-tested repeatedly
        excluded_outputs = [x.strip() for x in excluded_outputs.split(',')]

    # Setup the view so the outputs can be used
    if not docker:
        view = database.view(protocol_name, set_name)

        if view is None:
            return 1

        dataset_hash = hashDataset(db_name, protocol_name, set_name)
        view.setup(os.path.join(configuration.cache,
                                toPath(dataset_hash, '.db')), pack=False)
        input_group = inputs.InputGroup(set_name, restricted_access=False)

        for output_name, dataformat_name in db_set['outputs'].items():
            if excluded_outputs is not None and \
               output_name in excluded_outputs:
                continue

            input_ = inputs.Input(output_name,
                                  database.dataformats[dataformat_name],
                                  view.data_sources[output_name])
            input_group.add(input_)

    else:
        host = dock.Host(raise_on_errors=False)

        (databases_container, db_socket, zmq_context, input_list) = \
            start_db_container(configuration, CMD_VIEW_OUTPUTS,
                               host, db_name, protocol_name,
                               set_name, database, db_set,
                               excluded_outputs=excluded_outputs,
                               uid=uid, db_root=db_root)

        input_group = input_list.group(set_name)

    # Display the data
    try:
        previous_start = -1

        while input_group.hasMoreData():
            input_group.next()

            start = input_group.data_index
            end = input_group.data_index_end

            if start != previous_start:
                print(80 * '-')

                print('FROM %d TO %d' % (start, end))

                # Inputs whose data spans exactly the current index window
                whole_inputs = [input_ for input_ in input_group
                                if input_.data_index == start and
                                input_.data_index_end == end]

                for input_ in whole_inputs:
                    label = ' - ' + str(input_.name) + ': '
                    print(label + data_to_json(input_.data, len(label)))

                previous_start = start

            # Inputs on a narrower window than the group's current one
            selected_inputs = \
                [input_ for input_ in input_group
                 if input_.data_index == input_group.first_data_index and
                 (input_.data_index != start or
                  input_.data_index_end != end)]

            grouped_inputs = {}
            for input_ in selected_inputs:
                key = (input_.data_index, input_.data_index_end)
                if key not in grouped_inputs:
                    grouped_inputs[key] = []
                # Fixed: previously appended the stale `input` variable
                # left over from the loop above instead of `input_`
                grouped_inputs[key].append(input_)

            sorted_keys = sorted(grouped_inputs.keys())

            for key in sorted_keys:
                # Fixed: a bare `print` is a Python 2 statement; under
                # Python 3 it is a no-op expression, so no blank line was
                # ever emitted
                print()
                print('  FROM %d TO %d' % key)

                for input_ in grouped_inputs[key]:
                    label = '   - ' + str(input_.name) + ': '
                    print(label + data_to_json(input_.data, len(label)))

    except Exception as e:
        logger.error("Failed to retrieve the next data: %s", e)
        return 1

    return 0
André Anjos's avatar
André Anjos committed
562

563

Samuel GAIST's avatar
Samuel GAIST committed
564
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
565
566


567
568
569
570
571
# Top-level `beat db` command group; the subcommands below attach to it
# through the @db.command() decorator.
@click.group()
@click.pass_context
def db(ctx):
    """Database commands"""
    pass
572

Samuel GAIST's avatar
Samuel GAIST committed
573

574
575
576
577
578
579
@db.command()
@click.option('--remote', help='Only acts on the remote copy of the database.',
              is_flag=True)
@click.pass_context
def list(ctx, remote):
    '''Lists all the databases available on the platform.

    To list all existing databases on your local prefix:

        $ beat db list
    '''
    configuration = ctx.meta['config']

    # Local listing needs no server connection
    if not remote:
        return common.display_local_list(configuration.path, 'database')

    with common.make_webapi(configuration) as webapi:
        return common.display_remote_list(webapi, 'database')


@db.command()
@click.argument('db_names', nargs=-1)
@click.pass_context
def check(ctx, db_names):
    '''Checks a local database for validity.

    $ beat db check [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"
    '''
    prefix = ctx.meta['config'].path
    return common.check(prefix, 'database', db_names)


@db.command()
@click.argument('db_names', nargs=-1)
@click.option('--force', help='Performs operation regardless of conflicts',
              is_flag=True)
@click.pass_context
def pull(ctx, db_names, force):
    '''Downloads the specified databases from the server.

       $ beat db pull [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"
    '''
    configuration = ctx.meta['config']

    # Start with an empty dataformat cache; pull_impl fills it as it goes
    format_cache = {}
    with common.make_webapi(configuration) as webapi:
        return pull_impl(webapi, configuration.path, db_names, force,
                         0, format_cache)


@db.command()
@click.argument('db_names', nargs=-1)
@click.option('--force', help='Performs operation regardless of conflicts',
              is_flag=True)
@click.option('--dry-run', help='Dry run',
              is_flag=True)
@click.pass_context
def push(ctx, db_names, force, dry_run):
    '''Uploads databases to the server (must provide a valid admin token).

    $ beat db push [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"

    '''
    configuration = ctx.meta['config']

    # Fields of the database objects transmitted to the server
    fields = ['name', 'declaration', 'code', 'description']
    with common.make_webapi(configuration) as webapi:
        return common.push(webapi, configuration.path, 'database',
                           db_names, fields, {}, force, dry_run, 0)


@db.command()
@click.argument('db_names', nargs=-1)
@click.pass_context
def diff(ctx, db_names):
    '''Shows changes between the local database and the remote version.

    $ beat db diff [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"
    '''
    configuration = ctx.meta['config']

    # At least one database name is mandatory; only the first is compared
    if not db_names:
        raise click.ClickException("Requires at least one database name")

    fields = ['declaration', 'code', 'description']
    with common.make_webapi(configuration) as webapi:
        return common.diff(webapi, configuration.path, 'database',
                           db_names[0], fields)


@db.command()
@click.pass_context
def status(ctx):
    '''Shows (editing) status for all available databases'''
    configuration = ctx.meta['config']
    with common.make_webapi(configuration) as webapi:
        # common.status returns a tuple; only the exit code is relevant here
        result = common.status(webapi, configuration.path, 'database')
        return result[0]


@db.command()
@click.argument('db_names', nargs=-1)
@click.pass_context
def version(ctx, db_names):
    '''Creates a new version of an existing database.

    $ beat db version [<name>]...

    <name>:
        Database name formatted as "<database>/<version>"

    '''
    configuration = ctx.meta['config']

    # At least one database name is mandatory; only the first is versioned
    if not db_names:
        raise click.ClickException("Requires at least one database name")

    return common.new_version(configuration.path, 'database', db_names[0])


@db.command()
@click.argument('db_names', nargs=-1)
@click.option('--list', help='List index files matching output if they exist',
              is_flag=True)
@click.option('--delete', help='Delete index files matching output if they '
              'exist (also, recursively deletes empty directories)',
              is_flag=True)
@click.option('--checksum', help='Checksums index files', is_flag=True)
@click.option('--uid', type=click.INT, default=None)
@click.option('--db-root', help="Database root")
@click.option('--docker', is_flag=True)
@click.pass_context
def index(ctx, db_names, list, delete, checksum, uid, db_root, docker):
    '''Indexes all outputs (of all sets) of a database.

    To index the contents of a database

        $ beat db index simple/1

    To index the contents of a protocol on a database

        $ beat db index simple/1/double

    To index the contents of a set in a protocol on a database

        $ beat db index simple/1/double/double
    '''
    configuration = ctx.meta['config']
    if list:
        return list_index_files(configuration, db_names)
    elif delete:
        return delete_index_files(configuration, db_names)
    # Fixed: indexing used to run only when `--checksum` was passed; a plain
    # `beat db index <name>` fell off the if/elif chain and did nothing,
    # contradicting the documented behavior. No distinct checksum
    # implementation is visible in this module, so `--checksum` keeps
    # triggering the same (re-)indexing run as before.
    return index_outputs(
        configuration, db_names, uid=uid,
        db_root=db_root, docker=docker
    )
Samuel GAIST's avatar
Samuel GAIST committed
733
734


735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
@db.command()
# Fixed: the positional argument was declared as `db_names` while the
# callback parameter is named `set_name`; click passes arguments by name,
# so every invocation raised "unexpected keyword argument" at runtime
@click.argument('set_name', nargs=1)
@click.option('--exclude', help='When viewing, excludes this output',
              default=None)
@click.option('--uid', type=click.INT, default=None)
@click.option('--db-root', help="Database root")
@click.option('--docker', is_flag=True)
@click.pass_context
def view(ctx, set_name, exclude, uid, db_root, docker):
    '''View the data of the specified dataset.

    To view the contents of a specific set

    $ beat db view simple/1/protocol/set
    '''
    configuration = ctx.meta['config']
    # `exclude` defaults to None, matching view_outputs' own default, so a
    # single call covers both the with- and without-exclusion cases
    return view_outputs(
        configuration, set_name, exclude, uid=uid, db_root=db_root,
        docker=docker)