databases.py 24.8 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.cmdline module of the BEAT platform.          #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################


"""Usage:
  %(prog)s databases list [--remote]
  %(prog)s databases check [<name>]...
  %(prog)s databases pull [--force] [<name>]...
  %(prog)s databases push [--force] [--dry-run] [<name>]...
  %(prog)s databases diff <name>
  %(prog)s databases status
  %(prog)s databases version <name>
  %(prog)s databases index [--list | --delete | --checksum] [--uid=<uid>] [--db-root=<path>] [--docker] [<name>]...
  %(prog)s databases view [--exclude=<output>] [--uid=<uid>] [--db-root=<path>] [--docker] <set_name>
  %(prog)s databases --help


Arguments:
  <name>           Database name formatted as "<database>/<version>"
  <set_name>       Set formatted as "<database>/<version>/<protocol>/<set>"


Commands:
  list      Lists all the databases available on the platform
  check     Checks a local database for validity
  pull      Downloads the specified databases from the server
  push      Uploads databases to the server (must provide a valid admin token)
  diff      Shows changes between the local database and the remote version
  status    Shows (editing) status for all available databases
  version   Creates a new version of an existing database
  index     Indexes all outputs (of all sets) of a database.
  view      View the data of the specified dataset.


Options:
  --help              Display this screen
  --remote            Only acts on the remote copy of the database
  --exclude=<output>  When viewing, excludes this output
  --list              List index files matching output if they exist
  --delete            Delete index files matching output if they exist (also,
                      recursively deletes empty directories)
  --checksum          Checksums index files


Examples:

  To list all existing databases on your local prefix:

    $ %(prog)s db list

  To view the contents of a specific set

    $ %(prog)s db view simple/1/protocol/set

  To index the contents of a database

    $ %(prog)s db index simple/1

  To index the contents of a protocol on a database

    $ %(prog)s db index simple/1/double

  To index the contents of a set in a protocol on a database

    $ %(prog)s db index simple/1/double/double
"""

import os
import glob
94
import random
Samuel GAIST's avatar
Samuel GAIST committed
95
import zmq
96

André Anjos's avatar
André Anjos committed
97
import logging
Samuel GAIST's avatar
Samuel GAIST committed
98

André Anjos's avatar
André Anjos committed
99
100
101

import simplejson

Samuel GAIST's avatar
Samuel GAIST committed
102
103
from beat.core.hash import toPath
from beat.core.hash import hashDataset
André Anjos's avatar
André Anjos committed
104
105
from beat.core.utils import NumpyJSONEncoder
from beat.core.database import Database
106
from beat.core.data import load_data_index, RemoteDataSource
107
108
109
from beat.core import dock
from beat.core import inputs
from beat.core import utils
André Anjos's avatar
André Anjos committed
110
111
112

from . import common

Samuel GAIST's avatar
Samuel GAIST committed
113
logger = logging.getLogger(__name__)
André Anjos's avatar
André Anjos committed
114

115
116
117
118
# Values for the ``cmd`` argument of start_db_container(); each one becomes the
# first element of the command line run inside the database container.
# index_outputs() passes CMD_DB_INDEX, view_outputs() passes CMD_VIEW_OUTPUTS.
CMD_DB_INDEX = 'index'
CMD_VIEW_OUTPUTS = 'databases_provider'


Samuel GAIST's avatar
Samuel GAIST committed
119
# ----------------------------------------------------------
120
121
122
123
124
125
126


def load_database_sets(configuration, database_name):
    """Loads a database and the sets selected by ``database_name``.

    Parameters:

      configuration (object): The command-line configuration. Only its
        ``path`` attribute (the prefix) is read here.

      database_name (str): A specification formatted as
        ``<database>/<version>[/<protocol>[/<set>]]``. The optional protocol
        and set components restrict what is loaded.


    Returns:

      tuple: ``(db_name, database, loaded_sets)`` where ``loaded_sets`` is a
        list of ``(protocol_name, set_name, set_declaration)`` tuples,
        ``set_declaration`` being whatever ``database.set()`` returns. On any
        failure (malformed specification, invalid database, unknown protocol
        or set) the problem is logged and ``(None, None, None)`` is returned.

    """

    # Process the name of the database
    parts = database_name.split('/')

    if not 2 <= len(parts) <= 4:
        logger.error("Database specification should have the format "
                     "`<database>/<version>/[<protocol>/[<set>]]', the value "
                     "you passed (%s) is not valid", database_name)
        # Always a 3-tuple: every caller unpacks exactly three values
        return (None, None, None)

    db_name = os.path.join(*parts[:2])
    protocol_filter = parts[2] if len(parts) > 2 else None
    set_filter = parts[3] if len(parts) > 3 else None

    # Load the database (dataformats are cached while loading)
    dataformat_cache = {}
    database = Database(utils.Prefix(configuration.path),
                        db_name, dataformat_cache)
    if not database.valid:
        logger.error("Failed to load the database `%s':", db_name)
        for e in database.errors:
            logger.error('  * %s', e)
        return (None, None, None)

    # Filter the protocols
    protocols = database.protocol_names

    if protocol_filter is not None:
        if protocol_filter not in protocols:
            logger.error("The database `%s' does not have the protocol `%s' - "
                         "choose one of `%s'", db_name, protocol_filter,
                         ', '.join(protocols))
            return (None, None, None)

        protocols = [protocol_filter]

    # Filter the sets
    loaded_sets = []

    for protocol_name in protocols:
        sets = database.set_names(protocol_name)

        if set_filter is not None:
            if set_filter not in sets:
                logger.error("The database/protocol `%s/%s' does not have the "
                             "set `%s' - choose one of `%s'",
                             db_name, protocol_name, set_filter,
                             ', '.join(sets))
                return (None, None, None)

            sets = [z for z in sets if z == set_filter]

        loaded_sets.extend([(protocol_name, set_name,
                             database.set(protocol_name, set_name))
                            for set_name in sets])

    return (db_name, database, loaded_sets)


Samuel GAIST's avatar
Samuel GAIST committed
193
# ----------------------------------------------------------
194
195


196
197
def start_db_container(configuration, cmd, host,
                       db_name, protocol_name, set_name, database, db_set,
                       excluded_outputs=None, uid=None, db_root=None):
    """Creates and starts a docker container working on one database set.

    A temporary directory is populated with a ``configuration.json`` file and
    an export of ``database``; it is mounted in the container together with
    the cache and the database root folder.

    Parameters:

      configuration (object): The command-line configuration; its ``cache``
        and ``path`` attributes are read

      cmd (str): The command to run in the container. When it equals
        ``CMD_VIEW_OUTPUTS`` a port is published and a ZeroMQ socket is
        connected to the container; any other value is run with the database,
        protocol and set names as arguments.

      host (object): The docker host used to find the environment and to
        create and start the container

      db_name (str): Name of the database (``<database>/<version>``)

      protocol_name (str): Name of the protocol

      set_name (str): Name of the set

      database (object): The loaded database; it is exported to the temporary
        prefix and its ``dataformats`` are used to build the inputs

      db_set (dict): The set declaration; its ``outputs`` entry maps output
        names to dataformat names

      excluded_outputs (list): Names of the outputs to skip, or ``None``

      uid (int): If set, stored as ``datasets_uid`` in the configuration file

      db_root (str): If set, stored as ``datasets_root_path`` in the
        configuration file and mounted in the container under the same path.
        Otherwise the ``root_folder`` of the exported database declaration is
        mounted under ``/databases/<db_name>``.


    Returns:

      The container object when ``cmd`` is not ``CMD_VIEW_OUTPUTS``.
      Otherwise a tuple ``(container, db_socket, zmq_context, input_list)``.


    Raises:

      RuntimeError: if no docker environment can be found for the database

    """

    CONTAINER_PREFIX = '/beat/prefix'
    CONTAINER_CACHE = '/beat/cache'

    input_list = inputs.InputList()

    input_group = inputs.InputGroup(set_name, restricted_access=False)
    input_list.add(input_group)

    db_configuration = {
        'inputs': {},
        'channel': set_name,
    }

    if uid is not None:
        db_configuration['datasets_uid'] = uid

    if db_root is not None:
        db_configuration['datasets_root_path'] = db_root

    # Evaluate the exclusion filter once; the result is used twice below
    selected_outputs = [
        (output_name, dataformat_name)
        for output_name, dataformat_name in db_set['outputs'].items()
        if excluded_outputs is None or output_name not in excluded_outputs
    ]

    # The hash only depends on the set, not on the output
    dataset_hash = hashDataset(db_name, protocol_name, set_name)
    dataset_path = toPath(dataset_hash, '.db')

    for output_name, _ in selected_outputs:
        db_configuration['inputs'][output_name] = dict(
            database=db_name,
            protocol=protocol_name,
            set=set_name,
            output=output_name,
            channel=set_name,
            hash=dataset_hash,
            path=dataset_path
        )

    db_tempdir = utils.temporary_directory()

    # Text mode: the JSON encoder emits text, not bytes
    with open(os.path.join(db_tempdir, 'configuration.json'), 'w') as f:
        simplejson.dump(db_configuration, f, indent=4)

    tmp_prefix = os.path.join(db_tempdir, 'prefix')
    if not os.path.exists(tmp_prefix):
        os.makedirs(tmp_prefix)

    database.export(utils.Prefix(tmp_prefix))

    if db_root is None:
        # Point the exported declaration to the location at which the
        # original root folder is mounted inside the container
        json_path = os.path.join(tmp_prefix, 'databases', db_name + '.json')

        with open(json_path, 'r') as f:
            db_data = simplejson.load(f)

        database_path = db_data['root_folder']
        db_data['root_folder'] = os.path.join('/databases', db_name)

        with open(json_path, 'w') as f:
            simplejson.dump(db_data, f, indent=4)

    try:
        db_envkey = host.db2docker([db_name])
    except Exception:
        raise RuntimeError("No environment found for the database `%s' "
                           "- available environments are %s" % (
                               db_name,
                               ", ".join(host.db_environments.keys())))

    # Creation of the container
    # Note: we only support one databases image loaded at the same time
    serve_outputs = (cmd == CMD_VIEW_OUTPUTS)

    # Picked at random; nothing here checks that the port is free
    database_port = random.randint(51000, 60000)

    if serve_outputs:
        db_cmd = [
            cmd,
            '0.0.0.0:{}'.format(database_port),
            CONTAINER_PREFIX,
            CONTAINER_CACHE
        ]
    else:
        db_cmd = [
            cmd,
            CONTAINER_PREFIX,
            CONTAINER_CACHE,
            db_name,
            protocol_name,
            set_name
        ]

    databases_container = host.create_container(db_envkey, db_cmd)

    if serve_outputs:
        databases_container.add_port(
            database_port, database_port, host_address=host.ip)

    databases_container.add_volume(db_tempdir, CONTAINER_PREFIX)
    databases_container.add_volume(configuration.cache, CONTAINER_CACHE)

    # Specify the volumes to mount inside the container
    if 'datasets_root_path' not in db_configuration:
        databases_container.add_volume(
            database_path, os.path.join('/databases', db_name))
    else:
        databases_container.add_volume(db_configuration['datasets_root_path'],
                                       db_configuration['datasets_root_path'])

    # Start the container
    host.start(databases_container)

    if not serve_outputs:
        return databases_container

    # Communicate with container
    zmq_context = zmq.Context()
    db_socket = zmq_context.socket(zmq.PAIR)
    db_address = 'tcp://{}:{}'.format(host.ip, database_port)
    db_socket.connect(db_address)

    for output_name, dataformat_name in selected_outputs:
        data_source = RemoteDataSource()
        data_source.setup(db_socket, output_name,
                          dataformat_name, configuration.path)

        input_group.add(inputs.Input(
            output_name, database.dataformats[dataformat_name], data_source))

    return (databases_container, db_socket, zmq_context, input_list)
325
326


Samuel GAIST's avatar
Samuel GAIST committed
327
# ----------------------------------------------------------
328
329


André Anjos's avatar
André Anjos committed
330
def pull(webapi, prefix, names, force, indentation, format_cache):
    """Copies databases (and required dataformats) from the server.

    Parameters:

      webapi (object): An instance of our WebAPI class, prepared to access the
        BEAT server of interest

      prefix (str): A string representing the root of the path in which the
        user objects are stored

      names (list): A list of strings, each representing the unique relative
        path of the objects to retrieve or a list of usernames from which to
        retrieve objects. If the list is empty, then we pull all available
        objects of a given type. If no user is set, then pull all public
        objects of a given type.

      force (bool): If set to ``True``, then overwrites local changes with the
        remotely retrieved copies.

      indentation (int): The indentation level, useful if this function is
        called recursively while downloading different object types. This is
        normally set to ``0`` (zero).

      format_cache (dict): A dictionary containing all dataformats already
        downloaded.


    Returns:

      int: Indicating the exit status of the command, to be reported back to
        the calling process. This value should be zero if everything works OK,
        otherwise, different than zero (POSIX compliance).

    """

    from .dataformats import pull as dataformats_pull

    status, names = common.pull(webapi, prefix, 'database', names,
                                ['declaration', 'code', 'description'],
                                force, indentation)

    # see what dataformats one needs to pull
    dataformats = []
    for name in names:
        dataformats.extend(Database(prefix, name).dataformats.keys())

    # downloads any formats on which we depend
    df_status = dataformats_pull(webapi, prefix, dataformats, force,
                                 indentation + 2, format_cache)

    # Both values are exit statuses: the sum is zero only if both succeeded
    return status + df_status
André Anjos's avatar
André Anjos committed
385
386


Samuel GAIST's avatar
Samuel GAIST committed
387
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
388
389


390
def index_outputs(configuration, names, uid=None, db_root=None, docker=False):
    """Indexes the sets of the given databases.

    Parameters:

      configuration (object): The command-line configuration; its ``path`` and
        ``cache`` attributes are read

      names (list): Database specifications, expanded through
        ``common.make_up_local_list()``

      uid (int): Forwarded to ``start_db_container()`` (docker mode only)

      db_root (str): Forwarded to ``start_db_container()`` (docker mode only)

      docker (bool): If ``True``, index each set inside a docker container
        instead of in the current process


    Returns:

      int: The number of databases or sets that could not be indexed (zero
        when everything worked)

    """

    names = common.make_up_local_list(configuration.path, 'database', names)

    failures = 0

    if docker:
        host = dock.Host(raise_on_errors=False)

    for database_name in names:
        logger.info("Indexing database %s...", database_name)

        db_name, database, sets = load_database_sets(configuration,
                                                     database_name)
        if database is None:
            failures += 1
            continue

        for protocol_name, set_name, db_set in sets:
            if docker:
                # Index inside a container and wait for it to finish
                container = start_db_container(
                    configuration, CMD_DB_INDEX, host, db_name,
                    protocol_name, set_name, database, db_set,
                    uid=uid, db_root=db_root)

                if host.wait(container) != 0:
                    failures += 1
                continue

            # Index in the current process
            view = database.view(protocol_name, set_name)
            if view is None:
                failures += 1
                continue

            dataset_hash = hashDataset(db_name, protocol_name, set_name)
            index_path = os.path.join(configuration.cache,
                                      toPath(dataset_hash, '.db'))
            view.index(index_path)

    return failures
André Anjos's avatar
André Anjos committed
432
433


Samuel GAIST's avatar
Samuel GAIST committed
434
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
435
436


437
def list_index_files(configuration, names):
    """Logs the cache files that match the outputs of the given databases.

    Parameters:

      configuration (object): The command-line configuration; its ``path`` and
        ``cache`` attributes are read

      names (list): Database specifications, expanded through
        ``common.make_up_local_list()``


    Returns:

      int: The number of databases that could not be loaded

    """

    names = common.make_up_local_list(configuration.path, 'database', names)

    failures = 0

    for database_name in names:
        logger.info("Listing database %s indexes...", database_name)

        db_name, database, sets = load_database_sets(configuration,
                                                     database_name)
        if database is None:
            failures += 1
            continue

        for protocol_name, set_name, db_set in sets:
            for output_name in db_set['outputs'].keys():
                output_hash = database.hash_output(protocol_name, set_name,
                                                   output_name)
                index_path = os.path.join(configuration.cache,
                                          toPath(output_hash, '.index'))

                # Any file sharing the stem of the index file is reported
                stem = os.path.splitext(index_path)[0]
                for found in glob.glob(stem + '.*'):
                    logger.info(found)

    return failures
464
465


Samuel GAIST's avatar
Samuel GAIST committed
466
# ----------------------------------------------------------
467
468


469
def delete_index_files(configuration, names):
    """Removes the cache files that match the outputs of the given databases.

    Parameters:

      configuration (object): The command-line configuration; its ``path`` and
        ``cache`` attributes are read

      names (list): Database specifications, expanded through
        ``common.make_up_local_list()``


    Returns:

      int: The number of databases that could not be loaded

    """

    names = common.make_up_local_list(configuration.path, 'database', names)

    failures = 0

    for database_name in names:
        logger.info("Deleting database %s indexes...", database_name)

        db_name, database, sets = load_database_sets(configuration,
                                                     database_name)
        if database is None:
            failures += 1
            continue

        for protocol_name, set_name, db_set in sets:
            for output_name in db_set['outputs'].keys():
                output_hash = database.hash_output(protocol_name, set_name,
                                                   output_name)
                index_path = os.path.join(configuration.cache,
                                          toPath(output_hash, '.index'))

                # Every file sharing the stem of the index file is removed
                stem = os.path.splitext(index_path)[0]
                for found in glob.glob(stem + '.*'):
                    logger.info("removing `%s'...", found)
                    os.unlink(found)

                # Prune the directories left empty, up to the cache root
                common.recursive_rmdir_if_empty(os.path.dirname(stem),
                                                configuration.cache)

    return failures
500
501


Samuel GAIST's avatar
Samuel GAIST committed
502
# ----------------------------------------------------------
503

André Anjos's avatar
André Anjos committed
504

505
def checksum_index_files(configuration, names):
    """Checks that the index files of the given databases can be loaded.

    Parameters:

      configuration (object): The command-line configuration; its ``path`` and
        ``cache`` attributes are read

      names (list): Database specifications, expanded through
        ``common.make_up_local_list()``


    Returns:

      int: The number of databases that could not be loaded plus the number
        of index files for which ``load_data_index()`` returned a false value

    """

    names = common.make_up_local_list(configuration.path, 'database', names)

    retcode = 0

    for database_name in names:
        logger.info("Checksumming database %s indexes...", database_name)

        (db_name, database, sets) = load_database_sets(
            configuration, database_name)
        if database is None:
            retcode += 1
            continue

        for protocol_name, set_name, db_set in sets:
            for output_name in db_set['outputs'].keys():
                index_hash = database.hash_output(
                    protocol_name, set_name, output_name)
                index_path = toPath(index_hash, '.index')
                index_filename = os.path.join(configuration.cache, index_path)

                # Explicit check rather than ``assert``: assertions are
                # removed when running with ``python -O``, and a failure
                # must show up in the exit status
                if not load_data_index(configuration.cache, index_path):
                    logger.error("index for `%s' cannot be loaded and "
                                 "checksumed", index_filename)
                    retcode += 1
                    continue

                logger.info("index for `%s' can be loaded and checksumed",
                            index_filename)

    return retcode
André Anjos's avatar
André Anjos committed
533
534


Samuel GAIST's avatar
Samuel GAIST committed
535
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
536
537


538
539
def view_outputs(configuration, dataset_name, excluded_outputs=None, uid=None,
                 db_root=None, docker=False):
    """Prints the data of all the outputs of one database set.

    Parameters:

      configuration (object): The command-line configuration; its ``path`` and
        ``cache`` attributes are read

      dataset_name (str): Specification of the set to display. It must select
        exactly one set, otherwise nothing is displayed.

      excluded_outputs (str): Comma-separated names of the outputs to skip,
        or ``None``

      uid (int): Forwarded to ``start_db_container()`` (docker mode only)

      db_root (str): Forwarded to ``start_db_container()`` (docker mode only)

      docker (bool): If ``True``, read the data from a docker container
        instead of from a view set up in the current process


    Returns:

      int: ``0`` on success, ``1`` if the set could not be loaded or if the
        data could not be retrieved

    """

    def data_to_json(data, indent):
        """Formats ``data`` as JSON, continuation lines being indented"""

        value = common.stringify(data.as_dict())

        # The markers replaced here are presumably inserted by
        # common.stringify() to shorten long lists - TODO confirm
        value = simplejson.dumps(value, indent=4, cls=NumpyJSONEncoder) \
            .replace('"BEAT_LIST_DELIMITER[', '[') \
            .replace(']BEAT_LIST_DELIMITER"', ']') \
            .replace('"...",', '...') \
            .replace('"BEAT_LIST_SIZE(', '(') \
            .replace(')BEAT_LIST_SIZE"', ')')

        return ('\n' + ' ' * indent).join(value.split('\n'))

    # Load the infos about the database set
    (db_name, database, sets) = load_database_sets(configuration, dataset_name)
    if (database is None) or (len(sets) != 1):
        return 1

    (protocol_name, set_name, db_set) = sets[0]

    if excluded_outputs is not None:
        # A real list: it is used for several membership tests
        excluded_outputs = [x.strip() for x in excluded_outputs.split(',')]

    # Setup the view so the outputs can be used
    if not docker:
        view = database.view(protocol_name, set_name)

        if view is None:
            return 1

        dataset_hash = hashDataset(db_name, protocol_name, set_name)
        view.setup(os.path.join(configuration.cache,
                                toPath(dataset_hash, '.db')), pack=False)
        input_group = inputs.InputGroup(set_name, restricted_access=False)

        for output_name, dataformat_name in db_set['outputs'].items():
            if (excluded_outputs is not None) and \
                    (output_name in excluded_outputs):
                continue

            input_group.add(inputs.Input(
                output_name, database.dataformats[dataformat_name],
                view.data_sources[output_name]))

    else:
        host = dock.Host(raise_on_errors=False)

        (databases_container, db_socket, zmq_context, input_list) = \
            start_db_container(configuration, CMD_VIEW_OUTPUTS,
                               host, db_name, protocol_name, set_name,
                               database, db_set,
                               excluded_outputs=excluded_outputs,
                               uid=uid, db_root=db_root)

        input_group = input_list.group(set_name)

    # Display the data. Only single-argument print() calls are used, as they
    # behave the same under Python 2 and Python 3.
    try:
        previous_start = -1

        while input_group.hasMoreData():
            input_group.next()

            start = input_group.data_index
            end = input_group.data_index_end

            if start != previous_start:
                print(80 * '-')

                print('FROM %d TO %d' % (start, end))

                # Inputs whose current data covers the whole range
                whole_inputs = [entry for entry in input_group
                                if (entry.data_index == start) and
                                (entry.data_index_end == end)]

                for entry in whole_inputs:
                    label = ' - ' + str(entry.name) + ': '
                    print(label + data_to_json(entry.data, len(label)))

                previous_start = start

            # Inputs that just received new data covering only a sub-range
            selected_inputs = [
                entry for entry in input_group
                if (entry.data_index == input_group.first_data_index) and
                ((entry.data_index != start) or
                 (entry.data_index_end != end))]

            grouped_inputs = {}
            for entry in selected_inputs:
                key = (entry.data_index, entry.data_index_end)
                grouped_inputs.setdefault(key, []).append(entry)

            for key in sorted(grouped_inputs):
                print('')
                print('  FROM %d TO %d' % key)

                for entry in grouped_inputs[key]:
                    label = '   - ' + str(entry.name) + ': '
                    print(label + data_to_json(entry.data, len(label)))

    except Exception as e:
        logger.error("Failed to retrieve the next data: %s", e)
        return 1

    return 0
André Anjos's avatar
André Anjos committed
648

649

Samuel GAIST's avatar
Samuel GAIST committed
650
# ----------------------------------------------------------
André Anjos's avatar
André Anjos committed
651
652
653
654


def process(args):
    """Runs the ``databases`` sub-command selected in ``args``.

    Parameters:

      args (dict): The parsed command-line arguments. The ``config`` entry
        holds the configuration object; the other entries follow the usage
        pattern in the documentation of this module.


    Returns:

      int: The value returned by the selected sub-command, or ``1`` if no
        known sub-command is selected

    """

    raw_uid = args['--uid']
    uid = None if raw_uid is None else int(raw_uid)
    configuration = args['config']
    db_names = args['<name>']

    if args['list']:
        if not args['--remote']:
            return common.display_local_list(configuration.path, 'database')

        with common.make_webapi(configuration) as webapi:
            return common.display_remote_list(webapi, 'database')

    if args['check']:
        return common.check(configuration.path, 'database', db_names)

    if args['pull']:
        with common.make_webapi(configuration) as webapi:
            return pull(webapi, configuration.path, db_names,
                        args['--force'], 0, {})

    if args['push']:
        with common.make_webapi(configuration) as webapi:
            return common.push(webapi, configuration.path, 'database',
                               db_names,
                               ['name', 'declaration', 'code', 'description'],
                               {}, args['--force'], args['--dry-run'], 0)

    if args['diff']:
        with common.make_webapi(configuration) as webapi:
            return common.diff(webapi, configuration.path, 'database',
                               db_names[0],
                               ['declaration', 'code', 'description'])

    if args['status']:
        with common.make_webapi(configuration) as webapi:
            return common.status(webapi, configuration.path, 'database')[0]

    if args['version']:
        return common.new_version(configuration.path, 'database', db_names[0])

    if args['view']:
        # An empty or missing --exclude means that nothing is excluded
        return view_outputs(configuration, args['<set_name>'],
                            args['--exclude'] or None,
                            uid=uid,
                            db_root=args['--db-root'],
                            docker=args['--docker'])

    if args['index']:
        if args['--list']:
            return list_index_files(configuration, db_names)

        if args['--delete']:
            return delete_index_files(configuration, db_names)

        if args['--checksum']:
            return checksum_index_files(configuration, db_names)

        return index_outputs(configuration, db_names,
                             uid=uid,
                             db_root=args['--db-root'],
                             docker=args['--docker'])

    # Should not happen
    logger.error("unrecognized `databases' subcommand")
    return 1