Commit 7c34e985 authored by Amir MOHAMMADI

[pre-commit] Add pre-commit and run it on all files

parent 543a29d0
Pipeline #41453 passed in 4 minutes and 47 seconds
[flake8]
max-line-length = 88
select = B,C,E,F,W,T4,B9,B950
ignore = E501, W503, E203
include: 'https://gitlab.idiap.ch/bob/bob.devtools/raw/master/bob/devtools/data/gitlab-ci/single-package.yaml'
\ No newline at end of file
include: 'https://gitlab.idiap.ch/bob/bob.devtools/raw/master/bob/devtools/data/gitlab-ci/single-package.yaml'
[settings]
line_length=88
order_by_type=true
lines_between_types=1
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
  - repo: https://github.com/timothycrosley/isort
    rev: 4.3.21-2
    hooks:
      - id: isort
        args: [-sl]
  - repo: https://github.com/psf/black
    rev: stable
    hooks:
      - id: black
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.0.0
    hooks:
      - id: check-ast
      - id: check-case-conflict
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: debug-statements
      - id: check-added-large-files
      - id: flake8
  - repo: local
    hooks:
      - id: sphinx-build
        name: sphinx build
        entry: python -m sphinx.cmd.build
        args: [-a, -E, -W, doc, sphinx]
        language: system
        files: ^doc/
        types: [file]
        pass_filenames: false
      - id: sphinx-doctest
        name: sphinx doctest
        entry: python -m sphinx.cmd.build
        args: [-a, -E, -b, doctest, doc, sphinx]
        language: system
        files: ^doc/
        types: [file]
        pass_filenames: false
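For reference, the two "local" hooks above simply shell out to Sphinx with the arguments listed in the config, so the same documentation build and doctest run should be reproducible outside of pre-commit with:

python -m sphinx.cmd.build -a -E -W doc sphinx
python -m sphinx.cmd.build -a -E -b doctest doc sphinx

Note also that the isort hook is invoked with -sl (force single-line imports), which is what rewrites the import blocks in the diffs below to one import per line.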
......@@ -24,4 +24,4 @@ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from . import utils
from .sample import Sample, DelayedSample, SampleSet, DelayedSampleSet, sample_to_hdf5, hdf5_to_sample
from .wrappers import (
BaseWrapper,
DelayedSamplesCall,
SampleWrapper,
CheckpointWrapper,
DaskWrapper,
ToDaskBag,
wrap,
dask_tags,
)
from . import distributed
from . import transformers
from . import xarray as xr
from . import distributed # noqa
from . import transformers # noqa
from . import utils # noqa
from . import xarray as xr # noqa
from .sample import DelayedSample
from .sample import DelayedSampleSet
from .sample import Sample
from .sample import SampleSet
from .sample import hdf5_to_sample # noqa
from .sample import sample_to_hdf5 # noqa
from .wrappers import BaseWrapper
from .wrappers import CheckpointWrapper
from .wrappers import DaskWrapper
from .wrappers import DelayedSamplesCall
from .wrappers import SampleWrapper
from .wrappers import ToDaskBag
from .wrappers import dask_tags # noqa
from .wrappers import wrap # noqa
def __appropriate__(*args):
......@@ -37,6 +40,7 @@ __appropriate__(
Sample,
DelayedSample,
SampleSet,
DelayedSampleSet,
BaseWrapper,
DelayedSamplesCall,
SampleWrapper,
......
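As a quick orientation to the names re-exported above, here is a minimal, hypothetical sketch of the two basic containers (the array values and keys are made-up illustration data): Sample stores its data eagerly, while DelayedSample receives a callable and only materializes the data when its data attribute is accessed.

import numpy as np

from bob.pipelines import DelayedSample, Sample

eager = Sample(np.zeros(3), key="s0")                # data kept in memory
lazy = DelayedSample(lambda: np.zeros(3), key="s1")  # loaded when .data is read
assert (eager.data == lazy.data).all()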
import bob.pipelines as mario
pipeline = mario.wrap(["dask"], pipeline)
pipeline = mario.wrap(["dask"], pipeline) # noqa
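Expanding on that one-liner, a hedged end-to-end sketch could look as follows; the transformer and the input data are placeholders, and the "sample" wrapper is assumed here in addition to "dask" so that the pipeline consumes Sample objects:

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer

import bob.pipelines as mario

samples = [mario.Sample(np.arange(4.0), key=str(i)) for i in range(3)]
pipeline = make_pipeline(FunctionTransformer(np.exp))  # any scikit-learn pipeline
pipeline = mario.wrap(["sample", "dask"], pipeline)    # make it Sample- and dask-aware
transformed = pipeline.transform(samples)              # lazily builds a dask graph
transformed = transformed.compute(scheduler="single-threaded")  # run it locally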
from dask.distributed import Client, LocalCluster
from multiprocessing import cpu_count
from dask.distributed import Client
from dask.distributed import LocalCluster
n_nodes = cpu_count()
threads_per_worker = 1
......
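The rest of this configuration is elided above. Assuming it follows the usual dask.distributed pattern, a sketch of how the two variables just defined would typically be used is (LocalCluster and Client as imported in the snippet itself):

cluster = LocalCluster(
    n_workers=n_nodes, threads_per_worker=threads_per_worker
)  # hypothetical: mirrors the standard local-cluster setup
dask_client = Client(cluster)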
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
cluster = SGEMultipleQueuesCluster(min_jobs=20)
dask_client = Client(cluster)
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
from bob.pipelines.distributed.sge_queues import QUEUE_DEMANDING
from dask.distributed import Client
cluster = SGEMultipleQueuesCluster(min_jobs=20, sge_job_spec=QUEUE_DEMANDING)
dask_client = Client(cluster)
from dask.distributed import Client
from bob.pipelines.distributed.sge import SGEMultipleQueuesCluster
from bob.pipelines.distributed.sge_queues import QUEUE_LIGHT
from dask.distributed import Client
cluster = SGEMultipleQueuesCluster(min_jobs=20, sge_job_spec=QUEUE_LIGHT)
dask_client = Client(cluster)
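Whichever of these queue specifications is chosen, the resulting dask_client is what a dask-wrapped pipeline is ultimately executed on; a hedged usage sketch, reusing the pipeline and samples names from the wrap example further up:

result = pipeline.transform(samples).compute(scheduler=dask_client)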
#!/usr/bin/env python
# coding=utf-8
import os
import csv
import pathlib
import logging
import os
import pathlib
logger = logging.getLogger(__name__)
......@@ -84,7 +83,7 @@ class CSVDataset:
"""
logger.info(f"Checking dataset...")
logger.info("Checking dataset...")
errors = 0
for name in self._subsets.keys():
logger.info(f"Checking subset '{name}'...")
......@@ -100,7 +99,7 @@ class CSVDataset:
logger.error(
f"Found error loading entry {pos} in subset {name} "
f"from file '{self._subsets[name]}': {e}"
)
)
errors += 1
return errors
......@@ -152,8 +151,6 @@ class CSVDataset:
fileobj.seek(0)
return [
self._loader(
dict(subset=subset, order=n), dict(zip(self.fieldnames, k))
)
self._loader(dict(subset=subset, order=n), dict(zip(self.fieldnames, k)))
for n, k in enumerate(samples)
]
#!/usr/bin/env python
# coding=utf-8
import os
import json
import pathlib
import logging
import os
import pathlib
logger = logging.getLogger(__name__)
......@@ -103,7 +102,7 @@ class JSONDataset:
"""
logger.info(f"Checking dataset...")
logger.info("Checking dataset...")
errors = 0
for proto in self._protocols:
logger.info(f"Checking protocol '{proto}'...")
......@@ -121,7 +120,7 @@ class JSONDataset:
f"Found error loading entry {pos} in subset {name} "
f"of protocol {proto} from file "
f"'{self._protocols[proto]}': {e}"
)
)
errors += 1
except Exception as e:
logger.error(f"{sample.key}: {e}")
......@@ -164,7 +163,7 @@ class JSONDataset:
retval[subset] = [
self._loader(
dict(protocol=protocol, subset=subset, order=n),
dict(zip(self.fieldnames, k))
dict(zip(self.fieldnames, k)),
)
for n, k in enumerate(samples)
]
......
......@@ -2,24 +2,23 @@
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
import sys
import logging
import sys
logger = logging.getLogger(__name__)
from dask_jobqueue.core import JobQueueCluster, Job
from dask_jobqueue.sge import SGEJob
from distributed.scheduler import Scheduler
from distributed import SpecCluster
import dask
from distributed.scheduler import Scheduler
from dask_jobqueue.core import Job
from dask_jobqueue.core import JobQueueCluster
from distributed.deploy import Adaptive
from distributed.scheduler import Scheduler
from .sge_queues import QUEUE_DEFAULT
from bob.extension import rc
from .sge_queues import QUEUE_DEFAULT
logger = logging.getLogger(__name__)
class SGEIdiapJob(Job):
"""Launches a SGE Job in the IDIAP cluster. This class basically encodes
the CLI command that bootstrap the worker in a SGE job. Check here
......@@ -121,7 +120,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
dashboard_address: str
Default port for the dask dashboard,
env_extra: str,
Extra environment variables to send to the workers
......@@ -152,7 +151,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
>>> client = Client(cluster) # doctest: +SKIP
It's possible to demand a resource specification yourself:
>>> Q_1DAY_IO_BIG_SPEC = {
... "default": {
... "queue": "q_1day",
......@@ -207,9 +206,8 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
env_extra=None,
sge_job_spec=QUEUE_DEFAULT,
min_jobs=10,
project=rc.get('sge.project'),
project=rc.get("sge.project"),
**kwargs,
):
# Defining the job launcher
......@@ -221,12 +219,10 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
self.project = project
silence_logs = "error"
secutity = None
interface = None
host = None
security = None
if env_extra is None:
env_extra = []
elif not isinstance(env_extra, list):
......@@ -256,7 +252,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
loop=loop,
silence_logs=silence_logs,
asynchronous=asynchronous,
name=name,
name=name,
)
max_jobs = get_max_jobs(sge_job_spec)
......@@ -283,9 +279,8 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
"io_big=TRUE," if "io_big" in job_spec and job_spec["io_big"] else ""
)
memory = _get_key_from_spec(job_spec, "memory")[:-1]
new_resource_spec += (f"mem_free={memory},")
new_resource_spec += f"mem_free={memory},"
queue = _get_key_from_spec(job_spec, "queue")
if queue != "all.q":
......@@ -306,7 +301,7 @@ class SGEMultipleQueuesCluster(JobQueueCluster):
"protocol": self.protocol,
"security": None,
"resources": _get_key_from_spec(job_spec, "resources"),
"env_extra": self.env_extra,
"env_extra": self.env_extra,
}
def scale(self, n_jobs, sge_job_spec_key="default"):
......@@ -374,8 +369,6 @@ class AdaptiveMultipleQueue(Adaptive):
target."""
plan = self.plan
requested = self.requested
observed = self.observed
# Get tasks with no worker associated due to
# resource restrictions
......
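For completeness, a hedged sketch of the scale() call shown above: it takes the number of jobs plus the key of the queue specification (from the sge_queues module below) that those jobs should target. The GPU key is only illustrative, taken from the resource tags visible in the specs:

cluster.scale(10)                                # 10 jobs on the "default" spec
cluster.scale(1, sge_job_spec_key="q_long_gpu")  # 1 job on a GPU spec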
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
"""SGE QUEUES"""
"""This queue setup has a DEMANDING arrangement.
For CPU jobs, it prioritizes q_1day and io_big This HAS to be the
default
"""
QUEUE_DEFAULT = {
"default": {
"queue": "q_1day",
......@@ -48,12 +43,13 @@ QUEUE_DEFAULT = {
"resources": {"q_long_gpu": 1},
},
}
"""This queue setup has a DEMANDING arrangement.
"""
This queue setup has a light arrangement.
For CPU jobs, it prioritizes all.q and not io_big
For CPU jobs, it prioritizes q_1day and io_big This HAS to be the
default
"""
QUEUE_LIGHT = {
"default": {
"queue": "q_1day",
......@@ -101,13 +97,12 @@ QUEUE_LIGHT = {
"resources": {"q_long_gpu": 1},
},
}
"""
This queue setup has a light arrangement.
For CPU jobs, it prioritizes all.q and not io_big
"""
QUEUE_DEMANDING = {
"default": {
"queue": "q_1day",
......@@ -155,3 +150,7 @@ QUEUE_DEMANDING = {
"resources": {"q_long_gpu": 1},
},
}
"""
This queue setup has a light arrangement.
For CPU jobs, it prioritizes all.q and not io_big
"""
"""Base definition of sample."""
from collections.abc import MutableSequence, Sequence
from .utils import vstack_features
import numpy as np
from collections.abc import MutableSequence
from collections.abc import Sequence
import h5py
import numpy as np
from .utils import vstack_features
SAMPLE_DATA_ATTRS = ("data", "load", "samples", "_data")
......
......@@ -4,11 +4,12 @@
"""Test code for datasets"""
import os
import pkg_resources
import nose.tools
import pkg_resources
from ..datasets.json import JSONDataset
from ..datasets.csv import CSVDataset
from ..datasets.json import JSONDataset
from ..sample import Sample
......@@ -18,32 +19,32 @@ def _data_file(f):
def _raw_data_loader(context, d):
return Sample(
data=[
float(d["sepal_length"]),
float(d["sepal_width"]),
float(d["petal_length"]),
float(d["petal_width"]),
d["species"][5:],
],
key=(context["subset"] + str(context["order"]))
)
data=[
float(d["sepal_length"]),
float(d["sepal_width"]),
float(d["petal_length"]),
float(d["petal_width"]),
d["species"][5:],
],
key=(context["subset"] + str(context["order"])),
)
def test_csv_loading():
# tests if we can build a simple CSV loader for the Iris Flower dataset
subsets = {
"train": _data_file("iris-train.csv"),
"test": _data_file("iris-test.csv")
}
"train": _data_file("iris-train.csv"),
"test": _data_file("iris-test.csv"),
}
fieldnames = (
"sepal_length",
"sepal_width",
"petal_length",
"petal_width",
"species",
)
"sepal_length",
"sepal_width",
"petal_length",
"petal_width",
"species",
)
dataset = CSVDataset(subsets, fieldnames, _raw_data_loader)
dataset.check()
......@@ -72,12 +73,12 @@ def test_json_loading():
protocols = {"default": _data_file("iris.json")}
fieldnames = (
"sepal_length",
"sepal_width",
"petal_length",
"petal_width",
"species",
)
"sepal_length",
"sepal_width",
"petal_length",
"petal_width",
"species",
)
dataset = JSONDataset(protocols, fieldnames, _raw_data_loader)
......
from bob.pipelines import (
Sample,
SampleSet,
DelayedSampleSet,
sample_to_hdf5,
hdf5_to_sample,
)
import numpy as np
import copy
import pickle
import tempfile
import functools
import os
import pickle
import tempfile
import h5py
import numpy as np
from bob.pipelines import DelayedSampleSet
from bob.pipelines import Sample
from bob.pipelines import SampleSet
from bob.pipelines import hdf5_to_sample
from bob.pipelines import sample_to_hdf5
def test_sampleset_collection():
......
import os
import tempfile
import numpy as np
from sklearn.utils.validation import check_is_fitted
import bob.pipelines as mario
import numpy as np
def test_linearize():
def _assert(Xt, oracle):
assert np.allclose(Xt, oracle), (Xt, oracle)
......
import os
from tempfile import NamedTemporaryFile
import nose
import numpy as np
import os
import bob.pipelines as mario
from tempfile import NamedTemporaryFile
def test_io_vstack():
......@@ -117,8 +120,8 @@ def test_io_vstack():
# create the file back so NamedTemporaryFile does not complain
np.save(paths[0], reader(i + 1))
def test_isinstance_nested():
def test_isinstance_nested():
class A:
pass
......