Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
mednet
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
medai
software
mednet
Commits
795e4494
Commit
795e4494
authored
1 year ago
by
André Anjos
Browse files
Options
Downloads
Patches
Plain Diff
[data.split] Make variables private
parent
bfc106ab
No related branches found
No related tags found
1 merge request
!6
Making use of LightningDataModule and simplification of data loading
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/ptbench/data/datamodule.py
+2
-4
2 additions, 4 deletions
src/ptbench/data/datamodule.py
src/ptbench/data/split.py
+15
-15
15 additions, 15 deletions
src/ptbench/data/split.py
with
17 additions
and
19 deletions
src/ptbench/data/datamodule.py
+
2
−
4
View file @
795e4494
...
@@ -686,9 +686,7 @@ class CachingDataModule(lightning.LightningDataModule):
...
@@ -686,9 +686,7 @@ class CachingDataModule(lightning.LightningDataModule):
def
_val_dataset_keys
(
self
)
->
list
[
str
]:
def
_val_dataset_keys
(
self
)
->
list
[
str
]:
"""
Returns list of validation dataset names.
"""
"""
Returns list of validation dataset names.
"""
return
[
"
validation
"
]
+
[
return
[
"
validation
"
]
+
[
k
k
for
k
in
self
.
database_split
.
keys
()
if
k
.
startswith
(
"
monitor-
"
)
for
k
in
self
.
database_split
.
subsets
.
keys
()
if
k
.
startswith
(
"
monitor-
"
)
]
]
def
setup
(
self
,
stage
:
str
)
->
None
:
def
setup
(
self
,
stage
:
str
)
->
None
:
...
@@ -729,7 +727,7 @@ class CachingDataModule(lightning.LightningDataModule):
...
@@ -729,7 +727,7 @@ class CachingDataModule(lightning.LightningDataModule):
self
.
_setup_dataset
(
"
test
"
)
self
.
_setup_dataset
(
"
test
"
)
elif
stage
==
"
predict
"
:
elif
stage
==
"
predict
"
:
for
k
in
self
.
database_split
.
subsets
.
keys
():
for
k
in
self
.
database_split
.
keys
():
self
.
_setup_dataset
(
k
)
self
.
_setup_dataset
(
k
)
def
teardown
(
self
,
stage
:
str
)
->
None
:
def
teardown
(
self
,
stage
:
str
)
->
None
:
...
...
This diff is collapsed.
Click to expand it.
src/ptbench/data/split.py
+
15
−
15
View file @
795e4494
...
@@ -68,8 +68,8 @@ class JSONDatabaseSplit(DatabaseSplit):
...
@@ -68,8 +68,8 @@ class JSONDatabaseSplit(DatabaseSplit):
def
__init__
(
self
,
path
:
pathlib
.
Path
|
str
|
importlib
.
abc
.
Traversable
):
def
__init__
(
self
,
path
:
pathlib
.
Path
|
str
|
importlib
.
abc
.
Traversable
):
if
isinstance
(
path
,
str
):
if
isinstance
(
path
,
str
):
path
=
pathlib
.
Path
(
path
)
path
=
pathlib
.
Path
(
path
)
self
.
path
=
path
self
.
_
path
=
path
self
.
subsets
=
self
.
_load_split_from_disk
()
self
.
_
subsets
=
self
.
_load_split_from_disk
()
def
_load_split_from_disk
(
self
)
->
DatabaseSplit
:
def
_load_split_from_disk
(
self
)
->
DatabaseSplit
:
"""
Loads all subsets in a split from its file system representation.
"""
Loads all subsets in a split from its file system representation.
...
@@ -86,25 +86,25 @@ class JSONDatabaseSplit(DatabaseSplit):
...
@@ -86,25 +86,25 @@ class JSONDatabaseSplit(DatabaseSplit):
A dictionary mapping subset names to lists of JSON objects
A dictionary mapping subset names to lists of JSON objects
"""
"""
if
str
(
self
.
path
).
endswith
(
"
.bz2
"
):
if
str
(
self
.
_
path
).
endswith
(
"
.bz2
"
):
logger
.
debug
(
f
"
Loading database split from
{
str
(
self
.
path
)
}
...
"
)
logger
.
debug
(
f
"
Loading database split from
{
str
(
self
.
_
path
)
}
...
"
)
with
__import__
(
"
bz2
"
).
open
(
self
.
path
)
as
f
:
with
__import__
(
"
bz2
"
).
open
(
self
.
_
path
)
as
f
:
return
json
.
load
(
f
)
return
json
.
load
(
f
)
else
:
else
:
with
self
.
path
.
open
()
as
f
:
with
self
.
_
path
.
open
()
as
f
:
return
json
.
load
(
f
)
return
json
.
load
(
f
)
def
__getitem__
(
self
,
key
:
str
)
->
typing
.
Sequence
[
typing
.
Any
]:
def
__getitem__
(
self
,
key
:
str
)
->
typing
.
Sequence
[
typing
.
Any
]:
"""
Accesses subset ``key`` from this split.
"""
"""
Accesses subset ``key`` from this split.
"""
return
self
.
subsets
[
key
]
return
self
.
_
subsets
[
key
]
def
__iter__
(
self
):
def
__iter__
(
self
):
"""
Iterates over the subsets.
"""
"""
Iterates over the subsets.
"""
return
iter
(
self
.
subsets
)
return
iter
(
self
.
_
subsets
)
def
__len__
(
self
)
->
int
:
def
__len__
(
self
)
->
int
:
"""
How many subsets we currently have.
"""
"""
How many subsets we currently have.
"""
return
len
(
self
.
subsets
)
return
len
(
self
.
_
subsets
)
class
CSVDatabaseSplit
(
DatabaseSplit
):
class
CSVDatabaseSplit
(
DatabaseSplit
):
...
@@ -149,8 +149,8 @@ class CSVDatabaseSplit(DatabaseSplit):
...
@@ -149,8 +149,8 @@ class CSVDatabaseSplit(DatabaseSplit):
assert
(
assert
(
directory
.
is_dir
()
directory
.
is_dir
()
),
f
"
`
{
str
(
directory
)
}
` is not a valid directory
"
),
f
"
`
{
str
(
directory
)
}
` is not a valid directory
"
self
.
directory
=
directory
self
.
_
directory
=
directory
self
.
subsets
=
self
.
_load_split_from_disk
()
self
.
_
subsets
=
self
.
_load_split_from_disk
()
def
_load_split_from_disk
(
self
)
->
DatabaseSplit
:
def
_load_split_from_disk
(
self
)
->
DatabaseSplit
:
"""
Loads all subsets in a split from its file system representation.
"""
Loads all subsets in a split from its file system representation.
...
@@ -168,7 +168,7 @@ class CSVDatabaseSplit(DatabaseSplit):
...
@@ -168,7 +168,7 @@ class CSVDatabaseSplit(DatabaseSplit):
"""
"""
retval
:
DatabaseSplit
=
{}
retval
:
DatabaseSplit
=
{}
for
subset
in
self
.
directory
.
iterdir
():
for
subset
in
self
.
_
directory
.
iterdir
():
if
str
(
subset
).
endswith
(
"
.csv.bz2
"
):
if
str
(
subset
).
endswith
(
"
.csv.bz2
"
):
logger
.
debug
(
f
"
Loading database split from
{
subset
}
...
"
)
logger
.
debug
(
f
"
Loading database split from
{
subset
}
...
"
)
with
__import__
(
"
bz2
"
).
open
(
subset
)
as
f
:
with
__import__
(
"
bz2
"
).
open
(
subset
)
as
f
:
...
@@ -188,15 +188,15 @@ class CSVDatabaseSplit(DatabaseSplit):
...
@@ -188,15 +188,15 @@ class CSVDatabaseSplit(DatabaseSplit):
def
__getitem__
(
self
,
key
:
str
)
->
typing
.
Sequence
[
typing
.
Any
]:
def
__getitem__
(
self
,
key
:
str
)
->
typing
.
Sequence
[
typing
.
Any
]:
"""
Accesses subset ``key`` from this split.
"""
"""
Accesses subset ``key`` from this split.
"""
return
self
.
subsets
[
key
]
return
self
.
_
subsets
[
key
]
def
__iter__
(
self
):
def
__iter__
(
self
):
"""
Iterates over the subsets.
"""
"""
Iterates over the subsets.
"""
return
iter
(
self
.
subsets
)
return
iter
(
self
.
_
subsets
)
def
__len__
(
self
)
->
int
:
def
__len__
(
self
)
->
int
:
"""
How many subsets we currently have.
"""
"""
How many subsets we currently have.
"""
return
len
(
self
.
subsets
)
return
len
(
self
.
_
subsets
)
def
check_database_split_loading
(
def
check_database_split_loading
(
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment