Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
mednet
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
medai
software
mednet
Commits
a16730af
Commit
a16730af
authored
8 months ago
by
Daniel CARRON
Committed by
André Anjos
8 months ago
Browse files
Options
Downloads
Patches
Plain Diff
[datamodule] More generic sample size computation
parent
6f2b1a75
No related branches found
No related tags found
1 merge request
!46
Create common library
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/mednet/libs/common/data/datamodule.py
+32
-80
32 additions, 80 deletions
src/mednet/libs/common/data/datamodule.py
with
32 additions
and
80 deletions
src/mednet/libs/common/data/datamodule.py
+
32
−
80
View file @
a16730af
...
...
@@ -30,62 +30,13 @@ from .typing import (
logger
=
logging
.
getLogger
(
__name__
)
def
_
tensor
_size_bytes
(
t
:
torch
.
Tensor
)
->
int
:
"""
Return a tensor size in bytes.
def
_
sample
_size_bytes
(
dataset
:
Sample
)
:
"""
Recurse into the first sample of a dataset and figure out its total occupancy in bytes.
Parameters
----------
t
A torch Tensor.
Returns
-------
int
The size of the Tensor in bytes.
"""
return
int
(
t
.
element_size
()
*
torch
.
prod
(
torch
.
tensor
(
t
.
shape
)))
def
_sample_size_bytes
(
s
:
Sample
)
->
int
:
"""
Recurse into the sample and figure out its total occupancy in bytes.
Parameters
----------
s
The sample to be analyzed.
Returns
-------
int
The size in bytes occupied by this sample.
"""
size
=
sys
.
getsizeof
(
s
[
0
])
# tensor metadata
size
+=
int
(
s
[
0
].
element_size
()
*
torch
.
prod
(
torch
.
tensor
(
s
[
0
].
shape
)))
size
+=
sys
.
getsizeof
(
s
[
1
])
# check each element - if it is a tensor, then adds its total space in
# bytes
for
v
in
s
[
1
].
values
():
if
isinstance
(
v
,
torch
.
Tensor
):
size
+=
_tensor_size_bytes
(
v
)
return
size
def
_sample_dict_size_bytes
(
s
:
Sample
)
->
int
:
"""
Recurse into the sample and figure out its total occupancy in bytes.
Parameters
----------
s
The sample to be analyzed.
Returns
-------
int
The size in bytes occupied by this sample.
dataset
The dataset containing the samples to load.
"""
def
_tensor_size_bytes
(
t
:
torch
.
Tensor
)
->
int
:
...
...
@@ -102,42 +53,43 @@ def _sample_dict_size_bytes(s: Sample) -> int:
The size of the Tensor in bytes.
"""
logger
.
info
(
f
"
{
list
(
t
.
shape
)
}
@
{
t
.
dtype
}
"
)
return
int
(
t
.
element_size
()
*
torch
.
prod
(
torch
.
tensor
(
t
.
shape
)))
size
=
sys
.
getsizeof
(
s
[
0
])
# tensor metadata
size
+=
sys
.
getsizeof
(
s
[
1
])
def
_dict_size_bytes
(
d
):
"""
Return a dictionary size in bytes.
# check each element - if it is a tensor, then adds its total space in
# bytes
for
s_
in
s
:
for
v
in
s_
.
values
():
Parameters
----------
d
A dictionary.
Returns
-------
int
The size of the dictionary in bytes.
"""
size
=
0
for
v
in
d
.
values
():
if
isinstance
(
v
,
torch
.
Tensor
):
size
+=
_tensor_size_bytes
(
v
)
return
size
return
size
size
=
0
def
_estimate_data_footprint
(
dataset
):
"""
Compute the estimated memory required to load samples in memory.
Parameters
----------
dataset
The dataset containing the samples to load.
"""
first_sample
=
dataset
[
0
]
logger
.
info
(
"
Delayed loading dataset (first tensor):
"
)
if
isinstance
(
first_sample
[
0
],
dict
):
for
k
,
v
in
first_sample
[
0
].
items
(
):
logger
.
info
(
f
"
{
k
}
:
{
list
(
v
.
shape
)
}
@
{
v
.
dtype
}
"
)
sample_size_mb
=
_sample_dict_size_bytes
(
first_sample
)
/
(
1024.0
*
1024.0
)
logger
.
info
(
f
"
Estimated sample size:
{
sample_size_mb
:
.
1
f
}
Mb
"
)
for
s
in
first_sample
:
size
+=
sys
.
getsizeof
(
s
)
if
isinstance
(
s
,
dict
):
size
+=
_dict_size_bytes
(
s
)
else
:
size
+=
_tensor_size_bytes
(
s
)
else
:
logger
.
info
(
f
"
{
list
(
first_sample
[
0
].
shape
)
}
@
{
first_sample
[
0
].
dtype
}
"
)
sample_size_mb
=
_sample_size_bytes
(
first_sample
)
/
(
1024.0
*
1024.0
)
logger
.
info
(
f
"
Estimated sample size:
{
sample_size_mb
:
.
1
f
}
Mb
"
)
sample_size_mb
=
size
/
(
1024.0
*
1024.0
)
logger
.
info
(
f
"
Estimated sample size:
{
sample_size_mb
:
.
1
f
}
Mb
"
)
def
transform_tensors
(
data
,
transforms
):
...
...
@@ -197,7 +149,7 @@ class _DelayedLoadingDataset(Dataset):
self
.
loader
=
loader
self
.
transform
=
torchvision
.
transforms
.
Compose
(
transforms
)
_
estimate_data_footprint
(
self
)
_
sample_size_bytes
(
self
)
def
__getitem__
(
self
,
key
:
int
)
->
Sample
:
tensor
,
metadata
=
self
.
loader
.
sample
(
self
.
raw_dataset
[
key
])
...
...
@@ -291,7 +243,7 @@ class _CachedDataset(Dataset):
),
)
_
estimate_data_footprint
(
self
)
_
sample_size_bytes
(
self
)
def
targets
(
self
)
->
list
[
int
|
list
[
int
]]:
"""
Return the integer targets for all samples in the dataset.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment