Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
deepdraw
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
medai
software
deepdraw
Commits
c6b0ad72
Commit
c6b0ad72
authored
5 years ago
by
André Anjos
Browse files
Options
Downloads
Patches
Plain Diff
[utils.resources] Fix gpu logging
parent
6ad254bf
No related branches found
No related tags found
1 merge request
!12
Streamlining
Pipeline
#39251
failed
5 years ago
Stage: build
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
bob/ip/binseg/engine/trainer.py
+5
-5
5 additions, 5 deletions
bob/ip/binseg/engine/trainer.py
bob/ip/binseg/utils/resources.py
+77
-57
77 additions, 57 deletions
bob/ip/binseg/utils/resources.py
with
82 additions
and
62 deletions
bob/ip/binseg/engine/trainer.py
+
5
−
5
View file @
c6b0ad72
...
@@ -13,7 +13,7 @@ from tqdm import tqdm
...
@@ -13,7 +13,7 @@ from tqdm import tqdm
from
..utils.metric
import
SmoothedValue
from
..utils.metric
import
SmoothedValue
from
..utils.summary
import
summary
from
..utils.summary
import
summary
from
..utils.resources
import
cpu_
info
,
gpu_info
,
cpu_log
,
gpu_log
from
..utils.resources
import
cpu_
constants
,
gpu_constants
,
cpu_log
,
gpu_log
import
logging
import
logging
...
@@ -79,7 +79,7 @@ def run(
...
@@ -79,7 +79,7 @@ def run(
if
device
!=
"
cpu
"
:
if
device
!=
"
cpu
"
:
# asserts we do have a GPU
# asserts we do have a GPU
assert
bool
(
gpu_
info
()),
(
assert
bool
(
gpu_
constants
()),
(
f
"
Device set to
'
{
device
}
'
, but cannot
"
f
"
Device set to
'
{
device
}
'
, but cannot
"
f
"
find a GPU (maybe nvidia-smi is not installed?)
"
f
"
find a GPU (maybe nvidia-smi is not installed?)
"
)
)
...
@@ -104,10 +104,10 @@ def run(
...
@@ -104,10 +104,10 @@ def run(
os
.
unlink
(
backup
)
os
.
unlink
(
backup
)
shutil
.
move
(
static_logfile_name
,
backup
)
shutil
.
move
(
static_logfile_name
,
backup
)
with
open
(
static_logfile_name
,
"
w
"
,
newline
=
""
)
as
f
:
with
open
(
static_logfile_name
,
"
w
"
,
newline
=
""
)
as
f
:
logdata
=
cpu_
info
()
logdata
=
cpu_
constants
()
if
device
!=
"
cpu
"
:
if
device
!=
"
cpu
"
:
logdata
+=
gpu_
info
()
logdata
+=
gpu_
constants
()
logdata
=
((
"
model_size
"
,
n
),)
+
logdata
logdata
+
=
((
"
model_size
"
,
n
),)
logwriter
=
csv
.
DictWriter
(
f
,
fieldnames
=
[
k
[
0
]
for
k
in
logdata
])
logwriter
=
csv
.
DictWriter
(
f
,
fieldnames
=
[
k
[
0
]
for
k
in
logdata
])
logwriter
.
writeheader
()
logwriter
.
writeheader
()
logwriter
.
writerow
(
dict
(
k
for
k
in
logdata
))
logwriter
.
writerow
(
dict
(
k
for
k
in
logdata
))
...
...
This diff is collapsed.
Click to expand it.
bob/ip/binseg/utils/resources.py
+
77
−
57
View file @
c6b0ad72
...
@@ -4,7 +4,6 @@
...
@@ -4,7 +4,6 @@
"""
Tools for interacting with the running computer or GPU
"""
"""
Tools for interacting with the running computer or GPU
"""
import
os
import
os
import
re
import
subprocess
import
subprocess
import
shutil
import
shutil
...
@@ -17,29 +16,13 @@ logger = logging.getLogger(__name__)
...
@@ -17,29 +16,13 @@ logger = logging.getLogger(__name__)
_nvidia_smi
=
shutil
.
which
(
"
nvidia-smi
"
)
_nvidia_smi
=
shutil
.
which
(
"
nvidia-smi
"
)
"""
Location of the nvidia-smi program, if one exists
"""
"""
Location of the nvidia-smi program, if one exists
"""
_nvidia_starter_query
=
(
# obtain possible values with ``nvidia-smi --help-query-gpu``
"
gpu_name
"
,
"
driver_version
"
,
"
memory.total
"
,
)
"""
Query parameters for logging static GPU information
"""
_nvidia_log_query
=
(
# obtain possible values with ``nvidia-smi --help-query-gpu``
"
memory.used
"
,
"
memory.free
"
,
"
utilization.memory
"
,
"
utilization.gpu
"
,
)
"""
Query parameters for logging performance of GPU
"""
GB
=
float
(
2
**
30
)
GB
=
float
(
2
**
30
)
"""
The number of bytes in a gigabyte
"""
"""
The number of bytes in a gigabyte
"""
def
gpu_info
(
query
=
_nvidia_starter_query
):
def
run_nvidia_smi
(
query
,
rename
=
None
):
"""
Returns GPU
(static)
information
using nvidia-smi
"""
Returns GPU information
from query
For a comprehensive list of options and help, execute ``nvidia-smi
For a comprehensive list of options and help, execute ``nvidia-smi
--help-query-gpu`` on a host with a GPU
--help-query-gpu`` on a host with a GPU
...
@@ -51,75 +34,112 @@ def gpu_info(query=_nvidia_starter_query):
...
@@ -51,75 +34,112 @@ def gpu_info(query=_nvidia_starter_query):
query : list
query : list
A list of query strings as defined by ``nvidia-smi --help-query-gpu``
A list of query strings as defined by ``nvidia-smi --help-query-gpu``
rename : :py:class:`list`, Optional
A list of keys to yield in the return value for each entry above. It
gives you the opportunity to rewrite some key names for convenience.
This list, if provided, must be of the same length as ``query``.
Returns
Returns
-------
-------
data :
tupl
e
data :
:py:class:`tuple`, Non
e
An ordered dictionary (organized as 2-tuples) containing the queried
An ordered dictionary (organized as 2-tuples) containing the queried
parameters. If ``nvidia-smi`` is not available,
returns a list of
parameters
(``rename`` versions)
. If ``nvidia-smi`` is not available,
``None`` objects. Dots and underscores in the original NVIDIA naming
returns ``None``. Percentage information is left alone,
convention are normalized with dashes
.
memory information is transformed to gigabytes (floating-point)
.
"""
"""
if
_nvidia_smi
is
not
None
:
if
_nvidia_smi
is
not
None
:
if
rename
is
None
:
rename
=
query
else
:
assert
len
(
rename
)
==
len
(
query
)
values
=
subprocess
.
getoutput
(
values
=
subprocess
.
getoutput
(
"
%s --query-gpu=%s --format=csv,noheader
"
"
%s --query-gpu=%s --format=csv,noheader
"
%
(
_nvidia_smi
,
"
,
"
.
join
(
query
))
%
(
_nvidia_smi
,
"
,
"
.
join
(
query
))
)
)
values
=
[
k
.
strip
()
for
k
in
values
.
split
(
"
,
"
)]
values
=
[
k
.
strip
()
for
k
in
values
.
split
(
"
,
"
)]
regexp
=
re
.
compile
(
r
"
(\.|-)
"
)
t_values
=
[]
fieldnames
=
[
regexp
.
sub
(
"
_
"
,
k
)
for
k
in
query
]
for
k
in
values
:
return
tuple
(
zip
(
fieldnames
,
values
))
if
k
.
endswith
(
"
%
"
):
t_values
.
append
(
float
(
k
[:
-
1
].
strip
()))
elif
k
.
endswith
(
"
MiB
"
):
t_values
.
append
(
float
(
k
[:
-
3
].
strip
())
/
1024
)
else
:
t_values
.
append
(
k
)
#unchanged
return
tuple
(
zip
(
rename
,
t_values
))
def
gpu_
log
(
query
=
_nvidia_log_query
):
def
gpu_
constants
(
):
"""
Returns GPU information
about current non-static status
using nvidia-smi
"""
Returns GPU
(static)
information using nvidia-smi
For a comprehensive list of options and help, execute ``nvidia-smi
See :py:func:`run_nvidia_smi` for operational details.
--help-query-gpu`` on a host with a GPU
Returns
-------
Parameters
data : :py:class:`tuple`, None
----------
If ``nvidia-smi`` is not available, returns ``None``, otherwise, we
return an ordered dictionary (organized as 2-tuples) containing the
following ``nvidia-smi`` query information:
query : list
* ``gpu_name``, as ``gpu_name`` (:py:class:`str`)
A list of query strings as defined by ``nvidia-smi --help-query-gpu``
* ``driver_version``, as ``gpu_driver_version`` (:py:class:`str`)
* ``memory.total``, as ``gpu_memory_total`` (transformed to gigabytes,
:py:class:`float`)
"""
return
run_nvidia_smi
(
(
"
gpu_name
"
,
"
driver_version
"
,
"
memory.total
"
),
(
"
gpu_name
"
,
"
gpu_driver_version
"
,
"
gpu_memory_total
"
),
)
def
gpu_log
():
"""
Returns GPU information about current non-static status using nvidia-smi
See :py:func:`run_nvidia_smi` for operational details.
Returns
Returns
-------
-------
data : tuple
data : :py:class:`tuple`, None
An ordered dictionary (organized as 2-tuples) containing the queried
If ``nvidia-smi`` is not available, returns ``None``, otherwise, we
parameters. If ``nvidia-smi`` is not available, returns a list of
return an ordered dictionary (organized as 2-tuples) containing the
``None`` objects. Dots and underscores in the original NVIDIA naming
following ``nvidia-smi`` query information:
convention are normalized with dashes. Percentage information is left
alone, memory information is transformed in to gigabytes.
"""
* ``memory.used``, as ``gpu_memory_used`` (transformed to gigabytes,
:py:class:`float`)
* ``memory.free``, as ``gpu_memory_free`` (transformed to gigabytes,
:py:class:`float`)
* ``utilization.memory``, as ``gpu_memory_percent``,
(:py:class:`float`, in percent)
* ``utilization.gpu``, as ``gpu_utilization``,
(:py:class:`float`, in percent)
if
_nvidia_smi
is
not
None
:
"""
values
=
subprocess
.
getoutput
(
"
%s --query-gpu=%s --format=csv,noheader
"
return
run_nvidia_smi
(
%
(
_nvidia_smi
,
"
,
"
.
join
(
query
))
(
"
memory.used
"
,
"
memory.free
"
,
"
utilization.memory
"
,
"
utilization.gpu
"
),
)
(
values
=
[
k
.
strip
()
for
k
in
values
.
split
(
"
,
"
)]
"
gpu_memory_used
"
,
t_values
=
[]
"
gpu_memory_free
"
,
for
k
in
values
:
"
gpu_memory_percent
"
,
if
k
.
endswith
(
'
%
'
):
t_values
.
append
(
k
[:
-
1
].
strip
())
"
gpu_percent
"
,
elif
k
.
endswith
(
'
MiB
'
):
t_values
.
append
(
float
(
k
[:
-
3
].
strip
())
/
1024
)
),
regexp
=
re
.
compile
(
r
"
(\.|-)
"
)
)
fieldnames
=
[
regexp
.
sub
(
"
_
"
,
k
)
for
k
in
query
]
return
tuple
(
zip
(
fieldnames
,
values
))
_CLUSTER
=
[]
_CLUSTER
=
[]
"""
List of processes currently being monitored
"""
"""
List of processes currently being monitored
"""
def
cpu_
info
():
def
cpu_
constants
():
"""
Returns static CPU information about the current system.
"""
Returns static CPU information about the current system.
...
@@ -172,7 +192,7 @@ def cpu_log():
...
@@ -172,7 +192,7 @@ def cpu_log():
"""
"""
global
_CLUSTER
global
_CLUSTER
if
(
not
_CLUSTER
)
or
(
_CLUSTER
[
0
]
!=
psutil
.
Process
()):
#initialization
if
(
not
_CLUSTER
)
or
(
_CLUSTER
[
0
]
!=
psutil
.
Process
()):
#
initialization
this
=
psutil
.
Process
()
this
=
psutil
.
Process
()
_CLUSTER
=
[
this
]
+
this
.
children
(
recursive
=
True
)
_CLUSTER
=
[
this
]
+
this
.
children
(
recursive
=
True
)
# touch cpu_percent() at least once for all
# touch cpu_percent() at least once for all
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment