Skip to content
Snippets Groups Projects
Commit cc0b6c3b authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[scripts.train_analysis] Simplify and remove pandas requirements

parent c15ea640
No related branches found
No related tags found
1 merge request!6Making use of LightningDataModule and simplification of data loading
......@@ -26,7 +26,6 @@ requirements:
- click {{ click }}
- matplotlib {{ matplotlib }}
- numpy {{ numpy }}
- pandas {{ pandas }}
- pillow {{ pillow }}
- psutil {{ psutil }}
- pytorch {{ pytorch }}
......@@ -43,7 +42,6 @@ requirements:
- {{ pin_compatible('click') }}
- {{ pin_compatible('matplotlib') }}
- {{ pin_compatible('numpy') }}
- {{ pin_compatible('pandas') }}
- {{ pin_compatible('pillow') }}
- {{ pin_compatible('psutil') }}
- {{ pin_compatible('pytorch') }}
......
......@@ -117,7 +117,6 @@ autodoc_default_options = {
auto_intersphinx_packages = [
"matplotlib",
"numpy",
"pandas",
"pillow",
"psutil",
"scipy",
......
......@@ -30,7 +30,6 @@ dependencies = [
"clapper",
"click",
"numpy",
"pandas",
"scipy",
"scikit-learn",
"tqdm",
......
......@@ -5,18 +5,35 @@
import pathlib
import click
import matplotlib.figure
import matplotlib.pyplot as plt
import pandas
from clapper.click import verbosity_option
from clapper.logging import setup
from matplotlib.ticker import MaxNLocator
# avoids X11/graphical desktop requirement when creating plots
__import__("matplotlib").use("agg")
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
def create_figures(
    data: dict[str, tuple[list[int], list[float]]],
    groups: tuple[str, ...] = (
        "total-execution-time-seconds",
        "loss/*",
        "learning-rate",
        # psutil (host/cpu) metrics
        "memory-used-GB/cpu/*",
        "rss-GB/cpu/*",
        "vms-GB/cpu/*",
        "num-open-files/cpu/*",
        "num-processes/cpu/*",
        "percent-usage/cpu/*",
        # nvidia gpu metrics
        "memory-percent/gpu/*",
        "memory-used-GB/gpu/*",
        "memory-free-GB/gpu/*",
        "percent-usage/gpu/*",
    ),
) -> list:
    """Generates one figure per scalar group found in the input data.

    Parameters
    ----------
    data
        A dictionary where keys represent all scalar names, and values
        correspond to a tuple that contains a list with epoch numbers (when
        values were taken), and the monitored values themselves.  These
        lists are pre-sorted by epoch number.
    groups
        Scalar-name globs (:py:mod:`fnmatch` syntax) we are interested on,
        from the existing tensorboard data, for plotting.  Globs with
        multiple matches are drawn on the same plot.  Globs that match
        nothing are ignored.  The default is an (immutable) tuple so it
        cannot be shared and mutated across calls.

    Returns
    -------
    figures
        List of matplotlib figures, one per matched group.
    """
    import fnmatch
    import typing

    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator

    figures = []

    for group in groups:
        # every recorded scalar whose name matches this glob
        curves = {k: data[k] for k in fnmatch.filter(data.keys(), group)}

        if len(curves) == 0:  # nothing recorded for this glob
            continue

        fig, ax = plt.subplots(1, 1)
        ax = typing.cast(plt.Axes, ax)
        fig = typing.cast(plt.Figure, fig)

        if len(curves) == 1:
            # there is only one curve, just plot it
            title, (epochs, values) = next(iter(curves.items()))
            ax.plot(epochs, values)
        else:
            # this is an aggregate plot, name things consistently: drop the
            # common glob prefix from each legend label, and the "/*" tail
            # from the plot title
            labels = {k: k[len(group) - 1 :] for k in curves.keys()}
            title = group.rstrip("*").rstrip("/")
            for key, (epochs, values) in curves.items():
                ax.plot(epochs, values, label=labels[key])
            ax.legend(loc="best")

        # epochs are integral - avoid fractional ticks on the x axis
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.set_title(title)
        ax.set_xlabel("Epoch")
        ax.set_ylabel(title)
        ax.grid(alpha=0.3)
        fig.tight_layout()

        figures.append(fig)

    return figures
......@@ -116,11 +137,13 @@ def train_analysis(
"""Creates a plot for each metric in the training logs and saves them in a
pdf file."""
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from ..utils.tensorboard import get_scalars
from ..utils.tensorboard import scalars_to_dict
data = get_scalars(logdir)
data = scalars_to_dict(logdir)
output.parent.mkdir(parents=True, exist_ok=True)
......
......@@ -3,52 +3,50 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import pathlib
import typing
import pandas
from tensorboard.backend.event_processing.event_accumulator import (
EventAccumulator,
)
def scalars_to_dict(
    logdir: pathlib.Path,
) -> dict[str, tuple[list[int], list[float]]]:
    """Returns scalars stored in tensorboard event files.

    This method will gather all tensorboard event files produced by a
    training run, and will return a dictionary with all collected scalars,
    ready for plotting.

    Parameters
    ----------
    logdir
        Directory containing the event files
        (``events.out.tfevents.*``).

    Returns
    -------
    data
        A dictionary where keys represent all scalar names, and values
        correspond to a tuple that contains a list with epoch numbers (when
        values were taken), and the monitored values themselves.  Both
        lists are sorted by epoch number.
    """
    retval: dict[str, tuple[list[int], list[float]]] = {}

    # event files are visited in lexicographic order, so points from later
    # files are appended after earlier ones before the final sort below
    for logfile in sorted(logdir.glob("events.out.tfevents.*")):
        event_accumulator = EventAccumulator(str(logfile))
        event_accumulator.Reload()  # actually parses the event file

        for tag in event_accumulator.Tags()["scalars"]:
            steps, values = retval.setdefault(tag, ([], []))
            for data_point in event_accumulator.Scalars(tag):
                steps.append(data_point.step)
                values.append(data_point.value)

    # reorder according to step number; the stable sort is keyed on the
    # step only, so values recorded at the same step keep their original
    # relative order (sorting (step, value) pairs would not guarantee this)
    for key, (steps, values) in retval.items():
        if not steps:
            # defensive: a tag listed without any data points would make
            # the index-based reordering pointless (and, previously, a
            # zip(*[]) unpacking would raise ValueError)
            continue
        order = sorted(range(len(steps)), key=steps.__getitem__)
        retval[key] = (
            [steps[i] for i in order],
            [values[i] for i in order],
        )

    return retval
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment