Skip to content

Commit

Permalink
Universal ExperimentAnalyzer (aimclub#181)
Browse files Browse the repository at this point in the history
* Add convergence analyze

* unify iteration through paths

* add metrics analyze

* add mean, folders_to_ignore

* add analyze structural complexity

* add logging info

* add stat test analysis

* add confidence interval

* add example of usage

* add title for results plots

* lend title arg

* fix conditional expression

* add title to plots

* minor

* add test data for analyzer

* fixes after review

* fixes after review

* minor

* archive data and results

* extend docstrings

* pep8

* pep8 aimclub#2
  • Loading branch information
maypink authored Sep 12, 2023
1 parent 3234a52 commit 9f65481
Show file tree
Hide file tree
Showing 8 changed files with 410 additions and 3 deletions.
Empty file.
Binary file added examples/experiment_analyzer/data.tar.gz
Binary file not shown.
78 changes: 78 additions & 0 deletions examples/experiment_analyzer/experiment_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import os
import tarfile

import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu, kruskal, ttest_ind

from experiments.experiment_analyzer import ExperimentAnalyzer
from golem.core.paths import project_root


if __name__ == '__main__':
    # Example of ExperimentAnalyzer usage: builds convergence and metrics tables,
    # per-dataset boxplots and a statistical significance table.
    #
    # The result of analysis can be seen without running the script in
    # '~/GOLEM/examples/experiment_analyzer/result_analysis.tar.gz'.

    path_to_root = os.path.join(project_root(), 'examples', 'experiment_analyzer')

    # Extract data if there is an archive. The archive is unpacked next to itself
    # (not into the current working directory), because the data is read from
    # `<path_to_root>/data` below regardless of where the script was launched from.
    path_to_archive = os.path.join(path_to_root, 'data.tar.gz')
    if os.path.isfile(path_to_archive):
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(path_to_archive, 'r:gz') as tar:
            tar.extractall(path=path_to_root)

    path_to_experiment_data = os.path.join(path_to_root, 'data')
    path_to_save = os.path.join(path_to_root, 'result_analysis')

    analyzer = ExperimentAnalyzer(path_to_root=path_to_experiment_data,
                                  folders_to_ignore=['result_analysis', 'Thumbs.db'])

    # to get convergence table with mean values
    path_to_save_convergence = os.path.join(path_to_save, 'convergence')

    convergence_mean = analyzer.analyze_convergence(history_folder='histories', is_raise=False,
                                                    path_to_save=path_to_save_convergence,
                                                    is_mean=True)

    # to get convergence boxplots
    convergence = analyzer.analyze_convergence(history_folder='histories', is_raise=False)
    path_to_save_convergence_boxplots = os.path.join(path_to_save_convergence, 'convergence_boxplots')
    os.makedirs(path_to_save_convergence_boxplots, exist_ok=True)

    # The result is indexed as convergence[setup][dataset]; dataset names are taken
    # from the first setup. An empty result simply produces no plots.
    convergence_datasets = next(iter(convergence.values()), {})
    for dataset in convergence_datasets:
        to_compare = {setup: list(per_dataset[dataset]) for setup, per_dataset in convergence.items()}
        plt.boxplot(list(to_compare.values()), labels=list(to_compare.keys()))
        plt.title(f'Convergence on {dataset}')
        plt.savefig(os.path.join(path_to_save_convergence_boxplots, f'convergence_{dataset}.png'))
        plt.close()

    # to get metrics table with mean values
    path_to_save_metrics = os.path.join(path_to_save, 'metrics')
    metric_names = ['roc_auc', 'f1']
    metrics_dict_mean = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
                                                 is_raise=False, path_to_save=path_to_save_metrics,
                                                 is_mean=True)

    # to get metrics boxplots
    metrics_dict = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
                                            is_raise=False)
    path_to_save_metrics_boxplots = os.path.join(path_to_save_metrics, 'metrics_boxplot')

    # The result is indexed as metrics_dict[metric][setup][dataset].
    for metric in metric_names:
        metric_results = metrics_dict[metric]
        metric_datasets = next(iter(metric_results.values()), {})
        for dataset in metric_datasets:
            # NOTE(review): values are negated before plotting -- presumably the metrics
            # are stored with an inverted sign; confirm against 'evaluation_results.csv'.
            to_compare = {setup: [-1 * value for value in per_dataset[dataset]]
                          for setup, per_dataset in metric_results.items()}
            plt.boxplot(list(to_compare.values()), labels=list(to_compare.keys()))
            plt.title(f'{metric} on {dataset}')
            cur_path_to_save = os.path.join(path_to_save_metrics_boxplots, metric)
            os.makedirs(cur_path_to_save, exist_ok=True)
            plt.savefig(os.path.join(cur_path_to_save, f'{metric}_{dataset}.png'))
            plt.close()

    # to get stat test results table
    path_to_save_stat = os.path.join(path_to_save, 'statistic')
    stat_dict = analyzer.analyze_statistical_significance(data_to_analyze=metrics_dict['roc_auc'],
                                                          stat_tests=[mannwhitneyu, kruskal, ttest_ind],
                                                          path_to_save=path_to_save_stat)
Binary file not shown.
322 changes: 322 additions & 0 deletions experiments/experiment_analyzer.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions golem/core/dag/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def show(self, save_path: Optional[Union[PathLike, str]] = None, engine: Optiona
node_color: Optional[NodeColorType] = None, dpi: Optional[int] = None,
node_size_scale: Optional[float] = None, font_size_scale: Optional[float] = None,
edge_curvature_scale: Optional[float] = None,
title: Optional[str] = None,
nodes_labels: Dict[int, str] = None, edges_labels: Dict[int, str] = None):
"""Visualizes graph or saves its picture to the specified ``path``
Expand All @@ -217,13 +218,15 @@ def show(self, save_path: Optional[Union[PathLike, str]] = None, engine: Optiona
font_size_scale: use to make font size bigger or lesser. Supported only for the engine 'matplotlib'.
edge_curvature_scale: use to make edges more or less curved. Supported only for the engine 'matplotlib'.
dpi: DPI of the output image. Not supported for the engine 'pyvis'.
title: title for plot
nodes_labels: labels to display near nodes
edges_labels: labels to display near edges
"""
GraphVisualizer(graph=self)\
.visualise(save_path=save_path, engine=engine, node_color=node_color, dpi=dpi,
node_size_scale=node_size_scale, font_size_scale=font_size_scale,
edge_curvature_scale=edge_curvature_scale,
title=title,
nodes_labels=nodes_labels, edges_labels=edges_labels)

@property
Expand Down
6 changes: 4 additions & 2 deletions golem/visualisation/graph_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def visualise(self, save_path: Optional[PathType] = None, engine: Optional[str]
node_color: Optional[NodeColorType] = None, dpi: Optional[int] = None,
node_size_scale: Optional[float] = None,
font_size_scale: Optional[float] = None, edge_curvature_scale: Optional[float] = None,
title: Optional[str] = None,
nodes_labels: Dict[int, str] = None, edges_labels: Dict[int, str] = None):
engine = engine or self.get_predefined_value('engine')

Expand All @@ -67,7 +68,7 @@ def visualise(self, save_path: Optional[PathType] = None, engine: Optional[str]
self.__draw_with_networkx(save_path=save_path, node_color=node_color, dpi=dpi,
node_size_scale=node_size_scale, font_size_scale=font_size_scale,
edge_curvature_scale=edge_curvature_scale,
nodes_labels=nodes_labels, edges_labels=edges_labels)
title=title, nodes_labels=nodes_labels, edges_labels=edges_labels)
elif engine == 'pyvis':
self.__draw_with_pyvis(save_path, node_color)
elif engine == 'graphviz':
Expand Down Expand Up @@ -166,7 +167,7 @@ def __draw_with_networkx(self, save_path: Optional[PathType] = None,
node_color: Optional[NodeColorType] = None,
dpi: Optional[int] = None, node_size_scale: Optional[float] = None,
font_size_scale: Optional[float] = None, edge_curvature_scale: Optional[float] = None,
graph_to_nx_convert_func: Optional[Callable] = None,
graph_to_nx_convert_func: Optional[Callable] = None, title: Optional[str] = None,
nodes_labels: Dict[int, str] = None, edges_labels: Dict[int, str] = None):
save_path = save_path or self.get_predefined_value('save_path')
node_color = node_color or self.get_predefined_value('node_color')
Expand All @@ -180,6 +181,7 @@ def __draw_with_networkx(self, save_path: Optional[PathType] = None,
fig, ax = plt.subplots(figsize=(7, 7))
fig.set_dpi(dpi)

plt.title(title)
self.draw_nx_dag(ax, node_color, node_size_scale, font_size_scale, edge_curvature_scale,
graph_to_nx_convert_func, nodes_labels, edges_labels)
if not save_path:
Expand Down
4 changes: 3 additions & 1 deletion golem/visualisation/opt_history/fitness_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,13 @@ def __init__(self,

def visualize(self,
save_path: Optional[Union[os.PathLike, str]] = None,
with_confidence: bool = True,
metric_id: int = 0,
dpi: Optional[int] = None):
""" Visualizes the best fitness values during the evolution in the form of line.
:param save_path: path to save the visualization. If set, then the image will be saved,
and if not, it will be displayed.
:param with_confidence: bool param specifying to use confidence interval or not.
:param metric_id: numeric index of the metric to visualize (for multi-objective opt-n).
:param dpi: DPI of the output figure.
"""
Expand All @@ -294,7 +296,7 @@ def visualize(self,

fig, ax = plt.subplots(figsize=(6.4, 4.8), facecolor='w')
xlabel = 'Generation'
self.plot_multiple_fitness_lines(ax, metric_id)
self.plot_multiple_fitness_lines(ax, metric_id, with_confidence)
setup_fitness_plot(ax, xlabel)
plt.legend()
show_or_save_figure(fig, save_path, dpi)
Expand Down

0 comments on commit 9f65481

Please sign in to comment.