forked from aimclub/GOLEM

Commit: Universal ExperimentAnalyzer (aimclub#181)

* Add convergence analysis
* Unify iteration through paths
* Add metrics analysis
* Add mean, folders_to_ignore
* Add structural complexity analysis
* Add logging info
* Add stat test analysis
* Add confidence interval
* Add example of usage
* Add title for results plots
* Pass title arg
* Fix conditional expression
* Add title to plots
* Minor
* Add test data for analyzer
* Fixes after review
* Fixes after review
* Minor
* Archive data and results
* Extend docstrings
* pep8
* pep8 aimclub#2
Showing 8 changed files with 410 additions and 3 deletions.
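
The example below indexes the analyzer's results as convergence[setup][dataset], so the extracted data folder is expected to group launches by setup and dataset. The exact on-disk layout is not part of this diff; the following is only a minimal sketch with hypothetical setup_*, dataset_* and launch_* names, where each launch folder holds the 'histories' subfolder and 'evaluation_results.csv' file that the example passes to analyze_convergence and analyze_metrics.

import os

# Hypothetical layout (an assumption, not taken from this commit):
#   <data root>/<setup>/<dataset>/<launch>/histories/             -> read by analyze_convergence
#   <data root>/<setup>/<dataset>/<launch>/evaluation_results.csv -> read by analyze_metrics
root = os.path.join('examples', 'experiment_analyzer', 'data')
for setup in ('setup_baseline', 'setup_tuned'):        # hypothetical setup names
    for dataset in ('dataset_a', 'dataset_b'):         # hypothetical dataset names
        for launch in ('launch_0', 'launch_1'):        # hypothetical launch folders
            os.makedirs(os.path.join(root, setup, dataset, launch, 'histories'), exist_ok=True)
            # the experiment itself would write evaluation_results.csv next to 'histories'
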
@@ -0,0 +1,78 @@

import os
import tarfile

import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu, kruskal, ttest_ind

from experiments.experiment_analyzer import ExperimentAnalyzer
from golem.core.paths import project_root


if __name__ == '__main__':
    """ The result of analysis can be seen without running the script in
    '~/GOLEM/examples/experiment_analyzer/result_analysis.tar.gz'. """

    path_to_root = os.path.join(project_root(), 'examples', 'experiment_analyzer')

    # extract data if there is an archive
    if 'data.tar.gz' in os.listdir(path_to_root):
        tar = tarfile.open(os.path.join(path_to_root, 'data.tar.gz'), "r:gz")
        tar.extractall()
        tar.close()

    path_to_experiment_data = os.path.join(path_to_root, 'data')
    path_to_save = os.path.join(path_to_root, 'result_analysis')

    analyzer = ExperimentAnalyzer(path_to_root=path_to_experiment_data, folders_to_ignore=['result_analysis',
                                                                                           'Thumbs.db'])

    # to get convergence table with mean values
    path_to_save_convergence = os.path.join(path_to_save, 'convergence')

    convergence_mean = analyzer.analyze_convergence(history_folder='histories', is_raise=False,
                                                    path_to_save=path_to_save_convergence,
                                                    is_mean=True)

    # to get convergence boxplots
    convergence = analyzer.analyze_convergence(history_folder='histories', is_raise=False)
    path_to_save_convergence_boxplots = os.path.join(path_to_save_convergence, 'convergence_boxplots')

    for dataset in convergence[list(convergence.keys())[0]].keys():
        to_compare = dict()
        for setup in convergence.keys():
            to_compare[setup] = [i for i in convergence[setup][dataset]]
        plt.boxplot(list(to_compare.values()), labels=list(to_compare.keys()))
        plt.title(f'Convergence on {dataset}')
        os.makedirs(path_to_save_convergence_boxplots, exist_ok=True)
        plt.savefig(os.path.join(path_to_save_convergence_boxplots, f'convergence_{dataset}.png'))
        plt.close()

    # to get metrics table with mean values
    path_to_save_metrics = os.path.join(path_to_save, 'metrics')
    metric_names = ['roc_auc', 'f1']
    metrics_dict_mean = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
                                                 is_raise=False, path_to_save=path_to_save_metrics,
                                                 is_mean=True)

    # to get metrics boxplots
    metrics_dict = analyzer.analyze_metrics(metric_names=metric_names, file_name='evaluation_results.csv',
                                            is_raise=False)
    path_to_save_metrics_boxplots = os.path.join(path_to_save_metrics, 'metrics_boxplot')

    for metric in metric_names:
        for dataset in metrics_dict[metric][list(metrics_dict[metric].keys())[0]].keys():
            to_compare = dict()
            for setup in metrics_dict[metric].keys():
                to_compare[setup] = [-1 * i for i in metrics_dict[metric][setup][dataset]]
            plt.boxplot(list(to_compare.values()), labels=list(to_compare.keys()))
            plt.title(f'{metric} on {dataset}')
            cur_path_to_save = os.path.join(path_to_save_metrics_boxplots, metric)
            os.makedirs(cur_path_to_save, exist_ok=True)
            plt.savefig(os.path.join(cur_path_to_save, f'{metric}_{dataset}.png'))
            plt.close()

    # to get stat test results table
    path_to_save_stat = os.path.join(path_to_save, 'statistic')
    stat_dict = analyzer.analyze_statistical_significance(data_to_analyze=metrics_dict['roc_auc'],
                                                          stat_tests=[mannwhitneyu, kruskal, ttest_ind],
                                                          path_to_save=path_to_save_stat)
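
The module docstring above points to a pre-built 'result_analysis.tar.gz' containing the expected output. As a hedged companion step that is not part of this commit, the generated 'result_analysis' folder could be packed back into such an archive with the standard tarfile module:

import os
import tarfile

from golem.core.paths import project_root

# Sketch (assumption, not part of the diff): archive the generated results so they
# can be shipped alongside data.tar.gz, mirroring the pre-built result_analysis.tar.gz.
path_to_root = os.path.join(project_root(), 'examples', 'experiment_analyzer')
path_to_save = os.path.join(path_to_root, 'result_analysis')
with tarfile.open(os.path.join(path_to_root, 'result_analysis.tar.gz'), 'w:gz') as tar:
    tar.add(path_to_save, arcname='result_analysis')
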
(The remaining changed files in this commit are binary archives or large diffs and are not rendered here.)