From b5b7ac4ce431e9ac5671ca701dba5893dc910357 Mon Sep 17 00:00:00 2001
From: "sascha.herzinger" <sascha.herzinger@uni.lu>
Date: Tue, 30 Jan 2018 14:00:35 -0500
Subject: [PATCH] Add one-way ANOVA statistics to boxplot results

---
 fractalis/analytics/tasks/boxplot/main.py    | 9 ++++++++-
 tests/unit/analytics/boxplot/test_boxplot.py | 3 +++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/fractalis/analytics/tasks/boxplot/main.py b/fractalis/analytics/tasks/boxplot/main.py
index 76367db..d9af0ae 100644
--- a/fractalis/analytics/tasks/boxplot/main.py
+++ b/fractalis/analytics/tasks/boxplot/main.py
@@ -51,6 +51,7 @@ class BoxplotTask(AnalyticTask):
             'categories': df['category'].unique().tolist(),
             'subsets': df['subset'].unique().tolist()
         }
+        group_values = []
         for feature in results['features']:
             for subset in results['subsets']:
                 for category in results['categories']:
@@ -60,13 +61,19 @@ class BoxplotTask(AnalyticTask):
                     values = [value for value in values if not np.isnan(value)]
                     if len(values) < 2:
                         continue
+                    label = '{}//{}//s{}'.format(feature, category, subset + 1)
+                    group_values.append(values)
                     stats = self.boxplot_statistics(values)
                     kde = scipy.stats.gaussian_kde(values)
                     xs = np.linspace(start=stats['l_wsk'],
                                      stop=stats['u_wsk'], num=100)
                     stats['kde'] = kde(xs).tolist()
-                    label = '{}//{}//s{}'.format(feature, category, subset + 1)
                     results['statistics'][label] = stats
+        f_value, p_value = scipy.stats.f_oneway(*group_values)
+        results['anova'] = {
+            'p_value': p_value,
+            'f_value': f_value
+        }
         return results
 
     @staticmethod
diff --git a/tests/unit/analytics/boxplot/test_boxplot.py b/tests/unit/analytics/boxplot/test_boxplot.py
index ac67dfa..1bd0328 100644
--- a/tests/unit/analytics/boxplot/test_boxplot.py
+++ b/tests/unit/analytics/boxplot/test_boxplot.py
@@ -27,6 +27,9 @@ class TestBoxplotAnalytics:
         json.dumps(results)  # check if result is json serializable
         assert 'data' in results
         assert 'statistics' in results
+        assert 'anova' in results
+        assert results['anova']['p_value'] == 1
+        assert results['anova']['f_value'] == 0
         assert len(json.loads(results['data'])) == 8
         assert len(results['statistics']) == 2
         assert 'foo////s1' in results['statistics']
-- 
GitLab