From b5b7ac4ce431e9ac5671ca701dba5893dc910357 Mon Sep 17 00:00:00 2001 From: "sascha.herzinger" <sascha.herzinger@uni.lu> Date: Tue, 30 Jan 2018 14:00:35 -0500 Subject: [PATCH] added ANOVA stats to boxplots --- fractalis/analytics/tasks/boxplot/main.py | 9 ++++++++- tests/unit/analytics/boxplot/test_boxplot.py | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fractalis/analytics/tasks/boxplot/main.py b/fractalis/analytics/tasks/boxplot/main.py index 76367db..d9af0ae 100644 --- a/fractalis/analytics/tasks/boxplot/main.py +++ b/fractalis/analytics/tasks/boxplot/main.py @@ -51,6 +51,7 @@ class BoxplotTask(AnalyticTask): 'categories': df['category'].unique().tolist(), 'subsets': df['subset'].unique().tolist() } + group_values = [] for feature in results['features']: for subset in results['subsets']: for category in results['categories']: @@ -60,13 +61,19 @@ class BoxplotTask(AnalyticTask): values = [value for value in values if not np.isnan(value)] if len(values) < 2: continue + label = '{}//{}//s{}'.format(feature, category, subset + 1) + group_values.append(values) stats = self.boxplot_statistics(values) kde = scipy.stats.gaussian_kde(values) xs = np.linspace(start=stats['l_wsk'], stop=stats['u_wsk'], num=100) stats['kde'] = kde(xs).tolist() - label = '{}//{}//s{}'.format(feature, category, subset + 1) results['statistics'][label] = stats + f_value, p_value = scipy.stats.f_oneway(*group_values) + results['anova'] = { + 'p_value': p_value, + 'f_value': f_value + } return results @staticmethod diff --git a/tests/unit/analytics/boxplot/test_boxplot.py b/tests/unit/analytics/boxplot/test_boxplot.py index ac67dfa..1bd0328 100644 --- a/tests/unit/analytics/boxplot/test_boxplot.py +++ b/tests/unit/analytics/boxplot/test_boxplot.py @@ -27,6 +27,9 @@ class TestBoxplotAnalytics: json.dumps(results) # check if result is json serializable assert 'data' in results assert 'statistics' in results + assert 'anova' in results + assert results['anova']['p_value'] == 1 + assert results['anova']['f_value'] == 0 assert len(json.loads(results['data'])) == 8 assert len(results['statistics']) == 2 assert 'foo////s1' in results['statistics'] -- GitLab