Skip to content
Snippets Groups Projects
Commit 4147ecbb authored by Sascha Herzinger's avatar Sascha Herzinger
Browse files

heatmap zscores

parent b75f21a0
No related branches found
No related tags found
No related merge requests found
Pipeline #
"""Module containing analysis code for heatmap analytics."""
from typing import List
from typing import List, TypeVar
from functools import reduce
import pandas as pd
from scipy.stats import zscore
from fractalis.analytics.task import AnalyticTask
T = TypeVar('T')
class HeatmapTask(AnalyticTask):
"""Heatmap analysis task implementing AnalyticTask. This class is a
submittable celery task."""
......@@ -16,8 +20,29 @@ class HeatmapTask(AnalyticTask):
def main(self, numerical_arrays: List[pd.DataFrame],
         numericals: List[pd.DataFrame],
         categoricals: List[pd.DataFrame],
         subsets: List[List[T]]) -> dict:
    """Compute row-wise z-scores and return the data in long format.

    Every frame in ``numerical_arrays`` must have a ``variable`` column;
    each remaining column is treated as one id and must hold numeric
    values. The frames are stacked, a z-score is computed across the ids
    of every row, and the raw values and z-scores are reshaped into one
    record per (id, variable) pair for the front-end.

    :param numerical_arrays: Frames with a ``variable`` column plus one
        numeric column per id.
    :param numericals: Not used by this implementation.
    :param categoricals: Not used by this implementation.
    :param subsets: Not used by this implementation.
    :return: ``{'data': <json>}`` where the JSON (``orient='index'``)
        holds records with the keys ``id``, ``variable``, ``value`` and
        ``zscore``.
    :raises ValueError: If ``numerical_arrays`` is empty.
    """
    if not numerical_arrays:
        raise ValueError(
            'Heatmap analysis requires at least one numerical array.')

    # DataFrame.append was removed in pandas 2.0; concat stacks the
    # frames the same way (row labels are kept, not renumbered).
    df = pd.concat(numerical_arrays)
    variables = df['variable'].values
    df = df.drop('variable', axis=1)

    # One vectorised call instead of DataFrame.apply(zscore, axis=1):
    # zscore returns a plain ndarray, which current pandas wraps into a
    # Series of arrays rather than a frame, breaking the reshaping below.
    zscores = pd.DataFrame(zscore(df.values.astype(float), axis=1),
                           index=df.index, columns=df.columns)

    def to_long(frame: pd.DataFrame, value_name: str) -> pd.DataFrame:
        # ids become rows and variables become columns, then melt to one
        # record per (id, variable) pair.
        wide = frame.transpose()
        wide.columns = variables
        wide = wide.rename_axis('id').reset_index()
        return pd.melt(wide, id_vars='id', var_name='variable',
                       value_name=value_name)

    # prepare output for front-end
    # Both frames have identical shape and labels, so their melted rows
    # line up one-to-one. Assigning the column directly avoids the cross
    # product a merge on ['id', 'variable'] yields when labels repeat.
    result = to_long(df, 'value')
    result['zscore'] = to_long(zscores, 'zscore')['zscore']
    return {
        'data': result.to_json(orient='index')
    }
\ No newline at end of file
......@@ -32,9 +32,15 @@ class DoubleArrayETL(ETL):
def transform(self, raw_data: List[dict], descriptor: dict) -> pd.DataFrame:
    """Turn raw array records into a variables-by-ids data frame.

    Each record contributes one column, labelled with the record's
    ``id`` and filled with the sequence stored under the descriptor's
    dictionary name. A leading ``variable`` column numbers the rows
    from 0.

    :param raw_data: Records with an ``id`` key and an array-like value
        under ``descriptor['dictionary']['name']``.
    :param descriptor: Must provide ``descriptor['dictionary']['name']``.
    :return: Frame with a ``variable`` column followed by one column per
        record id.
    """
    # NOTE(review): the return value of prepare_ids was never used; the
    # records are read from raw_data below. The call is kept in case it
    # normalises the ids in place -- confirm against shared.prepare_ids.
    shared.prepare_ids(raw_data)
    name = descriptor['dictionary']['name']
    ids = [row['id'] for row in raw_data]
    values = [row[name] for row in raw_data]
    # One row per record first, then transpose so each id is a column.
    df = pd.DataFrame(values).transpose()
    df.columns = ids
    df.insert(0, 'variable', pd.Series(range(df.shape[0])))
    return df
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment