diff --git a/fractalis/__init__.py b/fractalis/__init__.py index d32c558ad6d4ed3a60dd546d7bed410e40bf19ac..91785facb59faf8b15aff0eaf8387904ba629964 100644 --- a/fractalis/__init__.py +++ b/fractalis/__init__.py @@ -10,6 +10,7 @@ import yaml from flask import Flask from flask_cors import CORS from flask_request_id import RequestID +from flask_compress import Compress from redis import StrictRedis from fractalis.session import RedisSessionInterface @@ -40,6 +41,9 @@ if default_config: # Plugin that assigns every request an id RequestID(app) +# Plugin that compresses all responses +Compress(app) + # create a redis instance log.info("Creating Redis connection.") redis = StrictRedis(host=app.config['REDIS_HOST'], diff --git a/fractalis/analytics/controller.py b/fractalis/analytics/controller.py index 1f937360855ab9a605a0eead03ac8314c76cf7c0..089c2bc1d4a857444de363e9269c6e5862acddb0 100644 --- a/fractalis/analytics/controller.py +++ b/fractalis/analytics/controller.py @@ -1,5 +1,6 @@ """The /analytics controller. Please refer to doc/api for more information.""" +import json import logging from typing import Tuple from uuid import UUID diff --git a/fractalis/analytics/task.py b/fractalis/analytics/task.py index 2163f1952fcd5b66cdc2e0abf03d65de8cacc5f9..2ec9382a9d5f025d8c4aa98843d4b849e1874ea5 100644 --- a/fractalis/analytics/task.py +++ b/fractalis/analytics/task.py @@ -120,7 +120,9 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta): :param value: The string to test. :return: True if argument contains data_task_id. """ - return value.startswith('$') and value.endswith('$') + return isinstance(value, str) and \ + value.startswith('$') and \ + value.endswith('$') @staticmethod def parse_value(value: str) -> Tuple[str, dict]: @@ -157,7 +159,7 @@ class AnalyticTask(Task, metaclass=abc.ABCMeta): value = args[arg] # value is data id - if isinstance(value, str) and self.contains_data_task_id(value): + if self.contains_data_task_id(value): data_task_id, filters = self.parse_value(value) df = self.data_task_id_to_data_frame( data_task_id, session_data_tasks, decrypt) diff --git a/fractalis/analytics/tasks/heatmap/main.py b/fractalis/analytics/tasks/heatmap/main.py index 9f0f811a0b6f9c339ccb2b9b5ea9ac0fef2cd97b..8c523a8e6f566fa6e310def70624335538ccc04c 100644 --- a/fractalis/analytics/tasks/heatmap/main.py +++ b/fractalis/analytics/tasks/heatmap/main.py @@ -51,32 +51,41 @@ class HeatmapTask(AnalyticTask): raise ValueError(error) # make matrix of input data - _df = df.pivot(index='feature', columns='id', values='value') + df = df.pivot(index='feature', columns='id', values='value') # create z-score matrix used for visualising the heatmap - z_df = _df.apply(lambda row: (row - row.mean()) / row.std(ddof=0), - axis=1) + z_df = [(df.iloc[i] - df.iloc[i].mean()) / df.iloc[i].std(ddof=0) + for i in range(df.shape[0])] + z_df = pd.DataFrame(z_df, columns=df.columns, index=df.index) # compute statistic for ranking - stats = self.stat_task.main(df=_df, subsets=subsets, + stats = self.stat_task.main(df=df, subsets=subsets, ranking_method=ranking_method) - del _df - - # prepare output for front-end - z_df['feature'] = z_df.index - z_df = pd.melt(z_df, id_vars='feature') - df = df.merge(z_df, on=['id', 'feature']) - df.columns = ['id', 'feature', 'value', 'zscore'] # sort by ranking_value - df['sort_value'] = df['feature'].apply( - lambda x: stats[stats['feature'] == x][ranking_method].tolist()[0]) - df = df.sort_values('sort_value', ascending=False).drop('sort_value', 1) + df = pd.merge(df, stats[['feature', ranking_method]], how='left', + left_index=True, right_on='feature') + df = df.sort_values(ranking_method, ascending=False) \ + .drop(ranking_method, axis=1) + + z_df = pd.merge(z_df, stats[['feature', ranking_method]], how='left', + left_index=True, right_on='feature') + z_df = z_df.sort_values(ranking_method, ascending=False) \ + .drop(ranking_method, axis=1) # discard rows according to max_rows - df = df[df['feature'].isin(df['feature'].unique()[:max_rows])] + df = df[:max_rows] + z_df = z_df[:max_rows] + stats = stats[:max_rows] + + # prepare output for front-end + df = pd.melt(df, id_vars='feature', var_name='id') + z_df = pd.melt(z_df, id_vars='feature', var_name='id') + df = df.merge(z_df, on=['id', 'feature']) + df.rename(columns={'value_x': 'value', 'value_y': 'zscore'}, + inplace=True) return { - 'data': df.to_json(orient='records'), - 'stats': stats.to_json(orient='records') + 'data': df.to_dict(orient='list'), + 'stats': stats.to_dict(orient='list') } diff --git a/fractalis/analytics/tasks/heatmap/stats.py b/fractalis/analytics/tasks/heatmap/stats.py index c76357edc3245f7ebcbc361982e1cbc027669965..42a1b6fb9ab6d543f574ccab54dd8ee973ea7b78 100644 --- a/fractalis/analytics/tasks/heatmap/stats.py +++ b/fractalis/analytics/tasks/heatmap/stats.py @@ -5,7 +5,6 @@ from typing import List, TypeVar import logging import pandas as pd -from numpy import mean, median, var from rpy2 import robjects as R from rpy2.robjects import r, pandas2ri from rpy2.robjects.packages import importr @@ -37,24 +36,24 @@ class StatisticTask(AnalyticTask): @staticmethod def get_mean_stats(df: pd.DataFrame) -> pd.DataFrame: - mean_series = df.apply(mean, axis=1) - df = mean_series.to_frame('mean') - df['feature'] = df.index - return df + means = [row.mean() for row in df.values] + stats = pd.DataFrame(means, columns=['mean']) + stats['feature'] = df.index + return stats @staticmethod def get_median_stats(df: pd.DataFrame) -> pd.DataFrame: - median_series = df.apply(median, axis=1) - df = median_series.to_frame('median') - df['feature'] = df.index - return df + means = [row.median() for row in df.values] + stats = pd.DataFrame(means, columns=['median']) + stats['feature'] = df.index + return stats @staticmethod def get_variance_stats(df: pd.DataFrame) -> pd.DataFrame: - var_series = df.apply(var, axis=1) - df = var_series.to_frame('var') - df['feature'] = df.index - return df + means = [row.var() for row in df.values] + stats = pd.DataFrame(means, columns=['var']) + stats['feature'] = df.index + return stats @staticmethod def get_limma_stats(df: pd.DataFrame, diff --git a/fractalis/config.py b/fractalis/config.py index 74b239e9c2af45e90968c7a44a87b755f3016308..f556f201ee29443701da8aa1e3ae0337a87bd19c 100644 --- a/fractalis/config.py +++ b/fractalis/config.py @@ -21,6 +21,7 @@ PERMANENT_SESSION_LIFETIME = timedelta(days=1) BROKER_URL = 'amqp://' CELERY_RESULT_BACKEND = 'redis://{}:{}'.format(REDIS_HOST, REDIS_PORT) CELERYD_TASK_SOFT_TIME_LIMIT = 60 * 20 +CELERYD_TASK_TIME_LIMIT = 60 * 30 CELERY_TASK_RESULT_EXPIRES = timedelta(hours=1) CELERYD_HIJACK_ROOT_LOGGER = False diff --git a/fractalis/data/etls/test/etl_random_numerical_array.py b/fractalis/data/etls/test/etl_random_numerical_array.py index 81fc63da81d71f52047d415c5be546d5dd606719..d3cb3bb53d9546ae2d00ab41d8a916f93fa1b4e5 100644 --- a/fractalis/data/etls/test/etl_random_numerical_array.py +++ b/fractalis/data/etls/test/etl_random_numerical_array.py @@ -18,7 +18,8 @@ class RandomNumericalETL(ETL): def extract(self, server: str, token: str, descriptor: dict) -> pd.DataFrame: - data = pd.DataFrame(np.random.randn(50000, 200).tolist()) + data = pd.DataFrame(np.random.randn( + descriptor['num_samples'], descriptor['num_features']).tolist()) return data def transform(self, raw_data: pd.DataFrame, diff --git a/fractalis/session.py b/fractalis/session.py index 118295b2c9d531a6565895d85cb8a0b606a95ad6..0d5f37337ac24ba0438a89b63165b6be49a1eeb2 100644 --- a/fractalis/session.py +++ b/fractalis/session.py @@ -24,6 +24,7 @@ class RedisSession(CallbackDict, SessionMixin): self.permanent = True self.modified = False + class RedisSessionInterface(SessionInterface): def __init__(self, redis, app): diff --git a/setup.py b/setup.py index 71be9c2146c2594a0e2a3575ba917047dd9790b7..b5f3d0e1e21178ca80cb94f757c825994b7237de 100644 --- a/setup.py +++ b/setup.py @@ -12,6 +12,7 @@ setup( 'flask-cors', 'Flask-Script', 'flask-request-id-middleware', + 'flask-compress', 'jsonschema', 'celery[redis]', 'redis', diff --git a/tests/unit/analytics/heatmap/test_main.py b/tests/unit/analytics/heatmap/test_main.py index c01bc81b72ffb5a6715c66f73b282b3824cc5465..815c24d09b6f93ea559713fa74537a12c190e4ee 100644 --- a/tests/unit/analytics/heatmap/test_main.py +++ b/tests/unit/analytics/heatmap/test_main.py @@ -34,10 +34,10 @@ class TestHeatmap: def test_functional_with_nans_and_missing(self): numerical_arrays = [ - pd.DataFrame([[101, 'foo', 5], [101, 'bar', 6], + pd.DataFrame([[101, 'foo', 5], [101, 'bar', 5], [102, 'foo', 10], - [103, 'foo', float('nan')], [103, 'bar', 16], - [104, 'foo', 20], [104, 'bar', 21]], + [103, 'foo', float('nan')], [103, 'bar', 15], + [104, 'foo', 20], [104, 'bar', 20]], columns=['id', 'feature', 'value']) ] subsets = [[101, 102], [103, 104]] @@ -48,8 +48,9 @@ class TestHeatmap: id_filter=[], max_rows=100, subsets=subsets) - stats = json.loads(result['stats']) - assert stats[0] != stats[1] + for stat in result['stats']: + if stat != 'feature' and stat != 'AveExpr': + assert result['stats'][stat][0] == result['stats'][stat][1] def test_main_raises_if_invalid_data(self): numerical_arrays = [ @@ -136,7 +137,7 @@ class TestHeatmap: id_filter=[], max_rows=100, subsets=subsets) - data = json.loads(result['data']) + data = result['data'] data = pd.DataFrame(data) assert not np.isnan(np.min(data['zscore'])) @@ -156,10 +157,10 @@ class TestHeatmap: id_filter=[], max_rows=100, subsets=subsets) - data = json.loads(result['data']) + data = result['data'] data = pd.DataFrame(data) feature_col = data['feature'].tolist() - assert ['D', 'D', 'C', 'C', 'A', 'A', 'B', 'B'] == feature_col + assert ['D', 'C', 'A', 'B', 'D', 'C', 'A', 'B'] == feature_col def test_max_rows_works(self): numerical_arrays = [ @@ -177,7 +178,7 @@ class TestHeatmap: id_filter=[], max_rows=2, subsets=subsets) - data = json.loads(result['data']) + data = result['data'] data = pd.DataFrame(data) feature_col = data['feature'].tolist() - assert ['D', 'D', 'C', 'C'] == feature_col + assert ['D', 'C', 'D', 'C'] == feature_col