From 26103cb839c9cb8fe1f396fb34534fb3bca8b1a3 Mon Sep 17 00:00:00 2001
From: Sascha Herzinger <sascha.herzinger@uni.lu>
Date: Wed, 10 May 2017 14:31:49 +0200
Subject: [PATCH] Implementing logging for fractalis

---
 fractalis/__init__.py        | 46 +++++++++++++++++++++++++-----------
 fractalis/celeryapp.py       |  9 +++++++
 fractalis/config.py          |  7 +++++-
 fractalis/data/controller.py | 12 ++++------
 fractalis/data/etl.py        | 16 +++++++++++--
 logging.yaml                 | 26 ++++++++++++++++++++
 setup.py                     |  3 ++-
 7 files changed, 93 insertions(+), 26 deletions(-)
 create mode 100644 logging.yaml

diff --git a/fractalis/__init__.py b/fractalis/__init__.py
index b0085e5..c7e06fd 100644
--- a/fractalis/__init__.py
+++ b/fractalis/__init__.py
@@ -3,56 +3,74 @@
 Modules in this package:
     - config -- Manages Fractalis Flask app configuration
 """
-import logging
+import logging.config
 
+import yaml
 from flask import Flask
 from flask_cors import CORS
 from flask_session import Session
 from redis import StrictRedis
-
+from celery.signals import after_setup_logger, after_setup_task_logger
 
 app = Flask(__name__)
 
-# allow everyone to submit requests
-CORS(app, supports_credentials=True)
 # Configure app with defaults
 app.config.from_object('fractalis.config')
 # Configure app with manually settings
 try:
     app.config.from_envvar('FRACTALIS_CONFIG')
-    app.logger.info("FRACTALIS_CONFIG environment variable is set and was "
-                    "applied to Flask app.")
+    default_config = False
 except RuntimeError:
-    app.logger.warning("FRACTALIS_CONFIG environment variable is not set. "
-                       "Using defaults for Flask app.")
+    default_config = True
+    pass
+
+# setup logging
+with open('logging.yaml', 'rt') as f:
+    log_config = yaml.safe_load(f.read())
+logging.config.dictConfig(log_config)
+log = logging.getLogger(__name__)
+
+# we can't log this earlier because the logger depends on the loaded app config
+if default_config:
+    log.error("Environment Variable FRACTALIS_CONFIG not set. Falling back "
+              "to default settings. This is not a good idea in production!")
 
 # create a redis instance
+log.info("Creating Redis connection.")
 redis = StrictRedis(host=app.config['REDIS_HOST'],
                     port=app.config['REDIS_PORT'])
 
 # Configure app with composed configurations to save admin some work
 app.config['SESSION_REDIS'] = redis
 app.config['CELERY_RESULT_BACKEND'] = 'redis://{}:{}'.format(
-        app.config['REDIS_HOST'], app.config['REDIS_PORT'])
+    app.config['REDIS_HOST'], app.config['REDIS_PORT'])
 
 # Set new session interface for app
+log.info("Replacing default session interface.")
 Session(app)
 
+# allow everyone to submit requests
+log.info("Setting up CORS.")
+CORS(app, supports_credentials=True)
+
+# create celery app
+log.info("Creating celery app.")
 from fractalis.celeryapp import make_celery, register_tasks  # noqa
 celery = make_celery(app)
 
 # register blueprints
 from fractalis.analytics.controller import analytics_blueprint  # noqa
 from fractalis.data.controller import data_blueprint  # noqa
+log.info("Registering Flask blueprints.")
 app.register_blueprint(analytics_blueprint, url_prefix='/analytics')
 app.register_blueprint(data_blueprint, url_prefix='/data')
 
+log.info("Registering celery tasks.")
 register_tasks()
 
+log.info("Initialisation of service complete.")
+
 if __name__ == '__main__':
-    handler = logging.handlers.TimedRotatingFileHandler('fractalis.log',
-                                                        when='midnight',
-                                                        backupCount=14)
-    handler.setLevel(logging.INFO)
-    app.logger.addHandler(handler)
+    log.info("Starting builtin web server.")
     app.run()
+    log.info("Builtin web server started.")
diff --git a/fractalis/celeryapp.py b/fractalis/celeryapp.py
index 895bbb4..ba0ca22 100644
--- a/fractalis/celeryapp.py
+++ b/fractalis/celeryapp.py
@@ -1,6 +1,8 @@
 """This module is responsible for the establishment and configuration of a
 Celery instance."""
 
+import logging
+
 from celery import Celery
 
 from fractalis.analytics.job import AnalyticsJob
@@ -8,6 +10,9 @@ from fractalis.data.etl import ETL
 from fractalis.utils import list_classes_with_base_class
 
 
+logger = logging.getLogger(__name__)
+
+
 def make_celery(app):
     celery = Celery(app.import_name,
                     backend=app.config['CELERY_RESULT_BACKEND'],
@@ -29,13 +34,17 @@ def make_celery(app):
 def register_tasks():
     from fractalis import celery
 
+    logger.info("Registering ETLs ...")
     etl_classes = list_classes_with_base_class('fractalis.data.etls', ETL)
     for etl_class in etl_classes:
+        logger.info("Registering task: {}".format(etl_class.name))
         celery.tasks.register(etl_class)
 
+    logger.info("Registering analysis tasks ...")
     analytics_job_classes = list_classes_with_base_class(
         'fractalis.analytics.job', AnalyticsJob)
     for analytics_job_class in analytics_job_classes:
+        logger.info("Registering task: {}".format(analytics_job_class.name))
         celery.tasks.register(analytics_job_class)
 
 
diff --git a/fractalis/config.py b/fractalis/config.py
index fa2a8ab..a71c9c9 100644
--- a/fractalis/config.py
+++ b/fractalis/config.py
@@ -1,9 +1,11 @@
 import os
+import logging
 from uuid import uuid4
 from datetime import timedelta
 
 # DO NOT MODIFY THIS FILE DIRECTLY
 
+
 # Flask
 SECRET_KEY = str(uuid4())  # set me manually in production
 DEBUG = False
@@ -15,7 +17,6 @@ SESSION_COOKIE_SECURE = False
 SESSION_REFRESH_EACH_REQUEST = True
 PERMANENT_SESSION_LIFETIME = timedelta(days=1)
 
-
 # Flask-Session
 SESSION_TYPE = 'redis'
 SESSION_PERMANENT = True
@@ -24,10 +25,14 @@ SESSION_USE_SIGNER = False
 # Celery
 BROKER_URL = 'amqp://'
 CELERYD_TASK_SOFT_TIME_LIMIT = 60 * 10
+CELERYD_HIJACK_ROOT_LOGGER = False
 
 # Fractalis
+LOG_LEVEL = logging.INFO
+LOG_FILE = os.path.join(os.sep, 'tmp', 'fractalis.log')
 FRACTALIS_TMP_DIR = os.path.abspath(os.path.join(
     os.sep, 'tmp', 'fractalis'))
 FRACTALIS_CACHE_EXP = timedelta(days=10)
 
+
 # DO NOT MODIFY THIS FILE DIRECTLY
diff --git a/fractalis/data/controller.py b/fractalis/data/controller.py
index 60a2d63..39742ca 100644
--- a/fractalis/data/controller.py
+++ b/fractalis/data/controller.py
@@ -91,12 +91,6 @@ def get_data_state(params):
     return jsonify({'data_state': data_obj}), 200
 
 
-def delete_data_id(data_id):
-    redis.delete('data:{}'.format(data_id))
-    redis.delete('shadow:data:{}'.format(data_id))
-    session['data_ids'].remove(data_id)
-
-
 @data_blueprint.route('/<string:params>', methods=['DELETE'])
 def delete_data(params):
     wait = request.args.get('wait') == '1'
@@ -116,12 +110,14 @@ def delete_data(params):
     async_result = remove_file.delay(file_path)
     if wait:
         async_result.get(propagate=False)
-    delete_data_id(data_id)
+    redis.delete('data:{}'.format(data_id))
+    redis.delete('shadow:data:{}'.format(data_id))
+    session['data_ids'].remove(data_id)
     return '', 200
 
 
 @data_blueprint.route('', methods=['DELETE'])
 def delete_all_data():
     for data_id in session['data_ids']:
-        delete_data_id(data_id)
+        delete_data(data_id)
     return '', 200
diff --git a/fractalis/data/etl.py b/fractalis/data/etl.py
index dbabcb5..4a0e828 100644
--- a/fractalis/data/etl.py
+++ b/fractalis/data/etl.py
@@ -2,6 +2,7 @@
 
 import abc
 import json
+import logging
 from typing import List
 
 from celery import Task
@@ -10,6 +11,9 @@ from pandas import DataFrame
 from fractalis import redis
 
 
+logger = logging.getLogger(__name__)
+
+
 class ETL(Task, metaclass=abc.ABCMeta):
     """This is an abstract class that implements a celery Task and provides a
     factory method to create instances of implementations of itself. Its main
@@ -136,11 +140,19 @@ class ETL(Task, metaclass=abc.ABCMeta):
         :param session_id: The id of the session that requested this job
         :param data_id: The id of the data object that is related to this ETL
         """
+        logger.info("Starting ETL process ...")
+        logger.info("(E)xtracting data from server '{}'.".format(server))
         raw_data = self.extract(server, token, descriptor)
+        logger.info("(T)ransforming data to Fractalis format.")
         data_frame = self.transform(raw_data)
         if not isinstance(data_frame, DataFrame):
-            raise TypeError("transform() must return 'pandas.DataFrame', but"
-                            "returned '{}' instead.".format(type(data_frame)))
+            try:
+                raise TypeError(
+                    "transform() must return 'pandas.DataFrame', but returned "
+                    "'{}' instead.".format(type(data_frame)))
+            except TypeError as e:
+                logging.exception(e)
+                raise
         self.load(data_frame, file_path)
         # at this point we know that the session has permission to read the data
         # otherwise authentication with target API would have failed
diff --git a/logging.yaml b/logging.yaml
new file mode 100644
index 0000000..4a9c085
--- /dev/null
+++ b/logging.yaml
@@ -0,0 +1,26 @@
+version: 1
+disable_existing_loggers: False
+formatters:
+  default:
+    format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+handlers:
+  default:
+    class: logging.handlers.TimedRotatingFileHandler
+    level: WARNING
+    formatter: default
+    filename: fractalis.log
+    when: midnight
+    backupCount: 14
+    encoding: utf8
+  console:
+    class: logging.StreamHandler
+    level: DEBUG
+    formatter: default
+
+root:
+    level: INFO
+    handlers: [default, console]
+fractalis:
+  Flask:
+    level: INFO
+    handlers: [default, console]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 92ef5ba..ee851b4 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,8 @@ setup(
         'pandas',
         'numpy',
         'scipy',
-        'requests'
+        'requests',
+        'PyYAML'
     ],
     setup_requires=[
         'pytest-runner',
-- 
GitLab