From a9ea7ad61c0433e2ace65769f39e70da3db5389d Mon Sep 17 00:00:00 2001
From: "sascha.herzinger" <sascha.herzinger@uni.lu>
Date: Thu, 26 Oct 2017 15:22:59 +0200
Subject: [PATCH] Implemented Janitor

---
 fractalis/sync.py                          | 10 +++------
 manage.py                                  | 13 ++++++-----
 tests/unit/test_manage.py                  | 26 ++++++++++++++++++++++
 tests/{functional => unit}/test_session.py |  0
 4 files changed, 37 insertions(+), 12 deletions(-)
 create mode 100644 tests/unit/test_manage.py
 rename tests/{functional => unit}/test_session.py (100%)

diff --git a/fractalis/sync.py b/fractalis/sync.py
index 4c12754..dc4d78b 100644
--- a/fractalis/sync.py
+++ b/fractalis/sync.py
@@ -14,7 +14,7 @@ from fractalis import redis, app, celery
 logger = logging.getLogger(__name__)
 
 
-def remove_data(task_id: str, wait: bool=False) -> None:
+def remove_data(task_id: str) -> None:
     """Remove all traces of any data associated with the given id. That includes
     redis and the file system.
     :param task_id: The id associated with a data state
@@ -26,18 +26,14 @@ def remove_data(task_id: str, wait: bool=False) -> None:
     redis.delete(key)
     if value:
         data_state = json.loads(value)
-        async_result = remove_file.delay(data_state['file_path'])
-        if wait:
-            async_result.get(propagate=False)
+        remove_file(data_state['file_path'])
     else:
         logger.warning("Can't delete file for task id '{}',because there is "
                        "no associated entry in Redis.".format(task_id))
 
 
-@celery.task
 def remove_file(file_path: str) -> None:
-    """Remove the file for the given file path. This is a task because celery
-    workers might not have the same file system than the web service.
+    """Remove the file for the given file path.
     :param file_path: Path of file to remove.
     """
     try:
diff --git a/manage.py b/manage.py
index 79036e9..67687cd 100644
--- a/manage.py
+++ b/manage.py
@@ -1,21 +1,24 @@
 import os
-import json
 
 from flask_script import Manager
 
-from fractalis import app, redis
-from fractalis.sync import remove_file
+from fractalis import app, redis, sync
 
 
 manager = Manager(app)
 
 
 @manager.command
-def janitor() -> None:
+def janitor():
     """Ideally this is maintained by a systemd service to cleanup redis and the
     file system while Fractalis is running.
     """
-    raise NotImplementedError()
+    tmp_dir = app.config['FRACTALIS_TMP_DIR']
+    # Redis keys have the form 'data:<id>'; cached files are named '<id>'.
+    tracked = {key.split(':')[1] for key in redis.scan_iter('data:*')}
+    for name in set(os.listdir(tmp_dir)) - tracked:
+        if os.path.isfile(os.path.join(tmp_dir, name)):
+            sync.remove_file(os.path.join(tmp_dir, name))
 
 
 if __name__ == "__main__":
diff --git a/tests/unit/test_manage.py b/tests/unit/test_manage.py
new file mode 100644
index 0000000..52b9c35
--- /dev/null
+++ b/tests/unit/test_manage.py
@@ -0,0 +1,26 @@
+"""This module provides tests for the janitor"""
+
+import os
+from pathlib import Path
+
+import manage
+from fractalis import app, redis
+
+
+# noinspection PyMissingOrEmptyDocstring,PyMissingTypeHints
+class TestManage:
+    def test_janitor_removes_untracked_files(self):
+        tmp_dir = app.config['FRACTALIS_TMP_DIR']
+        os.makedirs(tmp_dir, exist_ok=True)
+        Path(os.path.join(tmp_dir, 'abc')).touch()
+        redis.delete('data:abc')  # a stale key would make 'abc' look tracked
+        manage.janitor()
+        assert not os.path.exists(os.path.join(tmp_dir, 'abc'))
+
+    def test_janitor_does_not_remove_tracked_files(self):
+        tmp_dir = app.config['FRACTALIS_TMP_DIR']
+        os.makedirs(tmp_dir, exist_ok=True)
+        Path(os.path.join(tmp_dir, 'abc')).touch()
+        redis.set('data:abc', '')
+        manage.janitor()
+        assert os.path.exists(os.path.join(tmp_dir, 'abc'))
diff --git a/tests/functional/test_session.py b/tests/unit/test_session.py
similarity index 100%
rename from tests/functional/test_session.py
rename to tests/unit/test_session.py
-- 
GitLab