Skip to content
Snippets Groups Projects
Commit 9946fb98 authored by Sascha Herzinger's avatar Sascha Herzinger
Browse files

Added API for deleting data

parent 0acfcfec
No related branches found
No related tags found
No related merge requests found
Pipeline #
......@@ -5,13 +5,35 @@ from flask import Blueprint, session, request, jsonify
from .etlhandler import ETLHandler
from .schema import create_data_schema
from fractalis.validator import validate_json, validate_schema
from fractalis import celery
from fractalis import redis
from fractalis import celery, redis
from fractalis.sync import remove_file
data_blueprint = Blueprint('data_blueprint', __name__)
def get_data_by_id(data_id, wait):
data_obj = json.loads(redis.get('data:{}'.format(data_id)))
job_id = data_obj['job_id']
async_result = celery.AsyncResult(job_id)
if wait:
async_result.get(propagate=False) # wait for results
state = async_result.state
result = async_result.result
if isinstance(result, Exception): # Exception -> str
result = "{}: {}".format(type(result).__name__, str(result))
data_obj['state'] = state
data_obj['message'] = result
data_obj['data_id'] = data_id
# remove internal information from response
del data_obj['file_path']
del data_obj['access']
return data_obj
@data_blueprint.before_request
def prepare_session():
session.permanent = True
......@@ -19,6 +41,13 @@ def prepare_session():
session['data_ids'] = []
@data_blueprint.before_request
def cleanup_session():
for data_id in session['data_ids']:
if not redis.exists('shadow:data:{}'.format(data_id)):
session['data_ids'].remove(data_id)
@data_blueprint.route('', methods=['POST'])
@validate_json
@validate_schema(create_data_schema)
......@@ -36,31 +65,16 @@ def create_data():
return jsonify({'data_ids': data_ids}), 201
def get_data_by_id(data_id, wait):
value = redis.get('data:{}'.format(data_id))
data_obj = json.loads(value)
job_id = data_obj['job_id']
async_result = celery.AsyncResult(job_id)
if wait:
async_result.get(propagate=False) # wait for results
state = async_result.state
result = async_result.result
if isinstance(result, Exception): # Exception -> str
result = "{}: {}".format(type(result).__name__, str(result))
data_obj['state'] = state
data_obj['message'] = result
data_obj['data_id'] = data_id
# remove internal information from response
del data_obj['file_path']
del data_obj['access']
return data_obj
@data_blueprint.route('', methods=['GET'])
def get_all_data_state():
wait = request.args.get('wait') == '1'
data_states = [get_data_by_id(data_id, wait)
for data_id in session['data_ids']]
return jsonify({'data_states': data_states}), 200
@data_blueprint.route('/<string:params>', methods=['GET'])
def get_data_by_params(params):
def get_data_state(params):
wait = request.args.get('wait') == '1'
# params can be data_id or dict
try:
......@@ -76,9 +90,31 @@ def get_data_by_params(params):
return jsonify({'data_state': data_obj}), 200
@data_blueprint.route('', methods=['GET'])
def get_all_data_state():
@data_blueprint.route('/<string:params>', methods=['DELETE'])
def delete_data(params):
wait = request.args.get('wait') == '1'
data_states = [get_data_by_id(data_id, wait)
for data_id in session['data_ids']]
return jsonify({'data_states': data_states}), 200
# params can be data_id or dict
try:
params = json.loads(params)
data_id = ETLHandler.compute_data_id(server=params['server'],
descriptor=params['descriptor'])
except ValueError:
data_id = params
if data_id not in session['data_ids']: # access control
return jsonify(
{'error_msg': "No matching data found. Maybe expired?"}), 404
data_obj = json.loads(redis.get('data:{}'.format(data_id)))
file_path = data_obj['file_path']
async_result = remove_file.delay(file_path)
if wait:
async_result.get(propagate=False)
redis.delete('data:{}'.format(data_id))
redis.delete('shadow:data:{}'.format(data_id))
return '', 200
@data_blueprint.route('/', methods=['DELETE'])
def delete_all_data():
for data_id in session['data_ids']:
delete_data(data_id)
return '', 200
......@@ -89,11 +89,11 @@ class ETLHandler(metaclass=abc.ABCMeta):
file_name = str(uuid4())
file_path = os.path.join(data_dir, file_name)
access = []
# test if description for data is specified
# test if label for data is specified
try:
description = descriptor['description']
label = descriptor['label']
except KeyError:
description = str(descriptor)
label = str(descriptor)
etl = ETL.factory(handler=self._handler,
data_type=descriptor['data_type'])
task_id = uuid()
......@@ -101,7 +101,8 @@ class ETLHandler(metaclass=abc.ABCMeta):
'file_path': file_path,
'job_id': task_id,
'data_type': etl.produces,
'description': description,
'label': label,
'descriptor': descriptor,
'access': access
}
redis.set(name='data:{}'.format(data_id),
......
......@@ -38,7 +38,6 @@ def get_dictionary(server: str, data_set: str,
cookies=cookie)
if r.status_code != 200:
dictionary = None
# TODO: Log this
pass
else:
dictionary = r.json()
......
......@@ -6,9 +6,18 @@ db and the file system.
import os
from shutil import rmtree
from fractalis import redis, app
from fractalis import redis, app, celery
@celery.task
def remove_file(file_path: str) -> None:
try:
os.remove(file_path)
except FileNotFoundError:
pass
@celery.task
def cleanup_all() -> None:
"""Reset redis and the filesystem. This is only useful for testing and
should !!!NEVER!!! be used otherwise.
......
......@@ -4,6 +4,7 @@ import json
from flask_script import Manager
from fractalis import app, redis
from fractalis.sync import remove_file
manager = Manager(app)
......@@ -25,10 +26,7 @@ def janitor():
print('deleting file: ', file_path)
print('deleting redis key: ', expired_key)
redis.delete(expired_key)
try:
os.remove(file_path)
except FileNotFoundError:
pass
remove_file.delay()
if __name__ == "__main__":
......
......@@ -260,12 +260,13 @@ class TestData:
body = flask.json.loads(rv.get_data())
data_state = body['data_state']
assert rv.status_code == 200, data_state
assert len(data_state) == 6 # include only minimal data in response
assert len(data_state) == 7 # include only minimal data in response
assert not data_state['message']
assert data_state['state']
assert data_state['job_id']
assert data_state['data_type']
assert data_state['description']
assert data_state['label']
assert data_state['descriptor']
assert data_state['data_id']
def test_GET_by_all_and_valid_response(self, test_client, big_post):
......@@ -279,12 +280,13 @@ class TestData:
body = flask.json.loads(rv.get_data())
for data_state in body['data_states']:
assert len(data_state) == 6
assert len(data_state) == 7
assert not data_state['message']
assert data_state['state']
assert data_state['job_id']
assert data_state['data_type']
assert data_state['description']
assert data_state['label']
assert data_state['descriptor']
assert data_state['data_id']
def test_GET_by_params_and_valid_response(self, test_client):
......@@ -308,12 +310,13 @@ class TestData:
body = flask.json.loads(rv.get_data())
data_state = body['data_state']
assert rv.status_code == 200, data_state
assert len(data_state) == 6 # include only minimal data in response
assert len(data_state) == 7 # include only minimal data in response
assert not data_state['message']
assert data_state['state']
assert data_state['job_id']
assert data_state['data_type']
assert data_state['description']
assert data_state['label']
assert data_state['descriptor']
assert data_state['data_id']
def test_404_on_GET_by_id_if_no_auth(self, test_client, big_post):
......@@ -415,7 +418,7 @@ class TestData:
with test_client.session_transaction() as sess:
assert len(sess['data_ids']) == 1
def test_no_access_on_failure(self, test_client, small_post):
def test_no_access_on_failure(self, test_client):
rv = test_client.post(
'/data', data=flask.json.dumps(dict(
handler='test',
......@@ -440,4 +443,59 @@ class TestData:
sess['data_ids'] = []
rv = test_client.get('/data/{}'.format(data_id))
assert rv.status_code == 404
\ No newline at end of file
assert rv.status_code == 404
def test_delete_and_no_db_entries(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
data_obj = redis.get('data:{}'.format(data_id))
assert data_obj
assert redis.exists('shadow:data:{}'.format(data_id))
test_client.delete('/data/{}?wait=1'.format(data_id))
assert not redis.exists('data:{}'.format(data_id))
assert not redis.exists('shadow:data:{}'.format(data_id))
def test_delete_and_no_files(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
test_client.get('/data/{}?wait=1'.format(data_id))
data_obj = json.loads(redis.get('data:{}'.format(data_id)))
assert os.path.exists(data_obj['file_path'])
test_client.delete('/data/{}?wait=1'.format(data_id))
assert not os.path.exists(data_obj['file_path'])
def test_delete_and_no_id_in_session(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
test_client.get('/data/{}?wait=1'.format(data_id))
with test_client.session_transaction() as sess:
assert data_id in sess['data_ids']
test_client.delete('/data/{}?wait=1'.format(data_id))
test_client.get('/data/{}?wait=1'.format(data_id))
with test_client.session_transaction() as sess:
assert data_id not in sess['data_ids']
def test_cannot_delete_without_auth(self, test_client, small_post):
rv = small_post(random=True)
body = flask.json.loads(rv.get_data())
assert rv.status_code == 201, body
data_ids = body['data_ids']
assert len(data_ids) == 1
data_id = data_ids[0]
test_client.get('/data/{}?wait=1'.format(data_id))
with test_client.session_transaction() as sess:
sess['data_ids'] = []
rv = test_client.delete('/data/{}?wait=1'.format(data_id))
assert rv.status_code == 404
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment