Skip to content
Snippets Groups Projects
Commit 0a1484cb authored by Sascha Herzinger
Browse files

added more random data generators

parent 6a5d9f8b
No related branches found
No related tags found
No related merge requests found
Pipeline #
"""This module provides sample data."""
import pandas as pd
import random
from fractalis.data.etl import ETL
class RandomCategoricalETL(ETL):
    """Test ETL that fabricates random categorical sample data.

    Nothing is fetched from a server: ``extract`` draws values at random
    from the descriptor and ``transform`` reshapes them into long format.
    """

    name = 'test_categorical_etl'
    produces = 'categorical'

    @staticmethod
    def can_handle(handler: str, descriptor: dict) -> bool:
        """Tell whether this ETL is responsible for the given request.

        Args:
            handler: Name of the requested handler.
            descriptor: Request description; its ``'data_type'`` entry is
                read when ``handler`` is ``'test'``.

        Returns:
            True if ``handler`` is ``'test'`` and the descriptor's
            ``'data_type'`` is ``'categorical'``, False otherwise.

        Raises:
            KeyError: If ``handler`` is ``'test'`` and ``descriptor`` has
                no ``'data_type'`` entry.
        """
        return handler == 'test' and \
            descriptor['data_type'] == 'categorical'

    def extract(self, server: str,
                token: str, descriptor: dict) -> pd.DataFrame:
        """Generate random categorical values instead of querying a server.

        Args:
            server: Accepted for interface compatibility; not used here.
            token: Accepted for interface compatibility; not used here.
            descriptor: Must provide ``'values'`` (the pool to draw from)
                and ``'num_samples'`` (how many draws to make).

        Returns:
            A DataFrame built from the list of drawn values, one row per
            draw.

        Raises:
            KeyError: If ``'values'`` or ``'num_samples'`` is missing.
            IndexError: If ``'values'`` is empty while ``'num_samples'``
                is positive (raised by ``random.choice``).
        """
        data = pd.DataFrame([random.choice(descriptor['values'])
                             for _ in range(descriptor['num_samples'])])
        return data

    def transform(self, raw_data: pd.DataFrame,
                  descriptor: dict) -> pd.DataFrame:
        """Reshape the extracted frame into long format.

        Args:
            raw_data: Frame as returned by ``extract``. It is left
                untouched.
            descriptor: Not used here.

        Returns:
            The melted frame: an ``'id'`` column holding the original row
            index as strings, a ``'feature'`` column holding the original
            column labels, and the value column produced by ``pd.melt``.
        """
        # DataFrame.insert works in place. Operating on a copy keeps the
        # caller's frame free of the extra 'id' column and allows transform
        # to be called again on the same input without a ValueError.
        frame = raw_data.copy()
        frame.insert(0, 'id', frame.index.astype('str'))
        df = pd.melt(frame, id_vars='id', var_name='feature')
        return df
"""This module provides sample data."""
import pandas as pd
import numpy as np
import string
import random
from fractalis.data.etl import ETL
class RandomNumericalETL(ETL):
    """Test ETL that fabricates random numerical sample data.

    Nothing is fetched from a server: ``extract`` generates one randomly
    named column of random numbers and ``transform`` reshapes it into long
    format.
    """

    name = 'test_numerical_etl'
    produces = 'numerical'

    @staticmethod
    def can_handle(handler: str, descriptor: dict) -> bool:
        """Tell whether this ETL is responsible for the given request.

        Args:
            handler: Name of the requested handler.
            descriptor: Request description; its ``'data_type'`` entry is
                read when ``handler`` is ``'test'``.

        Returns:
            True if ``handler`` is ``'test'`` and the descriptor's
            ``'data_type'`` is ``'numerical'``, False otherwise.

        Raises:
            KeyError: If ``handler`` is ``'test'`` and ``descriptor`` has
                no ``'data_type'`` entry.
        """
        return handler == 'test' and \
            descriptor['data_type'] == 'numerical'

    def extract(self, server: str,
                token: str, descriptor: dict) -> pd.DataFrame:
        """Generate random numerical values instead of querying a server.

        Args:
            server: Accepted for interface compatibility; not used here.
            token: Accepted for interface compatibility; not used here.
            descriptor: Must provide ``'num_samples'``, the number of rows
                to generate.

        Returns:
            A single-column DataFrame with ``num_samples`` values drawn via
            ``np.random.randn``. The column label is a random string of 30
            ASCII letters and digits.

        Raises:
            KeyError: If ``'num_samples'`` is missing from ``descriptor``.
        """
        # A random label stands in for a real feature name.
        feature = ''.join(random.choice(string.ascii_letters + string.digits)
                          for _ in range(30))
        data = pd.DataFrame(
            np.random.randn(descriptor['num_samples']).tolist(),
            columns=[feature])
        return data

    def transform(self, raw_data: pd.DataFrame,
                  descriptor: dict) -> pd.DataFrame:
        """Reshape the extracted frame into long format.

        Args:
            raw_data: Frame as returned by ``extract``. It is left
                untouched.
            descriptor: Not used here.

        Returns:
            The melted frame: an ``'id'`` column holding the original row
            index as strings, a ``'feature'`` column holding the original
            column labels, and the value column produced by ``pd.melt``.
        """
        # DataFrame.insert works in place. Operating on a copy keeps the
        # caller's frame free of the extra 'id' column and allows transform
        # to be called again on the same input without a ValueError.
        frame = raw_data.copy()
        frame.insert(0, 'id', frame.index.astype('str'))
        df = pd.melt(frame, id_vars='id', var_name='feature')
        return df
......@@ -2,11 +2,13 @@
import pandas as pd
import numpy as np
import string
import random
from fractalis.data.etl import ETL
class RandomNumericalETL(ETL):
class RandomNumericalArrayETL(ETL):
name = 'test_numerical_array_etl'
produces = 'numerical_array'
......@@ -18,8 +20,12 @@ class RandomNumericalETL(ETL):
def extract(self, server: str,
token: str, descriptor: dict) -> pd.DataFrame:
features = [''.join(random.choice(string.ascii_letters + string.digits)
for _ in range(10))
for _ in range(descriptor['num_features'])]
data = pd.DataFrame(np.random.randn(
descriptor['num_samples'], descriptor['num_features']).tolist())
descriptor['num_samples'], descriptor['num_features']).tolist(),
columns=features)
return data
def transform(self, raw_data: pd.DataFrame,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment