"""
File: app/deepbeat_converter.py
Project: 22HLT01 QUMPHY
Contact: nando.hegemann@ptb.de
Gitlab: https://gitlab.com/qumphy
Description: Convert DeepBeat Database into PPG training chunks.
SPDX-License-Identifier: EUPL-1.2
"""
import os
import numpy as np
from sklearn.model_selection import StratifiedKFold
from tqdm.auto import tqdm
# [docs]  (Sphinx "view source" link marker left over from HTML extraction; not part of the program)
def convert(file_name: str, save_path: str, folds: int = 1) -> str:
    """Chunk data into multiple npy files.

    The arrays ``signal``, ``qa_label``, ``rhythm`` and ``parameters`` are
    read from the ``.npz`` archive and written as separate ``.npy`` files
    (signal, subject id, label, quality).  With ``folds > 1`` the subjects
    are distributed over ``folds`` groups with a stratified k-fold split
    (fixed ``random_state=42``), stratified on whether a subject has at
    least one sample with label ``1``; all samples of a subject end up in
    the same group, and each group gets its own numbered set of files.

    Parameters
    ----------
    file_name : str
        Path to file. The base name has to end in ``.npz``.
    save_path : str
        Save directory. Created if it does not exist. A trailing path
        separator is optional.
    folds : int, optional
        Number of different files the file is split into.

    Returns
    -------
    str
        Glob-style pattern ``<save_path>/<file id>_*.npy`` describing the
        save location of the written files.

    Raises
    ------
    AssertionError
        If ``folds`` is not positive or ``file_name`` does not end in
        ``.npz``.
    """
    # Raised explicitly (instead of via ``assert``) so the checks survive
    # ``python -O`` while the exception type seen by callers stays the same.
    if folds <= 0:
        raise AssertionError("folds must be a positive integer")
    base_name = os.path.basename(file_name)
    if not base_name.endswith(".npz"):
        raise AssertionError("file_name must point to a .npz archive")

    os.makedirs(save_path, exist_ok=True)
    # os.path.join inserts the separator only when it is missing, so
    # save_path works with and without a trailing slash.
    prefix = os.path.join(save_path, base_name[:-4])

    # NOTE(review): allow_pickle=True unpickles object arrays and can execute
    # arbitrary code - only use this on archives from a trusted source.
    with np.load(file_name, allow_pickle=True) as data:
        # Every access to an archive key re-reads the array from disk, so
        # each array is fetched exactly once here.
        # Column index of the positive entries; presumably qa_label and
        # rhythm are one-hot encoded per sample - TODO confirm.
        quality = np.where(data["qa_label"] > 0)[1]
        label = np.where(data["rhythm"] > 0)[1]
        subject_ids = data["parameters"][:, 2]
        signal = data["signal"]

    if folds == 1:  # no file splitting
        np.save(prefix + "_signal.npy", signal[:, :, 0])
        np.save(prefix + "_subject_id.npy", subject_ids)
        np.save(prefix + "_label.npy", label)
        np.save(prefix + "_quality.npy", quality)
        return prefix + "_*.npy"

    # Split on subject level: subjects with at least one sample labelled 1
    # form the positive class for the stratification.
    af_ids = np.unique(subject_ids[label == 1])
    non_af_ids = np.setdiff1d(np.unique(subject_ids), af_ids)
    all_ids = np.concatenate([af_ids, non_af_ids])
    all_labels = np.array([1] * len(af_ids) + [0] * len(non_af_ids))

    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)
    # Only the held-out part of each split is used, which partitions the
    # subjects into `folds` disjoint groups.
    fold_subjects = [all_ids[tst] for _, tst in skf.split(all_labels, all_labels)]
    # Sample indices per group, concatenated subject by subject.
    fold_samples = [
        np.concatenate([np.where(subject_ids == sid)[0] for sid in subjects])
        for subjects in fold_subjects
    ]

    for j, smpl in tqdm(enumerate(fold_samples, start=1), total=len(fold_samples)):
        np.save(prefix + f"_signal_{j:02d}.npy", signal[smpl, :, 0])
        np.save(prefix + f"_subject_id_{j:02d}.npy", subject_ids[smpl])
        np.save(prefix + f"_label_{j:02d}.npy", label[smpl])
        np.save(prefix + f"_quality_{j:02d}.npy", quality[smpl])
    return prefix + "_*.npy"
if __name__ == "__main__":
    # All three DeepBeat archives are converted into the same output
    # directory; they differ only in the announcement, the source archive
    # and the number of files each one is split into.
    target_dir = "../data/deepbeat/formatted/"
    jobs = (
        # training data
        (
            "Convert training data (this may take some time - and RAM)",
            "../data/deepbeat/train.npz",
            20,
        ),
        # validation data
        ("Convert validation data", "../data/deepbeat/validate.npz", 3),
        # test data
        ("Convert test data", "../data/deepbeat/test.npz", 1),
    )
    for banner, archive, n_folds in jobs:
        print(banner)
        saved = convert(archive, target_dir, folds=n_folds)
        print(f"save files to: {saved}")