Source code for app.deepbeat_converter

"""
File: app/deepbeat_converter.py
Project: 22HLT01 QUMPHY
Contact: nando.hegemann@ptb.de
Gitlab: https://gitlab.com/qumphy
Description: Convert DeepBeat Database into PPG training chunks.
SPDX-License-Identifier: EUPL-1.2
"""

import os
import numpy as np
from sklearn.model_selection import StratifiedKFold
from tqdm.auto import tqdm


def _save_arrays(prefix, suffix, signal, subject_ids, label, quality):
    """Write the four per-sample arrays as ``<prefix>_<name><suffix>.npy``."""
    np.save(f"{prefix}_signal{suffix}.npy", signal)
    np.save(f"{prefix}_subject_id{suffix}.npy", subject_ids)
    np.save(f"{prefix}_label{suffix}.npy", label)
    np.save(f"{prefix}_quality{suffix}.npy", quality)


def convert(file_name: str, save_path: str, folds: int = 1) -> str:
    """Chunk data into multiple npy files.

    Reads the arrays ``signal``, ``qa_label``, ``rhythm`` and ``parameters``
    from an ``.npz`` archive and stores signal, subject id, label and quality
    as separate ``.npy`` files. For ``folds > 1`` the samples are distributed
    over ``folds`` groups of files such that all samples of one subject end up
    in the same group, and the groups are stratified by whether a subject has
    at least one sample with label ``1``.

    Parameters
    ----------
    file_name : str
        Path to file. The base name must end in ``.npz``.
    save_path : str
        Save directory. It is created if it does not exist; a trailing path
        separator is optional.
    folds : int, optional
        Number of different files the file is split into.

    Returns
    -------
    str
        Save location as a glob-style pattern (``<save_path>/<name>_*.npy``).

    Raises
    ------
    AssertionError
        If ``folds`` is not positive or ``file_name`` does not end in ``.npz``.
    """
    base_name = os.path.basename(file_name)
    # Validate before touching the file system so bad input has no side effects.
    assert folds > 0, "folds must be a positive integer"
    assert base_name[-4:] == ".npz", "file_name must point to a .npz archive"

    os.makedirs(save_path, exist_ok=True)
    # os.path.join keeps the files inside save_path whether or not the caller
    # passed a trailing separator.
    prefix = os.path.join(save_path, base_name[:-4])

    # NOTE(review): allow_pickle=True executes pickled payloads on load; only
    # use this on archives from a trusted source.
    # Each key access on the archive re-reads that array from disk, so every
    # array is pulled out exactly once, and the archive is closed afterwards.
    with np.load(file_name, allow_pickle=True) as data:
        signal = data["signal"][:, :, 0]
        # Column index of the positive entries; this lines up with the samples
        # only if each row has exactly one positive entry -- TODO confirm.
        quality = np.where(data["qa_label"] > 0)[1]
        label = np.where(data["rhythm"] > 0)[1]
        subject_ids = data["parameters"][:, 2]

    if folds == 1:
        # no file splitting
        _save_arrays(prefix, "", signal, subject_ids, label, quality)
        return prefix + "_*.npy"

    # Split on subject level: a subject counts as positive as soon as one of
    # its samples carries label 1.
    af_ids = np.unique(subject_ids[label == 1])
    non_af_ids = np.setdiff1d(np.unique(subject_ids), af_ids)
    all_ids = np.concatenate([af_ids, non_af_ids])
    all_labels = np.array([1] * len(af_ids) + [0] * len(non_af_ids))

    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=42)
    # Only the test part of each split is used; together they partition the
    # subjects into `folds` disjoint groups.
    fold_subjects = [all_ids[tst] for _, tst in skf.split(all_labels, all_labels)]
    fold_samples = [
        np.concatenate([np.where(subject_ids == sid)[0] for sid in subjects])
        for subjects in fold_subjects
    ]

    for j, smpl in tqdm(enumerate(fold_samples), total=len(fold_samples)):
        _save_arrays(
            prefix,
            f"_{j+1:02d}",
            signal[smpl],
            subject_ids[smpl],
            label[smpl],
            quality[smpl],
        )
    return prefix + "_*.npy"
if __name__ == "__main__":
    # All converted splits are written to the same output directory.
    target_dir = "../data/deepbeat/formatted/"
    # (progress message, source archive, number of folds), processed in order:
    # training data, validation data, test data.
    jobs = (
        (
            "Convert training data (this may take some time - and RAM)",
            "../data/deepbeat/train.npz",
            20,
        ),
        ("Convert validation data", "../data/deepbeat/validate.npz", 3),
        ("Convert test data", "../data/deepbeat/test.npz", 1),
    )
    for banner, npz_file, n_folds in jobs:
        print(banner)
        saved = convert(npz_file, target_dir, folds=n_folds)
        print(f"save files to: {saved}")