# Source code for sleepless.data.EDF

# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Sleep-EDF (expanded) dataset for sleep analysis.

The database includes 197 whole-night PSG recordings in 2 subsets (SC and ST):

ST contains 44 PSGs (9-hour nights) of 22 Caucasian subjects (18-79 years
old), healthy (without medication) but with difficulty falling asleep.

SC contains 153 PSGs (20 hours) of 78 Caucasian subjects (25-101 years old),
healthy (without medication).

* Reference: [SLEEP_EDF-2018]_

* Protocols ``st-unfiltered`` and ``st-filtered`` (splits from ``st_subset.json``):

  * Train split: 28 (from ST subset)
  * Validation split: 8 (from ST subset)
  * Test split: 8 (from ST subset)

* Protocols ``sc-unfiltered``, ``sc-filtered`` and ``sc-filtered-crop-wake``
  (splits from ``sc_subset.json``):

  * Train split: 97 (from SC subset)
  * Validation split: 28 (from SC subset)
  * Test split: 28 (from SC subset)
"""
from __future__ import annotations

import importlib.resources
import logging
import os
import pathlib

from collections.abc import Mapping

import numpy as np
import pandas as pd

from ...utils.rc import load_rc
from ..dataset import JSONDataset
from ..loader import Loader, load_annotation_raw, load_edf_raw

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Root directory of the raw Sleep-EDF files: the "datadir.EDF" entry of the
# configuration returned by ``load_rc()``, falling back to the resolved
# current working directory when that entry is absent.
_root_path = load_rc().get("datadir.EDF", os.path.realpath(os.curdir))

# Value of the "cachedatadir.EDF" configuration entry; ``LoaderEdf`` exposes
# it as ``preproc_path``. No fallback is passed, so a missing entry yields
# whatever ``.get`` returns by default (presumably ``None`` -- confirm
# against ``load_rc``).
_root_path_preprocess = load_rc().get("cachedatadir.EDF")


class LoaderEdf(Loader):
    """Loader for the Sleep-EDF recordings and their subject metadata.

    Recording, annotation and spreadsheet paths are all resolved relative to
    the module-level ``_root_path``.
    """

    def __init__(
        self, transform_parameters, csv_subset, protocol_name
    ) -> None:
        """Forward the arguments to ``Loader`` and set ``preproc_path``.

        ``preproc_path`` is assigned after the parent initialiser has run and
        takes the module-level ``_root_path_preprocess`` value.
        """
        super().__init__(transform_parameters, csv_subset, protocol_name)
        self.preproc_path = _root_path_preprocess

    def _get_metadata_from_csv(self, csv_subset: str) -> dict:
        """Read per-recording subject metadata from the subjects spreadsheet.

        The spreadsheet (``SC-subjects.xls`` or ``ST-subjects.xls``) is looked
        up directly under ``_root_path``. Each recording is keyed by a
        six-character identifier built from a subset prefix (``SC4``/``ST7``),
        the subject number zero-padded to two digits, and the night number.

        Works for the ST and SC subsets of the EDF database only.

        :param csv_subset: subset to read, either ``"sc"`` or ``"st"``
        :return: mapping from recording key to a dict with the keys ``id``,
            ``age``, ``gender`` and ``medication``; empty when the spreadsheet
            is missing or the subset is unknown (both cases are logged)
        """
        csv_sc = os.path.join(_root_path, "SC-subjects.xls")
        csv_st = os.path.join(_root_path, "ST-subjects.xls")

        _id = None
        age = None
        gender = None
        medication = None
        key = None

        if csv_subset == "sc":
            if pathlib.Path(csv_sc).is_file():
                df = pd.read_excel(csv_sc)
                # Columns by position: subject, night, age, sex code.
                _id = df.iloc[:, 0].values.astype(str)
                night_number = df.iloc[:, 1].values
                age = df.iloc[:, 2].values
                gender = ["F" if i == 1 else "M" for i in df.iloc[:, 3].values]
                # Stored as the literal string "None", not the None object.
                medication = ["None"] * len(gender)
                key = [
                    "SC4" + str(subject).zfill(2) + str(night)
                    for subject, night in zip(_id, night_number)
                ]
            else:
                logger.warning(
                    f"The file {csv_sc} could not be access, metadata are not attached"
                )
        elif csv_subset == "st":
            if pathlib.Path(csv_st).is_file():
                df = pd.read_excel(csv_st)
                # The first row of every column is skipped (presumably a
                # second header row -- confirm against the sheet). Each
                # subject has two nights, so the per-subject columns are
                # duplicated: first half = placebo night, second half =
                # medicated night.
                subject = df.iloc[:, 0][1:].values.astype(str)
                _id = np.concatenate((subject, subject))
                subject_age = df.iloc[:, 1][1:].values
                age = np.concatenate((subject_age, subject_age))
                # NOTE(review): this reuses the SC convention (1 -> "F");
                # confirm the ST sheet encodes sex the same way.
                gender = ["F" if i == 1 else "M" for i in df.iloc[:, 2][1:].values]
                gender += gender
                placebo_night_number = df.iloc[:, 3][1:].values
                medicate_night_number = df.iloc[:, 5][1:].values
                # Labels must follow the same order as ``night_number`` below
                # (placebo nights first, then medicated nights); they were
                # previously swapped.
                medication = ["Placebo"] * len(placebo_night_number) + [
                    "Temazepam"
                ] * len(medicate_night_number)
                night_number = np.concatenate(
                    (placebo_night_number, medicate_night_number)
                )
                key = [
                    "ST7" + str(subject).zfill(2) + str(night)
                    for subject, night in zip(_id, night_number)
                ]
            else:
                # Same severity as the SC branch: metadata silently missing
                # is worth a warning in both subsets.
                logger.warning(
                    f"The file {csv_st} could not be access, metadata are not attached"
                )
        else:
            logger.error("Unknown id")

        # Nothing was read: spreadsheet missing or subset not recognised.
        if _id is None:
            return {}

        attribute = ["id", "age", "gender", "medication"]
        return {
            record_key: dict(zip(attribute, values))
            for record_key, values in zip(
                key, zip(_id, age, gender, medication)
            )
        }

    def _raw_data_loader(self, sample):
        """Load the recording and its annotations for one sample.

        :param sample: mapping whose ``"data"`` and ``"label"`` entries are
            paths relative to ``_root_path``
        :return: tuple ``(raw, annotations)`` as returned by
            ``load_edf_raw`` and ``load_annotation_raw``
        """
        infer_types = True
        preload = False
        # Channel names passed to ``load_edf_raw`` as its ``misc`` argument.
        misc = ["Temp rectal", "Event marker", "Marker"]

        raw_from_file = load_edf_raw(
            os.path.join(_root_path, sample["data"]), infer_types, preload, misc
        )
        label_from_file = load_annotation_raw(
            os.path.join(_root_path, sample["label"])
        )
        return (raw_from_file, label_from_file)

    def _map_key_metadata(self, key):
        """Return the metadata key for a sample path.

        Takes the second ``/``-separated component of ``key`` and keeps its
        first six characters, which is the length of the keys built by
        ``_get_metadata_from_csv``.
        """
        return key.split("/")[1][0:6]
# Annotation description -> integer class. Stages 3 and 4 share class 3, and
# "Movement time" shares class 0 with "Sleep stage W".
_EVENT_DICT: dict[str, int] = {
    "Sleep stage W": 0,
    "Sleep stage 1": 1,
    "Sleep stage 2": 2,
    "Sleep stage 3": 3,
    "Sleep stage 4": 3,
    "Sleep stage R": 4,
    "Movement time": 0,
}


def _epoching(crop_wake_time: float, **extra: str) -> dict:
    """Return a fresh ``raw-to-epochs-params`` mapping (30 s chunks)."""
    return {
        "event_id": _EVENT_DICT,
        "chunk_duration": 30.0,
        "crop_wake_time": crop_wake_time,
        **extra,
    }


def _band_filter() -> dict:
    """Return a fresh ``band-filter`` mapping."""
    return {"freq-range": [0.3, 30], "filter-len": 300}


# Transform parameter sets; each one owns its own nested dictionaries.
_UNFILTERED = {"raw-to-epochs-params": _epoching(0.0)}

_FILTERED = {
    "raw-to-epochs-params": _epoching(0.0),
    "band-filter": _band_filter(),
}

_FILTERED_WAKE_30_MIN = {
    "raw-to-epochs-params": _epoching(
        30.0, wake_stage_name="Sleep stage W"
    ),
    "band-filter": _band_filter(),
}

# Split definitions shipped alongside this module.
_package_files = importlib.resources.files(__name__)
_st_json = _package_files.joinpath("st_subset.json")
_sc_json = _package_files.joinpath("sc_subset.json")

# Protocol name -> (split definition file, transform parameters).
_protocols: dict[
    str,
    tuple[
        str | pathlib.Path | importlib.abc.Traversable,
        Mapping,
    ],
] = {
    "st-unfiltered": (_st_json, _UNFILTERED),
    "sc-unfiltered": (_sc_json, _UNFILTERED),
    "st-filtered": (_st_json, _FILTERED),
    "sc-filtered": (_sc_json, _FILTERED),
    "sc-filtered-crop-wake": (_sc_json, _FILTERED_WAKE_30_MIN),
}

dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("data", "label"),
    loader=LoaderEdf,
)
"""Sleep-EDF dataset object."""