Source code for sleepless.data.EDF

# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Sleep-EDF (expanded) dataset for sleep analysis.

The database includes 197 all night PSGs recording in 2 subsets (SC and ST):

ST contains 44 PSG (9-hours-night) of 22 Caucasian (between  18-79 years old),
healthy (without medication) but with difficulty to fall asleep.

SC contains 153 PSG (20 hours) of 78 Caucasian (between  25-101 years old),
healthy (without medication)

* Reference: [SLEEP_EDF-2018]_

* Protocol ``st_subset``:

  * Train split: 28 (from ST subset)
  * Validation split: 8 (from ST subset)
  * Test split: 8 (from ST subset)

* Protocol ``sc_subset``:

  * Train split: 97 (from Sc subset)
  * Validation split: 28 (from SC subset)
  * Test split: 28 (from SC subset)
"""
from __future__ import annotations

import importlib.resources
import logging
import os
import pathlib

from collections.abc import Mapping

import numpy as np
import pandas as pd

from ...utils.rc import load_rc
from ..dataset import JSONDataset
from ..loader import Loader, load_annotation_raw, load_edf_raw

logger = logging.getLogger(__name__)

_root_path = load_rc().get("datadir.EDF", os.path.realpath(os.curdir))

_root_path_preprocess = load_rc().get("cachedatadir.EDF")



[docs]
class LoaderEdf(Loader):
    def __init__(self, transform_parameters, csv_subset, protocol_name) -> None:
        super().__init__(transform_parameters, csv_subset, protocol_name)
        self.preproc_path = _root_path_preprocess

    def _get_metadata_from_csv(self, csv_subset: str) -> dict:
        """Generate an id for the patient with filepath by removing night
        number.

        Work for ST and SC subset but only for EDF database.

        :param filepath: file path
        :return: patient id
        """
        csv_sc = os.path.join(_root_path, "SC-subjects.xls")

        csv_st = os.path.join(_root_path, "ST-subjects.xls")

        _id = None
        age = None
        gender = None
        medication = None

        if csv_subset == "sc":
            if pathlib.Path(csv_sc).is_file():
                df = pd.read_excel(csv_sc)

                _id = df.iloc[:, 0].values.astype(str)

                night_number = df.iloc[:, 1].values

                age = df.iloc[:, 2].values

                gender = df.iloc[:, 3].values
                gender = ["F" if i == 1 else "M" for i in gender]

                medication = ["None"] * len(gender)

                key = [
                    "".join(["SC4", str(i).zfill(2), str(night_number[index])])
                    for index, i in enumerate(_id)
                ]

            else:
                logger.warning(
                    f"The file {csv_sc} could not be access, metadata are not attached"
                )

        elif csv_subset == "st":
            if pathlib.Path(csv_st).is_file():
                df = pd.read_excel(csv_st)

                _id = df.iloc[:, 0][1:].values.astype(str)
                _id = np.concatenate((_id, _id))

                age = df.iloc[:, 1][1:].values
                age = np.concatenate((age, age))

                gender = df.iloc[:, 2][1:].values
                gender = ["F" if i == 1 else "M" for i in gender]
                gender += gender

                placebo_night_number = df.iloc[:, 3][1:].values
                medicate_night_number = df.iloc[:, 5][1:].values

                medication = ["Temazepam"] * len(placebo_night_number) + [
                    "Placebo"
                ] * len(medicate_night_number)

                night_number = np.concatenate(
                    (placebo_night_number, medicate_night_number)
                )

                key = [
                    "".join(["ST7", str(i).zfill(2), str(night_number[index])])
                    for index, i in enumerate(_id)
                ]

            else:
                logger.info(
                    f"The file {csv_st} could not be access, metadata are not attached"
                )

        else:
            logger.error("Unknown id")

        attribute = ["id", "age", "gender", "medication"]

        if _id is None:
            output_dic = {}

        else:
            output_dic = {
                key[index]: dict(zip(attribute, values))
                for index, values in enumerate(
                    zip(_id, age, gender, medication)
                )
            }

        return output_dic

    def _raw_data_loader(self, sample):
        infer_types = True

        preload = False

        misc = ["Temp rectal", "Event marker", "Marker"]

        raw_from_file = load_edf_raw(
            os.path.join(_root_path, sample["data"]), infer_types, preload, misc
        )

        label_from_file = load_annotation_raw(
            os.path.join(_root_path, sample["label"])
        )

        return (raw_from_file, label_from_file)

    def _map_key_metadata(self, key):
        return key.split("/")[1][0:6]



_EVENT_DICT: dict[str, int] = {
    "Sleep stage W": 0,
    "Sleep stage 1": 1,
    "Sleep stage 2": 2,
    "Sleep stage 3": 3,
    "Sleep stage 4": 3,
    "Sleep stage R": 4,
    "Movement time": 0,
}

_UNFILTERED = {
    "raw-to-epochs-params": {
        "event_id": _EVENT_DICT,
        "chunk_duration": 30.0,
        "crop_wake_time": 0.0,
    },
}

_FILTERED = {
    "raw-to-epochs-params": {
        "event_id": _EVENT_DICT,
        "chunk_duration": 30.0,
        "crop_wake_time": 0.0,
    },
    "band-filter": {"freq-range": [0.3, 30], "filter-len": 300},
}

_FILTERED_WAKE_30_MIN = {
    "raw-to-epochs-params": {
        "event_id": _EVENT_DICT,
        "chunk_duration": 30.0,
        "crop_wake_time": 30.0,
        "wake_stage_name": "Sleep stage W",
    },
    "band-filter": {"freq-range": [0.3, 30], "filter-len": 300},
}

_protocols: dict[
    str,
    tuple[
        str | pathlib.Path | importlib.abc.Traversable,
        Mapping,
    ],
] = {
    "st-unfiltered": (
        importlib.resources.files(__name__).joinpath("st_subset.json"),
        _UNFILTERED,
    ),
    "sc-unfiltered": (
        importlib.resources.files(__name__).joinpath("sc_subset.json"),
        _UNFILTERED,
    ),
    "st-filtered": (
        importlib.resources.files(__name__).joinpath("st_subset.json"),
        _FILTERED,
    ),
    "sc-filtered": (
        importlib.resources.files(__name__).joinpath("sc_subset.json"),
        _FILTERED,
    ),
    "sc-filtered-crop-wake": (
        importlib.resources.files(__name__).joinpath("sc_subset.json"),
        _FILTERED_WAKE_30_MIN,
    ),
}

dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("data", "label"),
    loader=LoaderEdf,
)
"""Sleep-EDF dataset object."""