Skip to content

sleeplab_format.extractor.cli

CLI for extracting and preprocessing a subset of data in sleeplab format.

extract(src_dir, dst_dir, cfg)

Read, preprocess, and write data in sleeplab format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_dir | Path | The source SLF dataset folder. | required |
| dst_dir | Path | The root folder where the extracted dataset will be saved. | required |
| cfg | DatasetConfig | The extractor config. | required |
Source code in src/sleeplab_format/extractor/cli.py
def extract(src_dir: Path, dst_dir: Path, cfg: config.DatasetConfig) -> None:
    """Extract a preprocessed copy of a sleeplab format dataset.

    Only the series named in the config are read from the source dataset.
    Each of them is passed through ``preprocess.process_series``, the results
    replace the series of the dataset, and the renamed dataset is written
    under ``dst_dir``. The skipped subjects reported for each series are
    dumped as JSON into the written dataset folder.

    Arguments:
        src_dir: The source SLF dataset folder.
        dst_dir: The root folder where the extracted dataset will be saved.
        cfg: The extractor config.
    """
    logger.info(f'Reading dataset from {src_dir}')
    wanted_series = [sc.name for sc in cfg.series_configs]
    ds = reader.read_dataset(src_dir, series_names=wanted_series)

    processed = {}
    skipped_by_series = {}

    for sc in cfg.series_configs:
        logger.info(f'Creating updated series {sc.name}')
        # process_series returns the new series and the subjects it skipped.
        result = preprocess.process_series(ds.series[sc.name], sc)
        processed[sc.name], skipped_by_series[sc.name] = result

    logger.info('Creating updated Dataset')
    ds = ds.model_copy(update={'name': cfg.new_dataset_name, 'series': processed})

    logger.info(f'Applying preprocessing and writing dataset to {dst_dir}')
    writer.write_dataset(
        ds,
        dst_dir,
        annotation_format=cfg.annotation_format,
        array_format=cfg.array_format,
    )

    # Nothing to report when no series were configured.
    if not skipped_by_series:
        return

    skipped_path = Path(dst_dir) / ds.name / '.extractor_skipped_subjects.json'
    logger.info(f'Writing skipped subject IDs and reasons to {skipped_path}')
    with open(skipped_path, 'w') as f:
        json.dump(skipped_by_series, f, indent=2)

sleeplab_format.extractor.preprocess

highpass(s, attributes, *, cutoff, dtype=np.float32)

Source code in src/sleeplab_format/extractor/preprocess.py
def highpass(
        s: np.array,
        attributes: ArrayAttributes, *,
        cutoff: float,
        dtype=np.float32) -> np.array:
    """High-pass filter the signal with ``cheby2_filtfilt``.

    Arguments:
        s: The signal to filter.
        attributes: Attributes of the signal; only ``sampling_rate`` is read.
        cutoff: The cutoff handed to ``cheby2_filtfilt``.
        dtype: The data type the filtered signal is cast to.

    Returns:
        The filtered signal cast to ``dtype``.
    """
    fs = attributes.sampling_rate
    filtered = cheby2_filtfilt(s, fs, cutoff, btype='highpass')
    return filtered.astype(dtype)

lowpass(s, attributes, *, cutoff, dtype=np.float32)

Source code in src/sleeplab_format/extractor/preprocess.py
def lowpass(
        s: np.array,
        attributes: ArrayAttributes, *,
        cutoff: float,
        dtype=np.float32) -> np.array:
    """Low-pass filter the signal with ``cheby2_filtfilt``.

    Arguments:
        s: The signal to filter.
        attributes: Attributes of the signal; only ``sampling_rate`` is read.
        cutoff: The cutoff handed to ``cheby2_filtfilt``.
        dtype: The data type the filtered signal is cast to.

    Returns:
        The filtered signal cast to ``dtype``.
    """
    fs = attributes.sampling_rate
    filtered = cheby2_filtfilt(s, fs, cutoff, btype='lowpass')
    return filtered.astype(dtype)

resample_polyphase(s, attributes, *, fs_new, dtype=np.float32)

Resample the signal using scipy.signal.resample_poly.

Source code in src/sleeplab_format/extractor/preprocess.py
def resample_polyphase(
        s: np.array,
        attributes: ArrayAttributes, *,
        fs_new: float,
        dtype: np.dtype = np.float32) -> np.array:
    """Resample the signal using scipy.signal.resample_poly.

    The up/down factors are derived from the exact rational ratio
    ``fs_new / attributes.sampling_rate``, so non-integer sampling rates
    (e.g. 62.5 Hz or 0.5 Hz) are resampled correctly instead of being
    truncated to integers.

    Arguments:
        s: The signal to resample.
        attributes: Attributes of the signal; only ``sampling_rate`` is read.
        fs_new: The target sampling rate.
        dtype: The data type the resampled signal is cast to.

    Returns:
        The resampled signal cast to ``dtype``.

    Raises:
        ValueError: If either sampling rate is not positive.
    """
    from fractions import Fraction

    fs_orig = attributes.sampling_rate
    if fs_orig <= 0 or fs_new <= 0:
        raise ValueError(
            f'Sampling rates must be positive, got fs_orig={fs_orig}, fs_new={fs_new}')

    # Cast to float64 before filtering
    s = s.astype(np.float64)

    # limit_denominator recovers the intended rational from a binary float
    # (e.g. 0.1 -> 1/10) and keeps the polyphase factors small.
    max_denominator = 1000
    ratio = (
        Fraction(float(fs_new)).limit_denominator(max_denominator)
        / Fraction(float(fs_orig)).limit_denominator(max_denominator)
    )
    if ratio <= 0:
        raise ValueError(
            f'Cannot resample from fs_orig={fs_orig} to fs_new={fs_new}')

    resampled = scipy.signal.resample_poly(s, ratio.numerator, ratio.denominator)
    return resampled.astype(dtype)

decimate(s, attributes, *, fs_new, dtype=np.float32)

Source code in src/sleeplab_format/extractor/preprocess.py
def decimate(
        s: np.array,
        attributes: ArrayAttributes, *,
        fs_new: float,
        dtype: np.dtype = np.float32) -> np.array:
    """Downsample the signal by an integer factor using ``_decimate``.

    The factor is ``attributes.sampling_rate // fs_new`` converted to int.

    Arguments:
        s: The signal to downsample.
        attributes: Attributes of the signal; only ``sampling_rate`` is read.
        fs_new: The target sampling rate.
        dtype: The data type the result is cast to.

    Returns:
        The decimated signal cast to ``dtype``.
    """
    # The IIR filtering step must run on float64 input.
    signal64 = s.astype(np.float64)
    factor = int(attributes.sampling_rate // fs_new)
    decimated = _decimate(signal64, factor)
    return decimated.astype(dtype)

upsample_linear(s, attributes, *, fs_new, dtype=np.float32)

Linear interpolation for upsampling signals such as SpO2.

Source code in src/sleeplab_format/extractor/preprocess.py
def upsample_linear(
        s: np.array,
        attributes: ArrayAttributes, *,
        fs_new: float,
        dtype=np.float32) -> np.array:
    """Linear interpolation for upsampling signals such as SpO2.

    The signal is upsampled by the integer factor
    ``fs_new // attributes.sampling_rate``. The output has exactly
    ``factor * len(s)`` samples; positions after the last original sample
    hold the last original value.

    Arguments:
        s: The 1-D signal to upsample.
        attributes: Attributes of the signal; only ``sampling_rate`` is read.
        fs_new: The target sampling rate.
        dtype: The data type the result is cast to.

    Returns:
        The upsampled signal cast to ``dtype``.

    Raises:
        ValueError: If ``fs_new`` is lower than the original sampling rate.
    """
    fs_orig = attributes.sampling_rate
    # Force an integer factor so the sample grids are exact even when the
    # sampling rates are given as floats.
    int_factor = int(fs_new // fs_orig)
    if int_factor < 1:
        raise ValueError(
            f'Cannot upsample from {fs_orig} to {fs_new}: fs_new must be >= the original sampling rate')

    n = len(s)
    if n == 0:
        return np.zeros(0, dtype=dtype)

    # Positions of the original samples on the new sample grid.
    x = np.arange(n) * int_factor
    # Interpolate over the full output grid. np.interp clamps to the last
    # sample right of x[-1], which pads the tail and, unlike appending
    # s_interp[-1], keeps the final sample intact when int_factor == 1.
    x_new = np.arange(int_factor * n)
    return np.interp(x_new, x, s).astype(dtype)

iqr_norm(s, attributes, dtype=np.float32)

Interquartile range standardization for the signal.

Source code in src/sleeplab_format/extractor/preprocess.py
def iqr_norm(
        s: np.array,
        attributes: ArrayAttributes,
        dtype=np.float32) -> np.array:
    """Standardize the signal by its median and interquartile range.

    Arguments:
        s: The signal to normalize.
        attributes: Attributes of the signal (not used by this function).
        dtype: The data type the result is cast to.

    Returns:
        ``(s - median(s)) / IQR`` cast to ``dtype``, or an all-zero array of
        the same shape when the interquartile range is zero.
    """
    lower, upper = np.percentile(s, [25, 75])
    spread = upper - lower
    if spread != 0:
        centered = s - np.median(s)
        return (centered / spread).astype(dtype)
    # Zero spread: avoid dividing by zero.
    return np.zeros(s.shape, dtype=dtype)

z_score_norm(s, attributes, dtype=np.float32)

Source code in src/sleeplab_format/extractor/preprocess.py
def z_score_norm(
        s: np.array,
        attributes: ArrayAttributes,
        dtype=np.float32) -> np.array:
    """Standardize the signal to zero mean and unit standard deviation.

    Arguments:
        s: The signal to normalize.
        attributes: Attributes of the signal (not used by this function).
        dtype: The data type the result is cast to.

    Returns:
        ``(s - mean(s)) / std(s)`` cast to ``dtype``, or an all-zero array of
        the same shape when the standard deviation is zero.
    """
    std = np.std(s)
    if std == 0:
        # A constant signal would otherwise produce NaNs from 0 / 0;
        # return zeros like iqr_norm does for a zero interquartile range.
        return np.zeros(np.shape(s), dtype=dtype)
    return ((s - np.mean(s)) / std).astype(dtype)

add_ref(s, attributes, *, ref_s, dtype=np.float32)

Source code in src/sleeplab_format/extractor/preprocess.py
def add_ref(
        s: np.array,
        attributes: ArrayAttributes, *,
        ref_s: np.array,
        dtype=np.float32) -> np.array:
    """Add the reference signal to the signal.

    Arguments:
        s: The signal.
        attributes: Attributes of the signal (not used by this function).
        ref_s: The reference signal added to ``s``.
        dtype: The data type the result is cast to.

    Returns:
        ``s + ref_s`` cast to ``dtype``.
    """
    summed = s + ref_s
    return summed.astype(dtype)

sub_ref(s, attributes, *, ref_s, dtype=np.float32)

Source code in src/sleeplab_format/extractor/preprocess.py
def sub_ref(
        s: np.array,
        attributes: ArrayAttributes, *,
        ref_s: np.array,
        dtype=np.float32) -> np.array:
    """Subtract the reference signal from the signal.

    Arguments:
        s: The signal.
        attributes: Attributes of the signal (not used by this function).
        ref_s: The reference signal subtracted from ``s``.
        dtype: The data type the result is cast to.

    Returns:
        ``s - ref_s`` cast to ``dtype``.
    """
    difference = s - ref_s
    return difference.astype(dtype)

sleeplab_format.extractor.config

DatasetConfig

Bases: BaseModel

Source code in src/sleeplab_format/extractor/config.py
class DatasetConfig(BaseModel, extra='forbid'):
    # Top-level extractor config consumed by `extract`.
    # extra='forbid' makes pydantic reject unknown fields.

    # Name given to the extracted dataset.
    new_dataset_name: str
    # One entry per series to extract; only the series named here are read
    # from the source dataset.
    series_configs: list[SeriesConfig]
    # Forwarded to the dataset writer as `annotation_format`.
    annotation_format: str = 'json'
    # Forwarded to the dataset writer as `array_format`.
    array_format: str = 'numpy'

SeriesConfig

Bases: BaseModel

Source code in src/sleeplab_format/extractor/config.py
class SeriesConfig(BaseModel, extra='forbid'):
    # Config for a single series; unknown fields are rejected (extra='forbid').

    # Name of the series; `extract` uses it to look the series up in the
    # source dataset and as the key of the processed series.
    name: str
    # Configs of the arrays to process in this series.
    # NOTE(review): presumably consumed by preprocess.process_series - confirm.
    array_configs: list[ArrayConfig]
    # Optional filter conditions.
    # NOTE(review): presumably decide which subjects are kept - confirm.
    filter_conds: list[FilterCond] | None = None

    # If given, ignore subjects who do not have all of these signals in the resulting dataset
    required_result_array_names: list[str] | None = None

ArrayConfig

Bases: BaseModel

Source code in src/sleeplab_format/extractor/config.py
class ArrayConfig(BaseModel, extra='forbid'):
    # Config for a single array; unknown fields are rejected (extra='forbid').

    # Name of the array.
    name: str
    # Optional alternative names for the array.
    # NOTE(review): presumably tried when `name` is not found - confirm.
    alt_names: list[str] | None = None
    # Optional actions for the array.
    # NOTE(review): presumably applied in list order - confirm.
    actions: list[ArrayAction] | None = None

ArrayAction

Bases: BaseModel

Source code in src/sleeplab_format/extractor/config.py
class ArrayAction(BaseModel, extra='forbid'):
    # A single action in an ArrayConfig; unknown fields are rejected
    # (extra='forbid').

    # Name of the action.
    name: str
    # Name of the method to run.
    # NOTE(review): presumably resolves to a function in extractor.preprocess
    # (e.g. highpass, decimate) - confirm how it is looked up.
    method: str

    # The name of an optional reference signal
    ref_name: str | None = None
    # Optional alternative names for the reference signal.
    alt_ref_names: list[str] | None = None

    # Extra keyword arguments for the method.
    # NOTE(review): presumably forwarded as **kwargs (cutoff, fs_new, ...) -
    # confirm. The mutable `{}` default should be safe under pydantic, which
    # copies field defaults per instance - verify for the pinned version.
    kwargs: dict[str, Any] = {}
    # Optional attribute values.
    # NOTE(review): presumably written to the array attributes after the
    # action (e.g. a new sampling_rate) - confirm.
    updated_attributes: dict[str, Any] | None = None

FilterCond

Bases: BaseModel

Source code in src/sleeplab_format/extractor/config.py
class FilterCond(BaseModel, extra='forbid'):
    # A single filter condition of a SeriesConfig; unknown fields are
    # rejected (extra='forbid').

    # Name of the condition.
    name: str
    # Name of the method implementing the condition.
    # NOTE(review): confirm where this name is resolved.
    method: str
    # Optional keyword arguments for the method.
    kwargs: dict[str, Any] | None = None