Skip to content

Commit b44878c

Browse files
Add load_edf function (#54)
* Add load_edf function: EDF files can now be read using the storage module, providing signals and corresponding metadata.
* Replace load_edf function with a native Python implementation: removes the PyEDFLib package from the requirements, since it required a C compiler. The load_edf function now reads the file and extracts the information by decoding it directly. Signals, annotations and metadata are extracted.
* Deal with EDF annotations for adequate output: if annotations exist, the last values are removed from the metadata and signals.
1 parent fd267bd commit b44878c

File tree

2 files changed

+164
-1
lines changed

2 files changed

+164
-1
lines changed

biosppy/storage.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import json
2121
import os
2222
import zipfile
23+
import struct
2324

2425
# 3rd party
2526
import h5py
@@ -438,6 +439,168 @@ def load_txt(path):
438439
return data, mdata
439440

440441

442+
def load_edf(path):
    """Load data from an EDF+ (European Data Format) file.

    Parameters
    ----------
    path : str
        Path to the EDF file.

    Returns
    -------
    signals : array
        Signals read from the EDF file, scaled to physical units. When all
        signals have the same number of samples this is a 2D array in which
        each column represents a signal. When the signals have different
        lengths (different sampling rates) a 1D object array holding one
        1D array per signal is returned instead.
    mdata : dict
        Metadata extracted from the EDF file, including:
        - version : str
        - patient_id : str
        - recording_id : str
        - start_date : str
        - start_time : str
        - header_bytes : str
        - reserved : str
        - num_data_records : int
        - duration_per_data_record : float
        - num_signals : int
        - labels : list of str
        - units : list of str
        - sampling_rates : list of int
        - physical_min : list of float
        - physical_max : list of float
        - digital_min : list of int
        - digital_max : list of int
        - annotations : list of tuples (onset, duration, annotation)

    Raises
    ------
    ValueError
        If a numeric header field or an annotation onset/duration cannot be
        parsed.
    struct.error
        If a data record is shorter than announced in the header.

    Notes
    -----
    This function reads the EDF file header and data records, scales the
    signals into physical units, and parses the annotations according to the
    EDF+ specification. Channels labelled 'EDF Annotations' are removed from
    the returned signals and from the per-signal metadata lists, wherever they
    appear in the channel order. Annotation onsets and durations are returned
    as strings formatted by ``datetime.timedelta``.

    """

    # Imported locally so the function does not rely on a module-level
    # ``datetime`` import being present.
    import datetime

    annotation_label = 'EDF Annotations'

    def to_clock(seconds):
        # Format a number of seconds as an H:MM:SS[.ffffff] string.
        return str(datetime.timedelta(seconds=seconds))

    def parse_annotations(data):
        # Parse the Time-stamped Annotation Lists (TALs) of one data record.
        # A TAL has the layout
        #     onset[\x15duration]\x14text\x14[text\x14...]\x00
        # and the unused remainder of the record is padded with \x00.
        found = []
        pos = 0
        while pos < len(data) and data[pos:pos + 1] != b'\x00':
            end = data.find(b'\x14\x00', pos)
            if end == -1:
                # truncated or malformed TAL: stop instead of overrunning
                break
            fields = data[pos:end].split(b'\x14')
            pos = end + 2

            # the first field carries the onset and the optional duration
            onset_raw, _, duration_raw = fields[0].partition(b'\x15')
            texts = [t.decode('utf-8', 'replace').strip() for t in fields[1:]]
            texts = [t for t in texts if t]
            if not texts:
                # e.g. the time-keeping TAL, which has an empty annotation
                continue

            onset = to_clock(float(onset_raw.decode('ascii')))
            if duration_raw:
                duration = to_clock(float(duration_raw.decode('ascii')))
            else:
                duration = to_clock(0)
            found.extend((onset, duration, text) for text in texts)
        return found

    def read_fields(handle, width, count, convert=None):
        # Read `count` consecutive fixed-width ASCII header fields.
        values = []
        for _ in range(count):
            text = handle.read(width).decode('ascii').strip()
            values.append(convert(text) if convert else text)
        return values

    with open(path, 'rb') as f:
        # Read the fixed-size part of the header
        header = f.read(256)

        version = header[:8].decode('ascii').strip()
        patient_id = header[8:88].decode('ascii').strip()
        recording_id = header[88:168].decode('ascii').strip()
        start_date = header[168:176].decode('ascii').strip()
        start_time = header[176:184].decode('ascii').strip()
        header_bytes = header[184:192].decode('ascii').strip()
        reserved = header[192:236].decode('ascii').strip()
        num_data_records = int(header[236:244].decode('ascii').strip())
        duration_per_data_record = float(header[244:252].decode('ascii').strip())
        num_channels = int(header[252:256].decode('ascii').strip())

        # Read the per-channel header fields; each field is stored for all
        # channels before the next field starts.
        labels = read_fields(f, 16, num_channels)
        f.read(80 * num_channels)  # transducer types (not returned)
        units = read_fields(f, 8, num_channels)
        physical_min = read_fields(f, 8, num_channels, float)
        physical_max = read_fields(f, 8, num_channels, float)
        digital_min = read_fields(f, 8, num_channels, int)
        digital_max = read_fields(f, 8, num_channels, int)
        f.read(80 * num_channels)  # prefiltering (not returned)
        samples_per_record = read_fields(f, 8, num_channels, int)
        f.read(32 * num_channels)  # reserved space (not returned)

        is_annotation = [label == annotation_label for label in labels]

        # Read data records: within a record, each channel contributes a
        # contiguous run of little-endian 16-bit samples.
        raw = [[] for _ in range(num_channels)]
        annotations = []
        for _ in range(num_data_records):
            for idx in range(num_channels):
                count = samples_per_record[idx]
                chunk = f.read(count * 2)
                if is_annotation[idx]:
                    annotations.extend(parse_annotations(chunk))
                else:
                    raw[idx].extend(struct.unpack('<%dh' % count, chunk))

    # Keep every channel that is not an annotation channel, regardless of
    # its position in the file.
    keep = [idx for idx in range(num_channels) if not is_annotation[idx]]

    # Scale the signals into physical units
    signals = []
    for idx in keep:
        digital = np.array(raw[idx], dtype=float)
        scaled = ((digital - digital_min[idx])
                  / (digital_max[idx] - digital_min[idx])
                  * (physical_max[idx] - physical_min[idx])
                  + physical_min[idx])
        signals.append(scaled)

    # A record duration of 0 is used by annotation-only files; report a
    # sampling rate of 0 instead of dividing by zero.
    if duration_per_data_record > 0:
        sampling_rates = [int(samples_per_record[idx] / duration_per_data_record)
                          for idx in keep]
    else:
        sampling_rates = [0 for _ in keep]

    mdata = {
        'version': version,
        'patient_id': patient_id,
        'recording_id': recording_id,
        'start_date': start_date,
        'start_time': start_time,
        'header_bytes': header_bytes,
        'reserved': reserved,
        'num_data_records': num_data_records,
        'duration_per_data_record': duration_per_data_record,
        'num_signals': len(keep),
        'labels': [labels[idx] for idx in keep],
        'units': [units[idx] for idx in keep],
        'sampling_rates': sampling_rates,
        'physical_min': [physical_min[idx] for idx in keep],
        'physical_max': [physical_max[idx] for idx in keep],
        'digital_min': [digital_min[idx] for idx in keep],
        'digital_max': [digital_max[idx] for idx in keep],
        'annotations': annotations
    }

    if len(set(len(sig) for sig in signals)) <= 1:
        data = np.array(signals).T
    else:
        # signals of different lengths cannot form a rectangular array
        data = np.empty(len(signals), dtype=object)
        for k, sig in enumerate(signals):
            data[k] = sig

    return data, mdata
602+
603+
441604
class HDF(object):
442605
"""Wrapper class to operate on BioSPPy HDF5 files.
443606

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ scipy>=1.2.0
77
shortuuid>=0.5.0
88
six>=1.11.0
99
joblib>=0.11
10-
pywavelets>=1.4.1
10+
pywavelets>=1.4.1

0 commit comments

Comments
 (0)