|
20 | 20 | import json
|
21 | 21 | import os
|
22 | 22 | import zipfile
|
| 23 | +import struct |
23 | 24 |
|
24 | 25 | # 3rd party
|
25 | 26 | import h5py
|
@@ -438,6 +439,168 @@ def load_txt(path):
|
438 | 439 | return data, mdata
|
439 | 440 |
|
440 | 441 |
|
def load_edf(path):
    """Load data from an EDF+ (European Data Format) file.

    Parameters
    ----------
    path : str
        Path to the EDF file.

    Returns
    -------
    signals : array
        Signals read from the EDF file, scaled to physical units. When all
        channels have the same number of samples, this is a 2D array in
        which each column represents a signal. When channels differ in
        length (different sampling rates), a 1D object array holding one
        array per channel is returned instead.
    mdata : dict
        Metadata extracted from the EDF file, including:
        - version : str
        - patient_id : str
        - recording_id : str
        - start_date : str
        - start_time : str
        - header_bytes : str
        - reserved : str
        - num_data_records : int
        - duration_per_data_record : float
        - num_signals : int
        - labels : list of str
        - units : list of str
        - sampling_rates : list of int
        - physical_min : list of float
        - physical_max : list of float
        - digital_min : list of int
        - digital_max : list of int
        - annotations : list of tuples (onset, duration, annotation)

    Raises
    ------
    ValueError
        If the header is incomplete or cannot be parsed, if a data record
        is truncated, or if an annotation onset/duration is not a number.

    Notes
    -----
    This function reads the EDF file header and data records, scales the
    signals into physical units, and parses the annotations according to
    the EDF+ specification. 'EDF Annotations' channels are removed from the
    returned signals and per-signal metadata, wherever they appear in the
    channel list. Annotation onsets and durations are returned as strings
    formatted by ``datetime.timedelta`` (e.g. '0:00:02').
    """

    annotation_label = 'EDF Annotations'

    def parse_annotations(data):
        """Parse the time-stamped annotation lists (TALs) in one record."""
        annotations = []
        # every TAL ends with a NUL byte; unused space is NUL padded, so
        # splitting on NUL yields the TALs plus empty leftovers
        for tal in bytes(data).split(b'\x00'):
            if not tal:
                continue

            # TAL layout: onset[\x15duration]\x14text\x14text\x14...
            fields = tal.split(b'\x14')
            onset_raw, _, duration_raw = fields[0].partition(b'\x15')

            # keep the numeric values separate from their formatted strings
            # so that several texts can share the same onset/duration
            onset = float(onset_raw.decode('ascii'))  # seconds
            duration_raw = duration_raw.strip()
            duration = float(duration_raw.decode('ascii')) if duration_raw else 0
            onset_str = str(datetime.timedelta(seconds=onset))
            duration_str = str(datetime.timedelta(seconds=duration))

            for text in fields[1:]:
                # EDF+ annotation texts are UTF-8 encoded
                text = text.decode('utf-8', errors='replace').strip()
                if text != '':
                    annotations.append((onset_str, duration_str, text))

        return annotations

    with open(path, 'rb') as f:
        # Read the fixed-size part of the header
        header = f.read(256)
        if len(header) < 256:
            raise ValueError("Truncated EDF file: incomplete header.")

        # Extract fixed fields
        version = header[:8].decode('ascii').strip()
        patient_id = header[8:88].decode('ascii').strip()
        recording_id = header[88:168].decode('ascii').strip()
        start_date = header[168:176].decode('ascii').strip()
        start_time = header[176:184].decode('ascii').strip()
        header_bytes = header[184:192].decode('ascii').strip()
        reserved = header[192:236].decode('ascii').strip()
        num_data_records = int(header[236:244].decode('ascii').strip())
        duration_per_data_record = float(header[244:252].decode('ascii').strip())
        num_signals = int(header[252:256].decode('ascii').strip())

        def read_fields(width, convert=None):
            """Read one fixed-width header field for every signal."""
            raw = [f.read(width).decode('ascii').strip()
                   for _ in range(num_signals)]
            if convert is None:
                return raw
            return [convert(item) for item in raw]

        # Read signal metadata (fields are stored grouped by field, not by
        # signal); unused fields are still read to advance the file position
        labels = read_fields(16)
        f.read(80 * num_signals)  # transducer types
        units = read_fields(8)
        physical_min = read_fields(8, float)
        physical_max = read_fields(8, float)
        digital_min = read_fields(8, int)
        digital_max = read_fields(8, int)
        f.read(80 * num_signals)  # prefiltering
        num_samples_per_data_record = read_fields(8, int)
        f.read(32 * num_signals)  # reserved

        # a record duration of 0 is legal in EDF+ (annotation-only files);
        # report a rate of 0 instead of dividing by zero
        if duration_per_data_record > 0:
            sampling_rates = [int(num_samples / duration_per_data_record)
                              for num_samples in num_samples_per_data_record]
        else:
            sampling_rates = [0] * num_signals

        # Read data records
        is_annotation = [label == annotation_label for label in labels]
        chunks = [[] for _ in range(num_signals)]
        annotations = []
        for _ in range(num_data_records):
            for idx, num_samples in enumerate(num_samples_per_data_record):
                # every sample is a 2-byte little-endian signed integer
                raw = f.read(num_samples * 2)
                if is_annotation[idx]:
                    annotations.extend(parse_annotations(raw))
                else:
                    if len(raw) != num_samples * 2:
                        raise ValueError(
                            "Truncated EDF file: incomplete data record.")
                    chunks[idx].append(np.frombuffer(raw, dtype='<i2'))

    # keep only the ordinary signals, whatever the position of the
    # annotation channel(s)
    keep = [idx for idx in range(num_signals) if not is_annotation[idx]]

    # Scale the signals into physical units
    signals = []
    for idx in keep:
        if chunks[idx]:
            digital = np.concatenate(chunks[idx]).astype(np.float64)
        else:
            digital = np.array([], dtype=np.float64)
        gain = physical_max[idx] - physical_min[idx]
        span = digital_max[idx] - digital_min[idx]
        signals.append(
            (digital - digital_min[idx]) / span * gain + physical_min[idx])

    # remove annotation channels from the per-signal metadata
    num_signals = len(keep)
    labels = [labels[idx] for idx in keep]
    units = [units[idx] for idx in keep]
    sampling_rates = [sampling_rates[idx] for idx in keep]
    physical_min = [physical_min[idx] for idx in keep]
    physical_max = [physical_max[idx] for idx in keep]
    digital_min = [digital_min[idx] for idx in keep]
    digital_max = [digital_max[idx] for idx in keep]

    mdata = {
        'version': version,
        'patient_id': patient_id,
        'recording_id': recording_id,
        'start_date': start_date,
        'start_time': start_time,
        'header_bytes': header_bytes,
        'reserved': reserved,
        'num_data_records': num_data_records,
        'duration_per_data_record': duration_per_data_record,
        'num_signals': num_signals,
        'labels': labels,
        'units': units,
        'sampling_rates': sampling_rates,
        'physical_min': physical_min,
        'physical_max': physical_max,
        'digital_min': digital_min,
        'digital_max': digital_max,
        'annotations': annotations
    }

    if len({len(signal) for signal in signals}) > 1:
        # channels with different sampling rates cannot form a 2D array;
        # return one array per channel inside an object array
        output = np.empty(len(signals), dtype=object)
        for idx, signal in enumerate(signals):
            output[idx] = signal
    else:
        output = np.array(signals).T

    return output, mdata
| 602 | + |
| 603 | + |
441 | 604 | class HDF(object):
|
442 | 605 | """Wrapper class to operate on BioSPPy HDF5 files.
|
443 | 606 |
|
|
0 commit comments