Fix count samples from annotations (#47)

vejretvejret · messybear · web-flow · commit 13869652a2d8 · 2024-01-19T09:40:14.000-08:00
Fixed two scenarios where SigMFFile._count_samples() failed:

* No data_file registered: sample_count should be calculated from the annotation with the highest end index, not from the annotation with the highest start index
* If an annotation does not provide core:sample_count, its core:sample_start should be used instead (the sample count must be at least equal to this)

---------

Co-authored-by: messybear <messybear@thatsmessy.net>
diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py
@@ -402,11 +402,23 @@ def get_annotations(self, index=None):
         list of dict
             Each dictionary contains one annotation for the sample at `index`.
         '''
-        return [
-            x for x in self._metadata.get(self.ANNOTATION_KEY, [])
-            if index is None or (x[self.START_INDEX_KEY] <= index
-            and x[self.START_INDEX_KEY] + x[self.LENGTH_INDEX_KEY] > index)
-        ]
+        annotations = self._metadata.get(self.ANNOTATION_KEY, [])
+        if index is None:
+            return annotations
+        
+        annotations_including_index = []
+        for annotation in annotations:
+            if index < annotation[self.START_INDEX_KEY]:
+                # index is before annotation starts -> skip
+                continue
+            if self.LENGTH_INDEX_KEY in annotation:
+                # Annotation includes sample_count -> check end index
+                if index >= annotation[self.START_INDEX_KEY] + annotation[self.LENGTH_INDEX_KEY]:
+                    # index is after annotation end -> skip
+                    continue
+            
+            annotations_including_index.append(annotation)
+        return annotations_including_index
 
     def get_sample_size(self):
         """
@@ -418,16 +430,13 @@ def get_sample_size(self):
     def _count_samples(self):
         """
         Count, set, and return the total number of samples in the data file.
-        If there is no data file but there are annotations, use the end index
-        of the final annotation instead. If there are no annotations, use 0.
+        If there is no data file but there are annotations, use the sample_count
+        from the annotation with the highest end index. If there are no annotations,
+        use 0.
         For complex data, a 'sample' includes both the real and imaginary part.
         """
-        annotations = self.get_annotations()
         if self.data_file is None:
-            if len(annotations) > 0:
-                sample_count = annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY]
-            else:
-                sample_count = 0
+            sample_count = self._get_sample_count_from_annotations()
         else:
             header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
             file_size = path.getsize(self.data_file) if self.offset_and_size is None else self.offset_and_size[1]
@@ -438,12 +447,32 @@ def _count_samples(self):
             if file_data_size % (sample_size * num_channels) != 0:
                 warnings.warn(f'File `{self.data_file}` does not contain an integer '
                     'number of samples across channels. It may be invalid data.')
-            if len(annotations) > 0 and annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY] > sample_count:
+            if self._get_sample_count_from_annotations() > sample_count:
                 warnings.warn(f'File `{self.data_file}` ends before the final annotation '
                     'in the corresponding SigMF metadata.')
         self.sample_count = sample_count
         return sample_count
 
+    def _get_sample_count_from_annotations(self):
+        """
+        Returns the number of samples based on annotation with highest end index.
+        NOTE: Annotations are ordered by START_INDEX_KEY and not end index, so we
+        need to go through all annotations
+        """
+        annon_sample_count = []
+        for annon in self.get_annotations():
+            if self.LENGTH_INDEX_KEY in annon:
+                # Annotation with sample_count
+                annon_sample_count.append(annon[self.START_INDEX_KEY] + annon[self.LENGTH_INDEX_KEY])
+            else:
+                # Annotation without sample_count - sample count must be at least sample_start
+                annon_sample_count.append(annon[self.START_INDEX_KEY])
+
+        if annon_sample_count:
+            return max(annon_sample_count)
+        else:
+            return 0
+
     def calculate_hash(self):
         """
         Calculates the hash of the data file and adds it to the global section.
diff --git a/tests/test_sigmffile.py b/tests/test_sigmffile.py
@@ -25,6 +25,7 @@
 from pathlib import Path
 import numpy as np
 import unittest
+import copy
 
 from sigmf import sigmffile, utils
 from sigmf.sigmffile import SigMFFile
@@ -61,6 +62,60 @@ def test_iterator_basic(self):
             count += 1
         self.assertEqual(count, len(self.sigmf_object))
 
+class TestAnnotationHandling(unittest.TestCase):
+
+    def test_get_annotations_with_index(self):
+        """Test that only annotations containing index are returned from get_annotations()"""
+        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
+        smf.add_annotation(start_index=1)
+        smf.add_annotation(start_index=4, length=4)
+        annotations_idx10 = smf.get_annotations(index=10)
+        self.assertListEqual(
+            annotations_idx10,
+            [
+                {SigMFFile.START_INDEX_KEY: 0, SigMFFile.LENGTH_INDEX_KEY: 16},
+                {SigMFFile.START_INDEX_KEY: 1},
+            ]
+        )
+    
+    def test__count_samples_from_annotation(self):
+        """Make sure sample count from annotations use correct end index"""
+        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
+        smf.add_annotation(start_index=0, length=32)
+        smf.add_annotation(start_index=4, length=4)
+        sample_count = smf._count_samples()
+        self.assertEqual(sample_count, 32)
+    
+    def test_set_data_file_without_annotations(self):
+        """
+        Make sure setting data_file with no annotations registered does not
+        raise any errors
+        """
+        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
+        smf._metadata[SigMFFile.ANNOTATION_KEY].clear()
+        with tempfile.TemporaryDirectory() as tmpdir:
+            temp_path_data = os.path.join(tmpdir, "datafile")
+            TEST_FLOAT32_DATA.tofile(temp_path_data)
+            smf.set_data_file(temp_path_data)
+            samples = smf.read_samples()
+            self.assertTrue(len(samples)==16)
+
+    def test_set_data_file_with_annotations(self):
+        """
+        Make sure setting data_file with annotations registered uses the sample
+        count from data_file and issues a warning if annotations have end
+        indices bigger than the file end index
+        """
+        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
+        smf.add_annotation(start_index=0, length=32)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            temp_path_data = os.path.join(tmpdir, "datafile")
+            TEST_FLOAT32_DATA.tofile(temp_path_data)
+            with self.assertWarns(Warning):
+                # Issues warning since file ends before the final annotation
+                smf.set_data_file(temp_path_data)
+                samples = smf.read_samples()
+                self.assertTrue(len(samples)==16)
 
 def simulate_capture(sigmf_md, n, capture_len):
     start_index = capture_len * n