Skip to content

Commit 271b12a

Browse files
authored
MRG: Merge pull request #483 from octue/enhancement/add-metadata-hash-property
Add metadata hash property
2 parents efb0132 + cb0298b commit 271b12a

File tree

5 files changed

+75
-6
lines changed

5 files changed

+75
-6
lines changed

octue/mixins/metadata.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,48 @@
22

33
import pkg_resources
44

5+
from octue.mixins.hashable import Hashable
6+
57

68
class Metadata:
79
_METADATA_ATTRIBUTES = tuple()
810

9-
def metadata(self, include_sdk_version=True):
10-
"""Get the instance's metadata in primitive form.
11+
@property
12+
def metadata_hash_value(self):
13+
"""Get the hash of the instance's metadata, not including its ID or the SDK version.
14+
15+
:return str:
16+
"""
17+
return self._metadata_hash_value()
18+
19+
def metadata(self, include_id=True, include_sdk_version=True, **kwargs):
20+
"""Get the instance's metadata in primitive form. The metadata is the set of attributes included in the class
21+
variable `self._METADATA_ATTRIBUTES`.
1122
12-
:param bool include_sdk_version: if `True`, include the `octue` version that instantiated the instance in the metadata
23+
:param bool include_id: if `True`, include the ID of the instance if it is included in `self._METADATA_ATTRIBUTES`
24+
:param bool include_sdk_version: if `True`, include the `octue` version that instantiated the instance
25+
:param kwargs: any kwargs to use in an overridden `self.metadata` method
1326
:return dict:
1427
"""
1528
metadata = {name: getattr(self, name) for name in self._METADATA_ATTRIBUTES}
1629

1730
if include_sdk_version:
1831
metadata["sdk_version"] = pkg_resources.get_distribution("octue").version
1932

33+
if not include_id and "id" in metadata:
34+
del metadata["id"]
35+
2036
return metadata
2137

38+
def _metadata_hash_value(self, **kwargs):
39+
"""Get the hash of the instance's metadata, not including its ID or the SDK version. Override this method to change what kwargs
40+
`self.metadata` gets.
41+
42+
:param kwargs: any kwargs to use in an overridden `self.metadata` method when calculating the metadata hash value
43+
:return str:
44+
"""
45+
return Hashable.hash_non_class_object(self.metadata(include_id=False, include_sdk_version=False, **kwargs))
46+
2247
@abstractmethod
2348
def _set_metadata(self, metadata):
2449
"""Set the instance's metadata.

octue/resources/datafile.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,15 +427,16 @@ def download(self, local_path=None):
427427
self.reset_hash()
428428
return self._local_path
429429

430-
def metadata(self, include_sdk_version=True, use_octue_namespace=True):
430+
def metadata(self, include_id=True, include_sdk_version=True, use_octue_namespace=True):
431431
"""Get the datafile's metadata in a serialised form (i.e. the attributes `id`, `timestamp`, `labels`, `tags`,
432432
and `sdk_version`).
433433
434+
:param bool include_id: if `True`, include the ID of the datafile
434435
:param bool include_sdk_version: if `True`, include the `octue` version that instantiated the datafile in the metadata
435436
:param bool use_octue_namespace: if `True`, prefix metadata names with "octue__"
436437
:return dict:
437438
"""
438-
metadata = super().metadata(include_sdk_version=include_sdk_version)
439+
metadata = super().metadata(include_sdk_version=include_sdk_version, include_id=include_id)
439440

440441
if not use_octue_namespace:
441442
return metadata
@@ -593,6 +594,13 @@ def _set_metadata(self, metadata):
593594

594595
setattr(self, attribute, metadata[attribute])
595596

597+
def _metadata_hash_value(self):
598+
"""Get the hash of the datafile's metadata, not including its ID or the SDK version.
599+
600+
:return str:
601+
"""
602+
return super()._metadata_hash_value(use_octue_namespace=False)
603+
596604
def _calculate_hash(self):
597605
"""Get the hash of the datafile according to the first of the following methods that is applicable:
598606

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "octue"
3-
version = "0.28.1"
3+
version = "0.28.2"
44
description = "A package providing template applications for data services, and a python SDK to the Octue API."
55
readme = "README.md"
66
authors = ["Marcus Lugg <[email protected]>", "Thomas Clark <[email protected]>"]

tests/resources/test_datafile.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,29 @@ def test_hash_of_cloud_datafile_avoids_downloading_file(self):
172172
# Check that the reloaded datafile hasn't been downloaded.
173173
self.assertIsNone(datafile_reloaded_from_cloud._local_path)
174174

175+
def test_metadata_hash_is_same_for_different_files_with_the_same_metadata(self):
176+
"""Test that the metadata hash is the same for datafiles with different files but the same metadata."""
177+
with tempfile.NamedTemporaryFile(delete=False) as temporary_file:
178+
first_file = Datafile(path=temporary_file.name, labels={"a", "b", "c"})
179+
180+
with first_file.open("w") as f:
181+
f.write("hello")
182+
183+
with tempfile.NamedTemporaryFile(delete=False) as temporary_file:
184+
second_file = Datafile(path=temporary_file.name, labels={"a", "b", "c"})
185+
186+
with second_file.open("w") as f:
187+
f.write("goodbye")
188+
189+
self.assertEqual(first_file.metadata_hash_value, second_file.metadata_hash_value)
190+
191+
def test_metadata_hash_is_different_for_same_file_but_different_metadata(self):
192+
"""Test that the metadata hash is different for datafiles with the same files but different metadata."""
193+
first_file = Datafile(path=self.path, labels={"a", "b", "c"})
194+
second_file = copy.deepcopy(first_file)
195+
second_file.labels = {"d", "e", "f"}
196+
self.assertNotEqual(first_file.metadata_hash_value, second_file.metadata_hash_value)
197+
175198
def test_exists_in_cloud(self):
176199
"""Test whether it can be determined that a datafile exists in the cloud or not."""
177200
self.assertFalse(self.create_valid_datafile().exists_in_cloud)

tests/resources/test_dataset.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,19 @@ def test_hashes_for_the_same_dataset_are_the_same(self):
368368
second_dataset = copy.deepcopy(first_dataset)
369369
self.assertEqual(first_dataset.hash_value, second_dataset.hash_value)
370370

371+
def test_metadata_hash_is_same_for_different_datasets_with_the_same_metadata(self):
372+
"""Test that the metadata hash is the same for datasets with different files but the same metadata."""
373+
first_dataset = Dataset(labels={"a", "b", "c"})
374+
second_dataset = Dataset(files={Datafile(path="blah", hypothetical=True)}, labels={"a", "b", "c"})
375+
self.assertEqual(first_dataset.metadata_hash_value, second_dataset.metadata_hash_value)
376+
377+
def test_metadata_hash_is_different_for_same_dataset_but_different_metadata(self):
378+
"""Test that the metadata hash is different for datasets with the same files but different metadata."""
379+
first_dataset = self.create_valid_dataset()
380+
second_dataset = copy.deepcopy(first_dataset)
381+
second_dataset.labels = {"d", "e", "f"}
382+
self.assertNotEqual(first_dataset.metadata_hash_value, second_dataset.metadata_hash_value)
383+
371384
def test_serialisation_and_deserialisation(self):
372385
"""Test that a dataset can be serialised and deserialised."""
373386
dataset_id = "e376fb31-8f66-414d-b99f-b43395cebbf1"

0 commit comments

Comments
 (0)