Skip to content

Commit f8fe80f

Browse files
author
Tom Clark
committed
IMP Added multi manifest class to deal with having multiple datasets in the input
1 parent 5ddca8d commit f8fe80f

File tree

2 files changed

+60
-18
lines changed

2 files changed

+60
-18
lines changed

octue/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ class FolderNotPresent(InvalidInput):
1010
"""
1111

1212

13+
class ManifestNotFound(InvalidInput):
    """Raise when a search of a multi manifest cannot be narrowed down to a single manifest
    """
16+
1317
class InvalidManifest(InvalidInput):
1418
"""Raise when a manifest loaded from JSON does not pass validation
1519
"""

octue/resources/manifest.py

Lines changed: 56 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
22
from json import JSONEncoder, JSONDecoder
33

4-
from octue.exceptions import InvalidManifestType, InvalidInput
4+
from octue.exceptions import InvalidManifestType, InvalidInput, UnexpectedNumberOfResults, ManifestNotFound
55
from octue import utils
66

77
from .data_file import DataFile
@@ -36,21 +36,52 @@ def default(self, obj):
3636
return json.JSONEncoder.default(self, obj)
3737

3838

39+
class MultiManifest(object):
    """ A manifest that can contain multiple datasets

    Wraps a list of single-dataset manifests and allows one of them to be selected by searching on the name or tags
    of its dataset. Each wrapped manifest is read through its `data_set` attribute, which is indexed with the keys
    'name' (for the name_* methods) and 'tags' (for the tag_* methods).
    """

    def __init__(self, manifests):
        """Construct a multi manifest

        :param manifests: the manifests to search through, in priority order (the first match wins)
        """
        self.manifests = manifests

    def from_dataset(self, method='name_icontains', filter_value=None):
        """ Get the first manifest whose dataset matches a search criterion

        :param method: one of
            'name_icontains' - dataset name contains filter_value, ignoring case
            'name_contains'  - dataset name contains filter_value
            'name_endswith'  - dataset name ends with filter_value
            'tag_exact'      - filter_value is one of the dataset tags
            'tag_startswith' - any dataset tag starts with filter_value
            'tag_endswith'   - any dataset tag ends with filter_value
            Any other value matches nothing, so results in ManifestNotFound.
        :param filter_value: the string to search for. Must be given.
        :return: the first manifest (in the order given at construction) that matches
        :raises ManifestNotFound: if filter_value is None or no dataset matches
        """
        # Without a value to search for nothing can match; fail with the documented exception rather than an
        # AttributeError / TypeError from the string operations below
        if filter_value is None:
            raise ManifestNotFound('A filter_value must be provided to select a dataset from a multi manifest')

        for man in self.manifests:
            # TODO turn DataSet dict into an SDK object
            data_set = man.data_set

            # Keys are only looked up inside the branch that needs them, so a name search never requires 'tags'
            # to be present (and vice versa)
            if method == 'name_icontains':
                matched = filter_value.lower() in data_set['name'].lower()
            elif method == 'name_contains':
                matched = filter_value in data_set['name']
            elif method == 'name_endswith':
                matched = data_set['name'].endswith(filter_value)
            elif method == 'tag_exact':
                matched = filter_value in data_set['tags']
            elif method == 'tag_startswith':
                matched = any(tag.startswith(filter_value) for tag in data_set['tags'])
            elif method == 'tag_endswith':
                matched = any(tag.endswith(filter_value) for tag in data_set['tags'])
            else:
                matched = False

            if matched:
                return man

        raise ManifestNotFound('None of the datasets in the present manifest match this search criterion')
68+
69+
3970
class Manifest(object):
40-
""" Manifest of files in one or more datasets
71+
""" Manifest of files in a dataset
4172
4273
A manifest is used to read a list of files (and their associated properties) into octue analysis, or to compile a
4374
list of output files (results) and their properties that will be sent back to the octue system.
4475
4576
"""
4677

47-
uuid = None
48-
type = None
49-
files = None
50-
5178
def __init__(self, **kwargs):
5279
"""Construct a file Manifest
5380
"""
81+
self.uuid = None
82+
self.type = None
83+
self.files = None
84+
5485
self.__dict__.update(**kwargs)
5586

5687
if self.type not in TYPE_CHOICES:
@@ -87,6 +118,9 @@ def append(self, **kwargs):
87118
# Append a single file, constructed by passing the arguments through to DataFile()
88119
self.files.append(DataFile(**kwargs))
89120

121+
def get_dataset_manifest(self):
    """ Get the manifest for this dataset, which is this object itself
    """
    return self
123+
90124
def get_files(self, method='name_icontains', files=None, filter_value=None):
91125
""" Get a list of data files in a manifest whose name contains the input string
92126
@@ -122,14 +156,12 @@ def get_files(self, method='name_icontains', files=None, filter_value=None):
122156
results.append(file)
123157
break
124158
if method == 'in_sequence':
125-
for tag in file.tags:
126-
if tag.startswith('sequence'):
127-
results.append(file)
128-
break
159+
if file.sequence is not None:
160+
results.append(file)
129161

130162
return results
131163

132-
def get_file_sequence(self, filter_value=None, method='name_icontains', files=None):
164+
def get_file_sequence(self, method='name_icontains', filter_value=None, files=None):
133165
""" Get an ordered sequence of files matching a criterion
134166
135167
Accepts the same search arguments as `get_files`.
@@ -139,15 +171,16 @@ def get_file_sequence(self, filter_value=None, method='name_icontains', files=No
139171
results = self.get_files(filter_value=filter_value, method=method, files=files)
140172
results = self.get_files(method='in_sequence', files=results)
141173

142-
# Take second element for sort
143174
def get_sequence_number(file):
144-
for tag in file.tags:
145-
if tag.startswith('sequence'):
146-
sequence_number = int(tag.split(':')[1])
175+
return file.sequence
147176

148177
# Sort the results on ascending sequence number
149178
results.sort(key=get_sequence_number)
150179

180+
# TODO check sequence is unique and sequential!!!
181+
return results
182+
183+
151184
def get_file_by_tag(self, tag_string):
152185
""" Gets a data file from a manifest by searching for files with the provided tag(s)\
153186
@@ -186,11 +219,16 @@ def deserialise(json):
186219
def as_data_file_list(json_object):
187220
files = []
188221
if 'files' in json_object:
189-
files = [DataFile.deserialise(data_file_json) for data_file_json in json_object.pop('files')]
222+
files = [DataFile(**data_file_dict) for data_file_dict in json_object.pop('files')]
190223

191224
return {**json_object, 'files': files}
192-
193-
return Manifest(**JSONDecoder(object_hook=as_data_file_list).decode(json))
225+
decoded = JSONDecoder(object_hook=as_data_file_list).decode(json)
226+
227+
# Handle multi-manifest case
228+
if 'manifests' in decoded:
229+
return MultiManifest(manifests=[Manifest(**man) for man in decoded['manifests']])
230+
else:
231+
return Manifest(**decoded)
194232

195233
@staticmethod
196234
def load(file_name=None):

0 commit comments

Comments
 (0)