Skip to content

Commit 894f794

Browse files
committed
IBLGlobusPatcher bugfix + test (resolves #945)
1 parent bb78256 commit 894f794

File tree

2 files changed

+121
-8
lines changed

2 files changed

+121
-8
lines changed

ibllib/oneibl/patcher.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ class IBLGlobusPatcher(Patcher, globus.Globus):
386386
"""
387387
def __init__(self, alyx=None, client_name='default'):
388388
"""
389+
A Globus patcher for IBL data.
389390
390391
Parameters
391392
----------
@@ -395,7 +396,7 @@ def __init__(self, alyx=None, client_name='default'):
395396
The Globus client name.
396397
"""
397398
self.alyx = alyx or AlyxClient()
398-
globus.Globus.__init__(client_name=client_name) # NB we don't init Patcher as we're not using ONE
399+
globus.Globus.__init__(self, client_name=client_name) # NB we don't init Patcher as we're not using ONE
399400

400401
def delete_dataset(self, dataset, dry=False):
401402
"""
@@ -474,6 +475,18 @@ def is_aws(repository_name):
474475
self.alyx.rest('datasets', 'delete', id=did)
475476
return task_ids, files_by_repo
476477

478+
def _rm(self):
479+
raise NotImplementedError('Use delete_dataset instead')
480+
481+
def _scp(self):
482+
raise NotImplementedError('Use transfer_data instead')
483+
484+
def patch_dataset(self):
485+
raise NotImplementedError
486+
487+
def patch_datasets(self):
488+
raise NotImplementedError
489+
477490

478491
class SSHPatcher(Patcher):
479492
"""

ibllib/tests/test_oneibl.py

Lines changed: 107 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import one.alf.exceptions as alferr
2020
from one.alf.cache import QC_TYPE
2121
import iblutil.io.params as iopar
22+
from iblutil.util import Bunch
2223

2324
from ibllib.oneibl import patcher, registration, data_handlers as handlers
2425
import ibllib.io.extractors.base
@@ -84,12 +85,13 @@ def mock_input(prompt):
8485
return FTP_pars[next(k for k in FTP_pars.keys() if k in prompt.replace(',', '').split())]
8586

8687

87-
class TestGlobusPatcher(unittest.TestCase):
88-
"""Tests for the ibllib.oneibl.patcher.GlobusPatcher class."""
89-
88+
class _GlobusPatcherTest(unittest.TestCase):
9089
globus_sdk_mock = None
9190
"""unittest.mock._patch: Mock object for globus_sdk package."""
9291

92+
patcher_class = None
93+
"""object: The patcher class to instantiate during setup."""
94+
9395
@mock.patch('one.remote.globus._setup')
9496
def setUp(self, _) -> None:
9597
# Create a temp dir for writing datasets to
@@ -107,12 +109,21 @@ def setUp(self, _) -> None:
107109
'expires_at_seconds': datetime.datetime.now().timestamp() + 60**2
108110
})
109111
# Mock the globus SDK so that no actual tasks are submitted
110-
self.globus_sdk_mock = mock.patch('one.remote.globus.globus_sdk')
111-
self.globus_sdk_mock.start()
112-
self.addCleanup(self.globus_sdk_mock.stop)
112+
globus_sdk_mock = mock.patch('one.remote.globus.globus_sdk')
113+
self.globus_sdk_mock = globus_sdk_mock.start()
114+
self.addCleanup(globus_sdk_mock.stop)
113115
self.one = ONE(**TEST_DB)
116+
117+
118+
class TestGlobusPatcher(_GlobusPatcherTest):
119+
"""Tests for the ibllib.oneibl.patcher.GlobusPatcher class."""
120+
121+
patcher_class = patcher.GlobusPatcher
122+
123+
def setUp(self) -> None:
124+
super().setUp()
114125
with mock.patch('one.remote.globus.load_client_params', return_value=self.pars):
115-
self.globus_patcher = patcher.GlobusPatcher(one=self.one)
126+
self.globus_patcher = self.patcher_class(one=self.one)
116127

117128
def test_patch_datasets(self):
118129
"""Tests for GlobusPatcher.patch_datasets and GlobusPatcher.launch_transfers methods."""
@@ -163,6 +174,95 @@ def test_patch_datasets(self):
163174
self.globus_patcher.client.submit_transfer.assert_called()
164175

165176

177+
class TestIBLGlobusPatcher(_GlobusPatcherTest):
178+
"""Tests for the ibllib.oneibl.patcher.IBLGlobusPatcher class."""
179+
180+
patcher_class = patcher.IBLGlobusPatcher
181+
182+
def setUp(self) -> None:
183+
super().setUp()
184+
with mock.patch('one.remote.globus.load_client_params', return_value=self.pars):
185+
self.globus_patcher = self.patcher_class(alyx=self.one.alyx)
186+
187+
def test_delete_datasets(self):
188+
"""Tests for IBLGlobusPatcher.delete_datasets method."""
189+
# The following dataset should have two file records, a flatiron one that exists and an SR one that doesn't
190+
did = '80fabd30-9dc8-4778-b349-d175af63e1bd'
191+
self.dset = self.one.alyx.rest('datasets', 'read', id=did)
192+
assert len(self.dset['file_records']) == 2, 'expected two file records for this test dataset'
193+
194+
# Some Globus endpoint IDs to return with Alyx REST mock
195+
self.endpoint_ids = {name: str(uuid4()) for name in ('mainen_lab_SR', 'flatiron_mainenlab')}
196+
197+
task_id = uuid4()
198+
self.globus_patcher.client.submit_delete.return_value = Bunch(data={'task_id': str(task_id)})
199+
200+
# TEST 1: Test delete of flatiron dataset with UUID
201+
with mock.patch.object(self.one.alyx, 'rest', side_effect=self._alyx_patch) as alyx_mock:
202+
task_ids, deleted = self.globus_patcher.delete_dataset(did)
203+
alyx_mock.assert_called_with('datasets', 'delete', id=did)
204+
self.globus_sdk_mock.DeleteData.assert_called_once()
205+
self.assertEqual(task_ids, [task_id])
206+
self.assertCountEqual(deleted, ['flatiron_mainenlab'])
207+
expected = [PurePosixPath(f'ZFM-01935/2021-02-05/001/alf/_ibl_wheelMoves.intervals.{did}.npy')]
208+
self.assertEqual(expected, deleted['flatiron_mainenlab'])
209+
210+
# TEST 2: Test deleting with dataset record dict, an existing SR and AWS file record, and missing globus ID for flatiron
211+
for fr in self.dset['file_records']:
212+
if fr['data_repository'] == 'mainen_lab_SR':
213+
fr['exists'] = True # False -> True
214+
elif fr['data_repository'] == 'flatiron_mainenlab':
215+
# Add an AWS file record
216+
s3_fr = fr.copy()
217+
s3_fr['data_repository'] = 'aws_mainenlab'
218+
s3_fr['data_repository_path'] = 'data' + s3_fr['data_repository_path']
219+
relative_path = '/'.join(Path(s3_fr['data_url']).parts[4:])
220+
s3_fr['data_url'] = (
221+
'https://bucket.s3.amazonaws.com/' + s3_fr['data_repository_path'] + relative_path)
222+
self.dset['file_records'].append(s3_fr)
223+
# Also make the flatiron endpoint ID None
224+
del self.endpoint_ids['flatiron_mainenlab']
225+
# Reset mock calls
226+
self.globus_patcher.client.reset_mock()
227+
self.globus_sdk_mock.reset_mock()
228+
with mock.patch.object(self.one.alyx, 'rest', side_effect=self._alyx_patch) as alyx_mock, \
229+
mock.patch('ibllib.oneibl.patcher.Popen') as proc_mock, self.assertLogs(patcher.__name__, level='ERROR') as log:
230+
line = mock.MagicMock()
231+
line.decode.return_value = '...'
232+
proc_mock().wait.return_value = 0
233+
proc_mock().stdout.readline.side_effect = (line,)
234+
proc_mock.reset_mock() # reset call count
235+
# Test dry
236+
task_ids, deleted = self.globus_patcher.delete_dataset(self.dset, dry=True)
237+
self.assertEqual([], task_ids)
238+
self.assertCountEqual(['mainen_lab_SR', 'aws_mainenlab'], deleted)
239+
self.assertFalse(any(args == ('datasets', 'delete') for args, kwargs in alyx_mock.call_args_list))
240+
self.globus_sdk_mock.DeleteData.assert_not_called()
241+
expected = ['aws', 's3', 'rm', 's3://bucket' + s3_fr['data_url'][31:],
242+
'--profile', 'ibladmin', '--dryrun', '--only-show-errors']
243+
proc_mock.assert_called_once_with(expected, stdout=-1, stderr=-2)
244+
245+
# Test not dry
246+
task_ids, deleted = self.globus_patcher.delete_dataset(self.dset, dry=False)
247+
# Should log failure due to missing endpoint ID in Alyx
248+
self.assertEqual('Unable to delete from flatiron_mainenlab', log.records[-1].getMessage())
249+
alyx_mock.assert_called_with('datasets', 'delete', id=did)
250+
self.globus_sdk_mock.DeleteData.assert_called_once()
251+
expected = ['aws', 's3', 'rm', 's3://bucket' + s3_fr['data_url'][31:], '--profile', 'ibladmin', '--only-show-errors']
252+
proc_mock.assert_called_with(expected, stdout=-1, stderr=-2)
253+
254+
def _alyx_patch(self, endpoint, action, **kwargs):
255+
"""Patch the AlyxClient to return the given dataset."""
256+
if endpoint == 'datasets' and action == 'read':
257+
self.assertEqual(kwargs['id'], self.dset['url'][-36:])
258+
return self.dset
259+
if endpoint == 'data-repository' and action == 'read':
260+
fr = next(fr for fr in self.dset['file_records'] if fr['data_repository'] == kwargs['id'])
261+
return {'name': fr['data_repository'], 'globus_path': fr['data_repository_path'],
262+
'repository_type': 'Fileserver', 'globus_is_personal': fr['data_url'] is None,
263+
'globus_endpoint_id': self.endpoint_ids.get(fr['data_repository'])}
264+
265+
166266
class TestAlyx2Path(unittest.TestCase):
167267
dset = {
168268
'url': 'https://alyx.internationalbrainlab.org/'

0 commit comments

Comments
 (0)