Skip to content

Commit e2b65e0

Browse files
Merge pull request #768 from Labelbox/develop
Release 3.30.1
2 parents 3df20a0 + 471b926 commit e2b65e0

File tree

9 files changed

+172
-15
lines changed

9 files changed

+172
-15
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
# Version 3.30.1 (2022-11-16)
4+
### Fixed
5+
* Running `project.setup_editor()` multiple times no longer resets the ontology, and instead raises an error if the editor is already set up for the project
6+
37
# Version 3.30.0 (2022-11-11)
48
### Changed
59
* create_data_rows, create_data_rows_sync, create_data_row, and update data rows all accept the new data row input format for row data

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
copyright = '2021, Labelbox'
2222
author = 'Labelbox'
2323

24-
release = '3.30.0'
24+
release = '3.30.1'
2525

2626
# -- General configuration ---------------------------------------------------
2727

labelbox/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name = "labelbox"
2-
__version__ = "3.30.0"
2+
__version__ = "3.30.1"
33

44
from labelbox.client import Client
55
from labelbox.schema.project import Project
@@ -27,4 +27,4 @@
2727
from labelbox.schema.resource_tag import ResourceTag
2828
from labelbox.schema.project_resource_tag import ProjectResourceTag
2929
from labelbox.schema.media_type import MediaType
30-
from labelbox.schema.slice import Slice, CatalogSlice
30+
from labelbox.schema.slice import Slice, CatalogSlice

labelbox/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,8 @@ class MALValidationError(LabelboxError):
129129
class OperationNotAllowedException(Exception):
130130
"""Raised when user does not have permissions to a resource or has exceeded usage limit"""
131131
pass
132+
133+
134+
class ProcessingWaitTimeout(Exception):
135+
"""Raised when waiting for the data rows to be processed takes longer than allowed"""
136+
pass

labelbox/schema/batch.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,15 @@ class Batch(DbObject):
3737
# Relationships
3838
created_by = Relationship.ToOne("User")
3939

40-
def __init__(self, client, project_id, *args, **kwargs):
40+
def __init__(self,
41+
client,
42+
project_id,
43+
*args,
44+
failed_data_row_ids=None,
45+
**kwargs):
4146
super().__init__(client, *args, **kwargs)
4247
self.project_id = project_id
48+
self._failed_data_row_ids = failed_data_row_ids
4349

4450
def project(self) -> 'Project': # type: ignore
4551
""" Returns Project which this Batch belongs to
@@ -174,3 +180,7 @@ def delete_labels(self, set_labels_as_template=False) -> None:
174180
},
175181
experimental=True)
176182
return res
183+
184+
@property
185+
def failed_data_row_ids(self):
186+
return (x for x in self._failed_data_row_ids)

labelbox/schema/project.py

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@
44
from collections import namedtuple
55
from datetime import datetime, timezone
66
from pathlib import Path
7-
from typing import TYPE_CHECKING, Dict, Union, Iterable, List, Optional, Any
7+
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
88
from urllib.parse import urlparse
99

1010
import ndjson
1111
import requests
1212

1313
from labelbox import utils
14-
from labelbox.exceptions import InvalidQueryError, LabelboxError
14+
from labelbox.exceptions import (InvalidQueryError, LabelboxError,
15+
ProcessingWaitTimeout, ResourceConflict)
1516
from labelbox.orm import query
16-
from labelbox.orm.db_object import DbObject, Updateable, Deletable
17+
from labelbox.orm.db_object import DbObject, Deletable, Updateable
1718
from labelbox.orm.model import Entity, Field, Relationship
1819
from labelbox.pagination import PaginatedCollection
1920
from labelbox.schema.consensus_settings import ConsensusSettings
@@ -90,6 +91,9 @@ class Project(DbObject, Updateable, Deletable):
9091
benchmarks = Relationship.ToMany("Benchmark", False)
9192
ontology = Relationship.ToOne("Ontology", True)
9293

94+
#
95+
_wait_processing_max_seconds = 3600
96+
9397
def update(self, **kwargs):
9498
""" Updates this project with the specified attributes
9599
@@ -319,7 +323,7 @@ def _validate_datetime(string_date: str) -> bool:
319323
return True
320324
except ValueError:
321325
pass
322-
raise ValueError(f"""Incorrect format for: {string_date}.
326+
raise ValueError(f"""Incorrect format for: {string_date}.
323327
Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""")
324328
return True
325329

@@ -507,6 +511,9 @@ def setup_editor(self, ontology) -> None:
507511
Args:
508512
ontology (Ontology): The ontology to attach to the project
509513
"""
514+
if self.labeling_frontend() is not None:
515+
raise ResourceConflict("Editor is already set up.")
516+
510517
labeling_frontend = next(
511518
self.client.get_labeling_frontends(
512519
where=Entity.LabelingFrontend.name == "Editor"))
@@ -546,6 +553,9 @@ def setup(self, labeling_frontend, labeling_frontend_options) -> None:
546553
to `str` using `json.dumps`.
547554
"""
548555

556+
if self.labeling_frontend() is not None:
557+
raise ResourceConflict("Editor is already set up.")
558+
549559
if not isinstance(labeling_frontend_options, str):
550560
labeling_frontend_options = json.dumps(labeling_frontend_options)
551561

@@ -595,11 +605,16 @@ def create_batch(self,
595605
if not len(dr_ids):
596606
raise ValueError("You need at least one data row in a batch")
597607

598-
method = 'createBatch'
608+
self._wait_until_data_rows_are_processed(
609+
data_rows, self._wait_processing_max_seconds)
610+
method = 'createBatchV2'
599611
query_str = """mutation %sPyApi($projectId: ID!, $batchInput: CreateBatchInput!) {
600612
project(where: {id: $projectId}) {
601613
%s(input: $batchInput) {
602-
%s
614+
batch {
615+
%s
616+
}
617+
failedDataRowIds
603618
}
604619
}
605620
}
@@ -622,9 +637,12 @@ def create_batch(self,
622637
params,
623638
timeout=180.0,
624639
experimental=True)["project"][method]
625-
626-
res['size'] = len(dr_ids)
627-
return Entity.Batch(self.client, self.uid, res)
640+
batch = res['batch']
641+
batch['size'] = len(dr_ids)
642+
return Entity.Batch(self.client,
643+
self.uid,
644+
batch,
645+
failed_data_row_ids=res['failedDataRowIds'])
628646

629647
def _update_queue_mode(self, mode: "QueueMode") -> "QueueMode":
630648
"""
@@ -977,6 +995,42 @@ def _is_url_valid(url: Union[str, Path]) -> bool:
977995
raise ValueError(
978996
f'Invalid annotations given of type: {type(annotations)}')
979997

998+
def _wait_until_data_rows_are_processed(self,
999+
data_row_ids: List[str],
1000+
wait_processing_max_seconds: int,
1001+
sleep_interval=30):
1002+
""" Wait until all the specified data rows are processed"""
1003+
start_time = datetime.now()
1004+
while True:
1005+
if (datetime.now() -
1006+
start_time).total_seconds() >= wait_processing_max_seconds:
1007+
raise ProcessingWaitTimeout(
1008+
"Maximum wait time exceeded while waiting for data rows to be processed. Try creating a batch a bit later"
1009+
)
1010+
1011+
all_good = self.__check_data_rows_have_been_processed(data_row_ids)
1012+
if all_good:
1013+
return
1014+
1015+
logger.debug(
1016+
'Some of the data rows are still being processed, waiting...')
1017+
time.sleep(sleep_interval)
1018+
1019+
def __check_data_rows_have_been_processed(self, data_row_ids: List[str]):
1020+
data_row_ids_param = "data_row_ids"
1021+
1022+
query_str = """query CheckAllDataRowsHaveBeenProcessedPyApi($%s: [ID!]!) {
1023+
queryAllDataRowsHaveBeenProcessed(dataRowIds:$%s) {
1024+
allDataRowsHaveBeenProcessed
1025+
}
1026+
}""" % (data_row_ids_param, data_row_ids_param)
1027+
1028+
params = {}
1029+
params[data_row_ids_param] = data_row_ids
1030+
response = self.client.execute(query_str, params)
1031+
return response["queryAllDataRowsHaveBeenProcessed"][
1032+
"allDataRowsHaveBeenProcessed"]
1033+
9801034

9811035
class ProjectMember(DbObject):
9821036
user = Relationship.ToOne("User", cache=True)

tests/integration/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,13 @@ def dataset(client, rand_gen):
191191
dataset.delete()
192192

193193

194+
@pytest.fixture(scope='function')
195+
def unique_dataset(client, rand_gen):
196+
dataset = client.create_dataset(name=rand_gen(str))
197+
yield dataset
198+
dataset.delete()
199+
200+
194201
@pytest.fixture
195202
def datarow(dataset, image_url):
196203
task = dataset.create_data_rows([

tests/integration/test_batch.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1+
from labelbox.exceptions import ProcessingWaitTimeout
12
import pytest
2-
33
from labelbox import Dataset, Project
44

55
IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg"
@@ -31,6 +31,23 @@ def small_dataset(dataset: Dataset):
3131
yield dataset
3232

3333

34+
@pytest.fixture(scope='function')
35+
def dataset_with_invalid_data_rows(unique_dataset: Dataset):
36+
upload_invalid_data_rows_for_dataset(unique_dataset)
37+
38+
yield unique_dataset
39+
40+
41+
def upload_invalid_data_rows_for_dataset(dataset: Dataset):
42+
task = dataset.create_data_rows([
43+
{
44+
"row_data": 'gs://invalid-bucket/example.png', # forbidden
45+
"external_id": "image-without-access.jpg"
46+
},
47+
] * 2)
48+
task.wait_till_done()
49+
50+
3451
def test_create_batch(batch_project: Project, big_dataset: Dataset):
3552
data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
3653
batch = batch_project.create_batch("test-batch", data_rows, 3)
@@ -72,12 +89,63 @@ def test_batch_project(batch_project: Project, small_dataset: Dataset):
7289
data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
7390
batch = batch_project.create_batch("batch to test project relationship",
7491
data_rows)
92+
7593
project_from_batch = batch.project()
7694

7795
assert project_from_batch.uid == batch_project.uid
7896
assert project_from_batch.name == batch_project.name
7997

8098

99+
def test_batch_creation_for_data_rows_with_issues(
100+
batch_project: Project, small_dataset: Dataset,
101+
dataset_with_invalid_data_rows: Dataset):
102+
"""
103+
Create a batch containing both valid and invalid data rows
104+
"""
105+
valid_data_rows = [dr.uid for dr in list(small_dataset.data_rows())]
106+
invalid_data_rows = [
107+
dr.uid for dr in list(dataset_with_invalid_data_rows.data_rows())
108+
]
109+
data_rows_to_add = valid_data_rows + invalid_data_rows
110+
111+
assert len(data_rows_to_add) == 5
112+
batch = batch_project.create_batch("batch to test failed data rows",
113+
data_rows_to_add)
114+
failed_data_row_ids = [x for x in batch.failed_data_row_ids]
115+
assert len(failed_data_row_ids) == 2
116+
117+
failed_data_row_ids_set = set(failed_data_row_ids)
118+
invalid_data_rows_set = set(invalid_data_rows)
119+
assert len(failed_data_row_ids_set.intersection(invalid_data_rows_set)) == 2
120+
121+
122+
def test_batch_creation_with_processing_timeout(batch_project: Project,
123+
small_dataset: Dataset,
124+
unique_dataset: Dataset):
125+
"""
126+
Create a batch with zero wait time, this means that the waiting logic will throw exception immediately
127+
"""
128+
# wait for these data rows to be processed
129+
valid_data_rows = [dr.uid for dr in list(small_dataset.data_rows())]
130+
batch_project._wait_until_data_rows_are_processed(
131+
valid_data_rows, wait_processing_max_seconds=3600, sleep_interval=5)
132+
133+
# upload data rows for this dataset and don't wait
134+
upload_invalid_data_rows_for_dataset(unique_dataset)
135+
unprocessed_data_rows = [dr.uid for dr in list(unique_dataset.data_rows())]
136+
137+
data_row_ids = valid_data_rows + unprocessed_data_rows
138+
139+
stashed_wait_timeout = batch_project._wait_processing_max_seconds
140+
with pytest.raises(ProcessingWaitTimeout):
141+
# emulate the situation where there are still some data rows being
142+
# processed but wait timeout exceeded
143+
batch_project._wait_processing_max_seconds = 0
144+
batch_project.create_batch("batch to test failed data rows",
145+
data_row_ids)
146+
batch_project._wait_processing_max_seconds = stashed_wait_timeout
147+
148+
81149
def test_export_data_rows(batch_project: Project, dataset: Dataset):
82150
n_data_rows = 5
83151
task = dataset.create_data_rows([

tests/integration/test_project_setup.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
from labelbox import LabelingFrontend
9-
from labelbox.exceptions import InvalidQueryError
9+
from labelbox.exceptions import InvalidQueryError, ResourceConflict
1010

1111

1212
def simple_ontology():
@@ -67,3 +67,12 @@ def test_project_editor_setup(client, project, rand_gen):
6767
time.sleep(3) # Search takes a second
6868
assert [ontology.name for ontology in client.get_ontologies(ontology_name)
6969
] == [ontology_name]
70+
71+
72+
def test_project_editor_setup_cant_call_multiple_times(client, project,
73+
rand_gen):
74+
ontology_name = f"test_project_editor_setup_ontology_name-{rand_gen(str)}"
75+
ontology = client.create_ontology(ontology_name, simple_ontology())
76+
project.setup_editor(ontology)
77+
with pytest.raises(ResourceConflict):
78+
project.setup_editor(ontology)

0 commit comments

Comments (0)